summary refs log tree commit diff
path: root/ext/tokenizer/tokenizer.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/tokenizer/tokenizer.c')
-rw-r--r-- ext/tokenizer/tokenizer.c 84
1 files changed, 37 insertions, 47 deletions
diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c
index 439479266e..1c21c08a22 100644
--- a/ext/tokenizer/tokenizer.c
+++ b/ext/tokenizer/tokenizer.c
@@ -106,12 +106,29 @@ PHP_MINFO_FUNCTION(tokenizer)
}
/* }}} */
+static void add_token(zval *return_value, int token_type, /* append one token to the return_value array */
+ unsigned char *text, size_t leng, int lineno) { /* text/leng: token bytes and their count; lineno: line stored with the token */
+ if (token_type >= 256) { /* ids of 256 and above are emitted as a nested three-element array */
+ zval keyword;
+ array_init(&keyword);
+ add_next_index_long(&keyword, token_type); /* element 0: token id */
+ add_next_index_stringl(&keyword, (char *) text, leng); /* element 1: token text, leng bytes */
+ add_next_index_long(&keyword, lineno); /* element 2: line number */
+ add_next_index_zval(return_value, &keyword);
+ } else { /* ids below 256 are emitted as a bare string; lineno is not used on this path */
+ if (leng == 1) {
+ add_next_index_str(return_value, ZSTR_CHAR(text[0])); /* one-byte text goes through ZSTR_CHAR instead of add_next_index_stringl */
+ } else {
+ add_next_index_stringl(return_value, (char *) text, leng);
+ }
+ }
+}
+
static zend_bool tokenize(zval *return_value, zend_string *source)
{
zval source_zval;
zend_lex_state original_lex_state;
zval token;
- zval keyword;
int token_type;
int token_line = 1;
int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
@@ -129,25 +146,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
ZVAL_UNDEF(&token);
while ((token_type = lex_scan(&token))) {
- if (token_type == T_CLOSE_TAG && zendtext[zendleng - 1] != '>') {
- CG(zend_lineno)++;
- }
-
- if (token_type >= 256) {
- array_init(&keyword);
- add_next_index_long(&keyword, token_type);
- if (token_type == T_END_HEREDOC) {
- if (CG(increment_lineno)) {
- token_line = ++CG(zend_lineno);
- CG(increment_lineno) = 0;
- }
- }
- add_next_index_stringl(&keyword, (char *)zendtext, zendleng);
- add_next_index_long(&keyword, token_line);
- add_next_index_zval(return_value, &keyword);
- } else {
- add_next_index_stringl(return_value, (char *)zendtext, zendleng);
- }
+ add_token(return_value, token_type, zendtext, zendleng, token_line);
if (Z_TYPE(token) != IS_UNDEF) {
zval_dtor(&token);
@@ -162,11 +161,8 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
) {
/* fetch the rest into a T_INLINE_HTML */
if (zendcursor != zendlimit) {
- array_init(&keyword);
- add_next_index_long(&keyword, T_INLINE_HTML);
- add_next_index_stringl(&keyword, (char *)zendcursor, zendlimit - zendcursor);
- add_next_index_long(&keyword, token_line);
- add_next_index_zval(return_value, &keyword);
+ add_token(return_value, T_INLINE_HTML,
+ zendcursor, zendlimit - zendcursor, token_line);
}
break;
}
@@ -174,6 +170,11 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
need_tokens = 3;
}
+ if (CG(increment_lineno)) {
+ CG(zend_lineno)++;
+ CG(increment_lineno) = 0;
+ }
+
token_line = CG(zend_lineno);
}
@@ -186,22 +187,13 @@ static zend_bool tokenize(zval *return_value, zend_string *source)
void on_event(zend_php_scanner_event event, int token, int line, void *context)
{
zval *token_stream = (zval *) context;
- zval keyword;
HashTable *tokens_ht;
zval *token_zv;
switch (event) {
case ON_TOKEN:
if (token == END) break;
- if (token >= 256) {
- array_init(&keyword);
- add_next_index_long(&keyword, token);
- add_next_index_stringl(&keyword, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
- add_next_index_long(&keyword, line);
- add_next_index_zval(token_stream, &keyword);
- } else {
- add_next_index_stringl(token_stream, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
- }
+ add_token(token_stream, token, LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line);
break;
case ON_FEEDBACK:
tokens_ht = Z_ARRVAL_P(token_stream);
@@ -212,12 +204,8 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
break;
case ON_STOP:
if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) {
- array_init(&keyword);
- add_next_index_long(&keyword, T_INLINE_HTML);
- add_next_index_stringl(&keyword,
- (char *)LANG_SCNG(yy_cursor), LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor));
- add_next_index_long(&keyword, CG(zend_lineno));
- add_next_index_zval(token_stream, &keyword);
+ add_token(token_stream, T_INLINE_HTML, LANG_SCNG(yy_cursor),
+ LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor), CG(zend_lineno));
}
break;
}
@@ -275,9 +263,11 @@ PHP_FUNCTION(token_get_all)
zend_long flags = 0;
zend_bool success;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|l", &source, &flags) == FAILURE) {
- return;
- }
+ ZEND_PARSE_PARAMETERS_START(1, 2)
+ Z_PARAM_STR(source)
+ Z_PARAM_OPTIONAL
+ Z_PARAM_LONG(flags)
+ ZEND_PARSE_PARAMETERS_END();
if (flags & TOKEN_PARSE) {
success = tokenize_parse(return_value, source);
@@ -297,9 +287,9 @@ PHP_FUNCTION(token_name)
{
zend_long type;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &type) == FAILURE) {
- return;
- }
+ ZEND_PARSE_PARAMETERS_START(1, 1)
+ Z_PARAM_LONG(type)
+ ZEND_PARSE_PARAMETERS_END();
RETVAL_STRING(get_token_type_name(type));
}