diff options
Diffstat (limited to 'ext/json/json_scanner.re')
| -rw-r--r-- | ext/json/json_scanner.re | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/ext/json/json_scanner.re b/ext/json/json_scanner.re new file mode 100644 index 0000000000..7fa9352c09 --- /dev/null +++ b/ext/json/json_scanner.re @@ -0,0 +1,345 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2014 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: Jakub Zelenka <bukka@php.net> | + +----------------------------------------------------------------------+ +*/ + +#include "php.h" +#include "php_json_scanner.h" +#include "php_json_scanner_defs.h" +#include "php_json_parser.h" +#include "json_parser.tab.h" + +#define YYCTYPE php_json_ctype +#define YYCURSOR s->cursor +#define YYLIMIT s->limit +#define YYMARKER s->marker +#define YYCTXMARKER s->ctxmarker + +#define YYGETCONDITION() s->state +#define YYSETCONDITION(yystate) s->state = yystate + +#define YYFILL(n) + +#define PHP_JSON_CONDITION_SET(condition) YYSETCONDITION(yyc##condition) +#define PHP_JSON_CONDITION_GOTO(condition) goto yyc_##condition + +#define PHP_JSON_SCANNER_COPY_ESC() php_json_scanner_copy_string(s, 0) +#define PHP_JSON_SCANNER_COPY_UTF() php_json_scanner_copy_string(s, 5) +#define PHP_JSON_SCANNER_COPY_UTF_SP() php_json_scanner_copy_string(s, 11) + + +static void php_json_scanner_copy_string(php_json_scanner *s, int esc_size) +{ + size_t len = s->cursor - s->str_start - esc_size - 1; + if (len) { + memcpy(s->pstr, s->str_start, len); + s->pstr += len; + } +} + +static int php_json_hex_to_int(char code) +{ + if (code >= '0' && code <= '9') { + return code - '0'; + } else if (code >= 'A' && code <= 'F') { + return code - ('A' - 10); + } else if (code >= 'a' && code <= 'f') { + return code - ('a' - 10); + } else { + /* this should never happened (just to suppress compiler warning) */ + return -1; + } +} + +static int php_json_ucs2_to_int_ex(php_json_scanner *s, int size, int start) +{ + int i, code = 0; + php_json_ctype *pc = s->cursor - start; + for (i = 0; i < size; i++) { + code |= php_json_hex_to_int(*(pc--)) << (i * 4); + } + return code; +} + +static int php_json_ucs2_to_int(php_json_scanner *s, int size) +{ + return php_json_ucs2_to_int_ex(s, size, 1); +} + +void php_json_scanner_init(php_json_scanner *s, char *str, int str_len, long options) +{ + s->cursor = (php_json_ctype *) str; + s->limit = (php_json_ctype *) str + str_len; + s->options = options; + PHP_JSON_CONDITION_SET(JS); +} + +int php_json_scan(php_json_scanner *s) +{ + ZVAL_NULL(&s->value); + +std: + s->token = s->cursor; + +/*!re2c + re2c:indent:top = 1; + re2c:yyfill:enable = 0; + + DIGIT = [0-9] ; + DIGITNZ = [1-9] ; + UINT = "0" | ( DIGITNZ DIGIT* ) ; + INT = "-"? UINT ; + HEX = DIGIT | [a-fA-F] ; + HEXNZ = DIGITNZ | [a-fA-F] ; + HEX7 = [0-7] ; + HEXC = DIGIT | [a-cA-C] ; + FLOAT = INT "." DIGIT+ ; + EXP = ( INT | FLOAT ) [eE] [+-]? DIGIT+ ; + NL = "\r"? "\n" ; + WS = [ \t\r]+ ; + EOI = "\000"; + CTRL = [\x00-\x1F] ; + UTF8T = [\x80-\xBF] ; + UTF8_1 = [\x00-\x7F] ; + UTF8_2 = [\xC2-\xDF] UTF8T ; + UTF8_3A = "\xE0" [\xA0-\xBF] UTF8T ; + UTF8_3B = [\xE1-\xEC] UTF8T{2} ; + UTF8_3C = "\xED" [\x80-\x9F] UTF8T ; + UTF8_3D = [\xEE-\xEF] UTF8T{2} ; + UTF8_3 = UTF8_3A | UTF8_3B | UTF8_3C | UTF8_3D ; + UTF8_4A = "\xF0"[\x90-\xBF] UTF8T{2} ; + UTF8_4B = [\xF1-\xF3] UTF8T{3} ; + UTF8_4C = "\xF4" [\x80-\x8F] UTF8T{2} ; + UTF8_4 = UTF8_4A | UTF8_4B | UTF8_4C ; + UTF8 = UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4 ; + ANY = [^] ; + ESCPREF = "\\" ; + ESCSYM = ( "\"" | "\\" | "/" | [bfnrt] ) ; + ESC = ESCPREF ESCSYM ; + UTFSYM = "u" ; + UTFPREF = ESCPREF UTFSYM ; + UCS2 = UTFPREF HEX{4} ; + UTF16_1 = UTFPREF "00" HEX7 HEX ; + UTF16_2 = UTFPREF "0" HEX7 HEX{2} ; + UTF16_3 = UTFPREF ( ( ( HEXC | [efEF] ) HEX ) | ( [dD] HEX7 ) ) HEX{2} ; + UTF16_4 = UTFPREF [dD] [89abAB] HEX{2} UTFPREF [dD] [c-fC-F] HEX{2} ; + + <JS>"{" { return '{'; } + <JS>"}" { return '}'; } + <JS>"[" { return '['; } + <JS>"]" { return ']'; } + <JS>":" { return ':'; } + <JS>"," { return ','; } + <JS>"null" { + ZVAL_NULL(&s->value); + return PHP_JSON_T_NUL; + } + <JS>"true" { + ZVAL_TRUE(&s->value); + return PHP_JSON_T_TRUE; + } + <JS>"false" { + ZVAL_FALSE(&s->value); + return PHP_JSON_T_FALSE; + } + <JS>INT { + zend_bool bigint = 0, negative = s->token[0] == '-'; + size_t digits = (size_t) (s->cursor - s->token - negative); + if (digits >= PHP_JSON_INT_MAX_LENGTH) { + if (digits == PHP_JSON_INT_MAX_LENGTH) { + int cmp = strncmp((char *) (s->token + negative), PHP_JSON_INT_MAX_DIGITS, PHP_JSON_INT_MAX_LENGTH); + if (!(cmp < 0 || (cmp == 0 && negative))) { + bigint = 1; + } + } else { + bigint = 1; + } + } + if (!bigint) { + ZVAL_LONG(&s->value, strtol((char *) s->token, NULL, 10)); + return PHP_JSON_T_INT; + } else if (s->options & PHP_JSON_BIGINT_AS_STRING) { + ZVAL_STRINGL(&s->value, (char *) s->token, s->cursor - s->token, 1); + return PHP_JSON_T_STRING; + } else { + ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL)); + return PHP_JSON_T_DOUBLE; + } + } + <JS>FLOAT|EXP { + ZVAL_DOUBLE(&s->value, zend_strtod((char *) s->token, NULL)); + return PHP_JSON_T_DOUBLE; + } + <JS>NL|WS { goto std; } + <JS>EOI { + if (s->limit < s->cursor) { + return PHP_JSON_T_EOI; + } else { + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } + } + <JS>["] { + s->str_start = s->cursor; + s->str_esc = 0; + PHP_JSON_CONDITION_SET(STR_P1); + PHP_JSON_CONDITION_GOTO(STR_P1); + } + + <STR_P1>CTRL { + s->errcode = PHP_JSON_ERROR_CTRL_CHAR; + return PHP_JSON_T_ERROR; + } + <STR_P1>UTF16_1 { + s->str_esc += 5; + PHP_JSON_CONDITION_GOTO(STR_P1); + } + <STR_P1>UTF16_2 { + s->str_esc += 4; + PHP_JSON_CONDITION_GOTO(STR_P1); + } + <STR_P1>UTF16_3 { + s->str_esc += 3; + PHP_JSON_CONDITION_GOTO(STR_P1); + } + <STR_P1>UTF16_4 { + s->str_esc += 8; + PHP_JSON_CONDITION_GOTO(STR_P1); + } + <STR_P1>UCS2 { + s->errcode = PHP_JSON_ERROR_UTF16; + return PHP_JSON_T_ERROR; + } + <STR_P1>ESC { + s->str_esc++; + PHP_JSON_CONDITION_GOTO(STR_P1); + } + <STR_P1>ESCPREF { + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } + <STR_P1>["] { + char *str; + size_t len = s->cursor - s->str_start - s->str_esc - 1; + if (len == 0) { + PHP_JSON_CONDITION_SET(JS); + ZVAL_EMPTY_STRING(&s->value); + return PHP_JSON_T_ESTRING; + } + str = emalloc(len + 1); + str[len] = 0; + ZVAL_STRINGL(&s->value, str, len, 0); + if (s->str_esc) { + s->pstr = (php_json_ctype *) Z_STRVAL(s->value); + s->cursor = s->str_start; + PHP_JSON_CONDITION_SET(STR_P2); + PHP_JSON_CONDITION_GOTO(STR_P2); + } else { + memcpy(Z_STRVAL(s->value), s->str_start, len); + PHP_JSON_CONDITION_SET(JS); + return PHP_JSON_T_STRING; + } + } + <STR_P1>UTF8 { PHP_JSON_CONDITION_GOTO(STR_P1); } + <STR_P1>ANY { + s->errcode = PHP_JSON_ERROR_UTF8; + return PHP_JSON_T_ERROR; + } + + <STR_P2>UTF16_1 { + int utf16 = php_json_ucs2_to_int(s, 2); + PHP_JSON_SCANNER_COPY_UTF(); + *(s->pstr++) = (char) utf16; + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } + <STR_P2>UTF16_2 { + int utf16 = php_json_ucs2_to_int(s, 3); + PHP_JSON_SCANNER_COPY_UTF(); + *(s->pstr++) = (char) (0xc0 | (utf16 >> 6)); + *(s->pstr++) = (char) (0x80 | (utf16 & 0x3f)); + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } + <STR_P2>UTF16_3 { + int utf16 = php_json_ucs2_to_int(s, 4); + PHP_JSON_SCANNER_COPY_UTF(); + *(s->pstr++) = (char) (0xe0 | (utf16 >> 12)); + *(s->pstr++) = (char) (0x80 | ((utf16 >> 6) & 0x3f)); + *(s->pstr++) = (char) (0x80 | (utf16 & 0x3f)); + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } + <STR_P2>UTF16_4 { + int utf32, utf16_hi, utf16_lo; + utf16_hi = php_json_ucs2_to_int(s, 4); + utf16_lo = php_json_ucs2_to_int_ex(s, 4, 7); + utf32 = ((utf16_lo & 0x3FF) << 10) + (utf16_hi & 0x3FF) + 0x10000; + PHP_JSON_SCANNER_COPY_UTF_SP(); + *(s->pstr++) = (char) (0xf0 | (utf32 >> 18)); + *(s->pstr++) = (char) (0x80 | ((utf32 >> 12) & 0x3f)); + *(s->pstr++) = (char) (0x80 | ((utf32 >> 6) & 0x3f)); + *(s->pstr++) = (char) (0x80 | (utf32 & 0x3f)); + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } + <STR_P2>ESCPREF { + char esc; + PHP_JSON_SCANNER_COPY_ESC(); + switch (*s->cursor) { + case 'b': + esc = '\b'; + break; + case 'f': + esc = '\f'; + break; + case 'n': + esc = '\n'; + break; + case 'r': + esc = '\r'; + break; + case 't': + esc = '\t'; + break; + case '\\': + case '/': + case '"': + esc = *s->cursor; + break; + default: + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } + *(s->pstr++) = esc; + ++YYCURSOR; + s->str_start = s->cursor; + PHP_JSON_CONDITION_GOTO(STR_P2); + } + <STR_P2>["] => JS { + PHP_JSON_SCANNER_COPY_ESC(); + return PHP_JSON_T_STRING; + } + <STR_P2>ANY { PHP_JSON_CONDITION_GOTO(STR_P2); } + + <*>ANY { + s->errcode = PHP_JSON_ERROR_SYNTAX; + return PHP_JSON_T_ERROR; + } +*/ + +} + |
