diff options
Diffstat (limited to 'ext/mbstring/oniguruma/regposix.c')
| -rw-r--r-- | ext/mbstring/oniguruma/regposix.c | 211 |
1 files changed, 134 insertions, 77 deletions
diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c index ad22338132..3604ccfdbf 100644 --- a/ext/mbstring/oniguruma/regposix.c +++ b/ext/mbstring/oniguruma/regposix.c @@ -2,7 +2,7 @@ regposix.c - Oniguruma (regular expression library) - Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ @@ -17,7 +17,7 @@ #if 1 #define ENC_STRING_LEN(enc,s,len) do { \ UChar* tmps = (UChar* )(s); \ - /* while (*tmps != 0) tmps += mblen(enc,*tmps); */ \ + /* while (*tmps != 0) tmps += enc_len(enc,*tmps); */ \ while (*tmps != 0) tmps++; /* OK for UTF-8, EUC-JP, Shift_JIS */ \ len = tmps - (UChar* )(s); \ } while(0) @@ -34,57 +34,65 @@ static int onig2posix_error_code(int code) { static O2PERR o2p[] = { - { REG_MISMATCH, REG_NOMATCH }, - { REG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, - { REGERR_MEMORY, REG_ESPACE }, - { REGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, - { REGERR_TYPE_BUG, REG_EONIG_INTERNAL }, - { REGERR_PARSER_BUG, REG_EONIG_INTERNAL }, - { REGERR_STACK_BUG, REG_EONIG_INTERNAL }, - { REGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL }, - { REGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL }, - { REGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG }, - { REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG }, - { REGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE }, - { REGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK }, - { REGERR_EMPTY_CHAR_CLASS, REG_ECTYPE }, - { REGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE }, - { REGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE }, - { REGERR_END_PATTERN_AT_META, REG_EESCAPE }, - { REGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE }, - { REGERR_META_CODE_SYNTAX, REG_BADPAT }, - { REGERR_CONTROL_CODE_SYNTAX, REG_BADPAT }, - { REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE }, - { REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE }, - { REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE }, - { REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT }, - { REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT }, - { REGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT }, - { REGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN }, - { REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN }, - { REGERR_END_PATTERN_IN_GROUP, REG_BADPAT }, - { REGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT }, - { REGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT }, - { REGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT }, - { REGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT }, - { REGERR_TOO_BIG_NUMBER, REG_BADPAT }, - { REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR }, - { REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR }, - { REGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE }, - { REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE }, - { REGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE }, - { REGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT }, - { REGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG }, - { REGERR_INVALID_BACKREF, REG_ESUBREG }, - { REGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, - { REGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, - { REGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, - { REGERR_INVALID_SUBEXP_NAME, REG_BADPAT }, - { REGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT }, - { REGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT }, - { REGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT }, - { REGERR_NEVER_ENDING_RECURSION, REG_BADPAT }, - { REGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD } + { ONIG_MISMATCH, REG_NOMATCH }, + { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, + { ONIGERR_MEMORY, REG_ESPACE }, + { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, + { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL }, + { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL }, + { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL }, + { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL }, + { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL }, + { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG }, + { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG }, + { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG }, + { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE }, + { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK }, + { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE }, + { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE }, + { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE }, + { ONIGERR_META_CODE_SYNTAX, REG_BADPAT }, + { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT }, + { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE }, + { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE }, + { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT }, + { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT }, + { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT }, + { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN }, + { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN }, + { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT }, + { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT }, + { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT }, + { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT }, + { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT }, + { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT }, + { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR }, + { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR }, + { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE }, + { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE }, + { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE }, + { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT }, + { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG }, + { ONIGERR_INVALID_BACKREF, REG_ESUBREG }, + { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT }, + { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, + { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, + { ONIGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC }, + { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT }, + { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT }, + { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT }, + { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT }, + { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT }, + { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT }, + { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT }, + { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT }, + { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT }, + { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT }, + { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD } + }; int i; @@ -103,26 +111,27 @@ extern int regcomp(regex_t* reg, const char* pattern, int posix_options) { int r, len; - RegSyntaxType* syntax = RegDefaultSyntax; - RegOptionType options; + OnigSyntaxType* syntax = OnigDefaultSyntax; + OnigOptionType options; if ((posix_options & REG_EXTENDED) == 0) - syntax = REG_SYNTAX_POSIX_BASIC; + syntax = ONIG_SYNTAX_POSIX_BASIC; options = syntax->options; if ((posix_options & REG_ICASE) != 0) - REG_OPTION_ON(options, REG_OPTION_IGNORECASE); + ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE); if ((posix_options & REG_NEWLINE) != 0) { - REG_OPTION_ON( options, REG_OPTION_NEGATE_SINGLELINE); - REG_OPTION_OFF(options, REG_OPTION_SINGLELINE); + ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE); + ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE); } reg->comp_options = posix_options; - ENC_STRING_LEN(RegDefaultCharEncoding, pattern, len); - r = regex_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len), - options, RegDefaultCharEncoding, syntax, (RegErrorInfo* )NULL); - if (r != REG_NORMAL) { + ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len); + r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len), + options, OnigEncDefaultCharEncoding, syntax, + (OnigErrorInfo* )NULL); + if (r != ONIG_NORMAL) { return onig2posix_error_code(r); } @@ -136,11 +145,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch, { int r, i, len; UChar* end; - RegOptionType options; + OnigOptionType options; - options = REG_OPTION_POSIX_REGION; - if ((posix_options & REG_NOTBOL) != 0) options |= REG_OPTION_NOTBOL; - if ((posix_options & REG_NOTEOL) != 0) options |= REG_OPTION_NOTEOL; + options = ONIG_OPTION_POSIX_REGION; + if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL; + if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL; if ((reg->comp_options & REG_NOSUB) != 0) { pmatch = (regmatch_t* )NULL; @@ -149,16 +158,16 @@ regexec(regex_t* reg, const char* str, size_t nmatch, ENC_STRING_LEN(ONIG_C(reg)->code,str,len); end = (UChar* )(str + len); - r = regex_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end, - (RegRegion* )pmatch, options); + r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end, + (OnigRegion* )pmatch, options); if (r >= 0) { r = 0; /* Match */ } - else if (r == REG_MISMATCH) { + else if (r == ONIG_MISMATCH) { r = REG_NOMATCH; for (i = 0; i < nmatch; i++) - pmatch[i].rm_so = pmatch[i].rm_eo = REG_REGION_NOTPOS; + pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS; } else { r = onig2posix_error_code(r); @@ -170,26 +179,74 @@ regexec(regex_t* reg, const char* str, size_t nmatch, extern void regfree(regex_t* reg) { - regex_free(ONIG_C(reg)); + onig_free(ONIG_C(reg)); } extern void reg_set_encoding(int mb_code) { - RegDefaultCharEncoding = REG_MBLEN_TABLE[mb_code]; + OnigEncoding enc; + + switch (mb_code) { + case REG_POSIX_ENCODING_ASCII: + enc = ONIG_ENCODING_ASCII; + break; + case REG_POSIX_ENCODING_EUC_JP: + enc = ONIG_ENCODING_EUC_JP; + break; + case REG_POSIX_ENCODING_SJIS: + enc = ONIG_ENCODING_SJIS; + break; + case REG_POSIX_ENCODING_UTF8: + enc = ONIG_ENCODING_UTF8; + break; + default: + return ; + break; + } + + onigenc_set_default_encoding(enc); } extern int reg_name_to_group_numbers(regex_t* reg, unsigned char* name, unsigned char* name_end, int** nums) { - return regex_name_to_group_numbers(ONIG_C(reg), name, name_end, nums); + return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums); +} + +typedef struct { + int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*); + regex_t* reg; + void* arg; +} i_wrap; + +static int i_wrapper(unsigned char* name, unsigned char* name_end, + int ng, int* gs, + onig_regex_t* reg, void* arg) +{ + i_wrap* warg = (i_wrap* )arg; + + return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg); +} + +extern int +reg_foreach_name(regex_t* reg, + int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*), + void* arg) +{ + i_wrap warg; + + warg.func = func; + warg.reg = reg; + warg.arg = arg; + + return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg); } extern int -reg_foreach_name(regex_t* reg, int (*func)(unsigned char*,int,int*,void*), - void* arg) +reg_number_of_names(regex_t* reg) { - return regex_foreach_name(ONIG_C(reg), func, arg); + return onig_number_of_names(ONIG_C(reg)); } |
