summary refs log tree commit diff
path: root/ext/mbstring/oniguruma/regposix.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/oniguruma/regposix.c')
-rw-r--r-- ext/mbstring/oniguruma/regposix.c 211
1 files changed, 134 insertions, 77 deletions
diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c
index ad22338132..3604ccfdbf 100644
--- a/ext/mbstring/oniguruma/regposix.c
+++ b/ext/mbstring/oniguruma/regposix.c
@@ -2,7 +2,7 @@
regposix.c - Oniguruma (regular expression library)
- Copyright (C) 2003 K.Kosako (kosako@sofnec.co.jp)
+ Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
@@ -17,7 +17,7 @@
#if 1
#define ENC_STRING_LEN(enc,s,len) do { \
UChar* tmps = (UChar* )(s); \
- /* while (*tmps != 0) tmps += mblen(enc,*tmps); */ \
+ /* while (*tmps != 0) tmps += enc_len(enc,*tmps); */ \
while (*tmps != 0) tmps++; /* OK for UTF-8, EUC-JP, Shift_JIS */ \
len = tmps - (UChar* )(s); \
} while(0)
@@ -34,57 +34,65 @@ static int
onig2posix_error_code(int code)
{
static O2PERR o2p[] = {
- { REG_MISMATCH, REG_NOMATCH },
- { REG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
- { REGERR_MEMORY, REG_ESPACE },
- { REGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
- { REGERR_TYPE_BUG, REG_EONIG_INTERNAL },
- { REGERR_PARSER_BUG, REG_EONIG_INTERNAL },
- { REGERR_STACK_BUG, REG_EONIG_INTERNAL },
- { REGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
- { REGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
- { REGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },
- { REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
- { REGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
- { REGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
- { REGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
- { REGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
- { REGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE },
- { REGERR_END_PATTERN_AT_META, REG_EESCAPE },
- { REGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
- { REGERR_META_CODE_SYNTAX, REG_BADPAT },
- { REGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
- { REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
- { REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
- { REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
- { REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
- { REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
- { REGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
- { REGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
- { REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
- { REGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
- { REGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
- { REGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
- { REGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
- { REGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
- { REGERR_TOO_BIG_NUMBER, REG_BADPAT },
- { REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
- { REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
- { REGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
- { REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
- { REGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
- { REGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
- { REGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
- { REGERR_INVALID_BACKREF, REG_ESUBREG },
- { REGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
- { REGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
- { REGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
- { REGERR_INVALID_SUBEXP_NAME, REG_BADPAT },
- { REGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
- { REGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
- { REGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
- { REGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
- { REGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
+ { ONIG_MISMATCH, REG_NOMATCH },
+ { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
+ { ONIGERR_MEMORY, REG_ESPACE },
+ { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
+ { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
+ { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
+ { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
+ { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
+ { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
+ { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },
+ { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
+ { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
+ { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
+ { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
+ { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
+ { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
+ { ONIGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE },
+ { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
+ { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
+ { ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
+ { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
+ { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
+ { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
+ { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
+ { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
+ { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
+ { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
+ { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
+ { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
+ { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
+ { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
+ { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
+ { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
+ { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
+ { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
+ { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
+ { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
+ { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
+ { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
+ { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
+ { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
+ { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
+ { ONIGERR_INVALID_BACKREF, REG_ESUBREG },
+ { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
+ { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
+ { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
+ { ONIGERR_INVALID_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
+ { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
+ { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
+ { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
+ { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
+ { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
+ { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
+ { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
+ { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
+ { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
+ { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
+ { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
+
};
int i;
@@ -103,26 +111,27 @@ extern int
regcomp(regex_t* reg, const char* pattern, int posix_options)
{
int r, len;
- RegSyntaxType* syntax = RegDefaultSyntax;
- RegOptionType options;
+ OnigSyntaxType* syntax = OnigDefaultSyntax;
+ OnigOptionType options;
if ((posix_options & REG_EXTENDED) == 0)
- syntax = REG_SYNTAX_POSIX_BASIC;
+ syntax = ONIG_SYNTAX_POSIX_BASIC;
options = syntax->options;
if ((posix_options & REG_ICASE) != 0)
- REG_OPTION_ON(options, REG_OPTION_IGNORECASE);
+ ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
if ((posix_options & REG_NEWLINE) != 0) {
- REG_OPTION_ON( options, REG_OPTION_NEGATE_SINGLELINE);
- REG_OPTION_OFF(options, REG_OPTION_SINGLELINE);
+ ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
+ ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
}
reg->comp_options = posix_options;
- ENC_STRING_LEN(RegDefaultCharEncoding, pattern, len);
- r = regex_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
- options, RegDefaultCharEncoding, syntax, (RegErrorInfo* )NULL);
- if (r != REG_NORMAL) {
+ ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
+ r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
+ options, OnigEncDefaultCharEncoding, syntax,
+ (OnigErrorInfo* )NULL);
+ if (r != ONIG_NORMAL) {
return onig2posix_error_code(r);
}
@@ -136,11 +145,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
{
int r, i, len;
UChar* end;
- RegOptionType options;
+ OnigOptionType options;
- options = REG_OPTION_POSIX_REGION;
- if ((posix_options & REG_NOTBOL) != 0) options |= REG_OPTION_NOTBOL;
- if ((posix_options & REG_NOTEOL) != 0) options |= REG_OPTION_NOTEOL;
+ options = ONIG_OPTION_POSIX_REGION;
+ if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
+ if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
if ((reg->comp_options & REG_NOSUB) != 0) {
pmatch = (regmatch_t* )NULL;
@@ -149,16 +158,16 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
ENC_STRING_LEN(ONIG_C(reg)->code,str,len);
end = (UChar* )(str + len);
- r = regex_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
- (RegRegion* )pmatch, options);
+ r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
+ (OnigRegion* )pmatch, options);
if (r >= 0) {
r = 0; /* Match */
}
- else if (r == REG_MISMATCH) {
+ else if (r == ONIG_MISMATCH) {
r = REG_NOMATCH;
for (i = 0; i < nmatch; i++)
- pmatch[i].rm_so = pmatch[i].rm_eo = REG_REGION_NOTPOS;
+ pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
}
else {
r = onig2posix_error_code(r);
@@ -170,26 +179,74 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
extern void
regfree(regex_t* reg)
{
- regex_free(ONIG_C(reg));
+ onig_free(ONIG_C(reg));
}
extern void
reg_set_encoding(int mb_code)
{
- RegDefaultCharEncoding = REG_MBLEN_TABLE[mb_code];
+ OnigEncoding enc;
+
+ switch (mb_code) {
+ case REG_POSIX_ENCODING_ASCII:
+ enc = ONIG_ENCODING_ASCII;
+ break;
+ case REG_POSIX_ENCODING_EUC_JP:
+ enc = ONIG_ENCODING_EUC_JP;
+ break;
+ case REG_POSIX_ENCODING_SJIS:
+ enc = ONIG_ENCODING_SJIS;
+ break;
+ case REG_POSIX_ENCODING_UTF8:
+ enc = ONIG_ENCODING_UTF8;
+ break;
+ default:
+ return ;
+ break;
+ }
+
+ onigenc_set_default_encoding(enc);
}
extern int
reg_name_to_group_numbers(regex_t* reg,
unsigned char* name, unsigned char* name_end, int** nums)
{
- return regex_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
+ return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
+}
+
+typedef struct {
+ int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*);
+ regex_t* reg;
+ void* arg;
+} i_wrap;
+
+static int i_wrapper(unsigned char* name, unsigned char* name_end,
+ int ng, int* gs,
+ onig_regex_t* reg, void* arg)
+{
+ i_wrap* warg = (i_wrap* )arg;
+
+ return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);
+}
+
+extern int
+reg_foreach_name(regex_t* reg,
+ int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*),
+ void* arg)
+{
+ i_wrap warg;
+
+ warg.func = func;
+ warg.reg = reg;
+ warg.arg = arg;
+
+ return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
}
extern int
-reg_foreach_name(regex_t* reg, int (*func)(unsigned char*,int,int*,void*),
- void* arg)
+reg_number_of_names(regex_t* reg)
{
- return regex_foreach_name(ONIG_C(reg), func, arg);
+ return onig_number_of_names(ONIG_C(reg));
}