diff options
Diffstat (limited to 'ext/mbstring/oniguruma/regerror.c')
| -rw-r--r-- | ext/mbstring/oniguruma/regerror.c | 176 |
1 files changed, 100 insertions, 76 deletions
diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c index a1e86c34f1..5a6c31b82e 100644 --- a/ext/mbstring/oniguruma/regerror.c +++ b/ext/mbstring/oniguruma/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) - Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ #include "regint.h" @@ -17,118 +17,136 @@ #endif extern char* -regex_error_code_to_format(int code) +onig_error_code_to_format(int code) { char *p; if (code >= 0) return (char* )0; switch (code) { - case REG_MISMATCH: + case ONIG_MISMATCH: p = "mismatch"; break; - case REG_NO_SUPPORT_CONFIG: + case ONIG_NO_SUPPORT_CONFIG: p = "no support in this configuration"; break; - case REGERR_MEMORY: + case ONIGERR_MEMORY: p = "fail to memory allocation"; break; - case REGERR_MATCH_STACK_LIMIT_OVER: + case ONIGERR_MATCH_STACK_LIMIT_OVER: p = "match-stack limit over"; break; - case REGERR_TYPE_BUG: + case ONIGERR_TYPE_BUG: p = "undefined type (bug)"; break; - case REGERR_PARSER_BUG: + case ONIGERR_PARSER_BUG: p = "internal parser error (bug)"; break; - case REGERR_STACK_BUG: + case ONIGERR_STACK_BUG: p = "stack error (bug)"; break; - case REGERR_UNDEFINED_BYTECODE: + case ONIGERR_UNDEFINED_BYTECODE: p = "undefined bytecode (bug)"; break; - case REGERR_UNEXPECTED_BYTECODE: + case ONIGERR_UNEXPECTED_BYTECODE: p = "unexpected bytecode (bug)"; break; - case REGERR_DEFAULT_ENCODING_IS_NOT_SETTED: + case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED: p = "default multibyte-encoding is not setted"; break; - case REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: + case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: p = "can't convert to wide-char on specified multibyte-encoding"; break; - case REGERR_END_PATTERN_AT_LEFT_BRACE: + case ONIGERR_INVALID_ARGUMENT: + p = "invalid argument"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACE: p = "end pattern at left brace"; break; - case REGERR_END_PATTERN_AT_LEFT_BRACKET: + case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: p = "end pattern at left bracket"; break; - case REGERR_EMPTY_CHAR_CLASS: + case ONIGERR_EMPTY_CHAR_CLASS: p = "empty char-class"; break; - case REGERR_PREMATURE_END_OF_CHAR_CLASS: + case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: p = "premature end of char-class"; break; - case REGERR_END_PATTERN_AT_BACKSLASH: + case ONIGERR_END_PATTERN_AT_BACKSLASH: p = "end pattern at backslash"; break; - case REGERR_END_PATTERN_AT_META: + case ONIGERR_END_PATTERN_AT_META: p = "end pattern at meta"; break; - case REGERR_END_PATTERN_AT_CONTROL: + case ONIGERR_END_PATTERN_AT_CONTROL: p = "end pattern at control"; break; - case REGERR_META_CODE_SYNTAX: + case ONIGERR_META_CODE_SYNTAX: p = "illegal meta-code syntax"; break; - case REGERR_CONTROL_CODE_SYNTAX: + case ONIGERR_CONTROL_CODE_SYNTAX: p = "illegal control-code syntax"; break; - case REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: + case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: p = "char-class value at end of range"; break; - case REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: + case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: p = "char-class value at start of range"; break; - case REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: + case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: p = "unmatched range specifier in char-class"; break; - case REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: p = "target of repeat operator is not specified"; break; - case REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: p = "target of repeat operator is invalid"; break; - case REGERR_NESTED_REPEAT_OPERATOR: + case ONIGERR_NESTED_REPEAT_OPERATOR: p = "nested repeat operator"; break; - case REGERR_UNMATCHED_CLOSE_PARENTHESIS: + case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: p = "unmatched close parenthesis"; break; - case REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: + case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: p = "end pattern with unmatched parenthesis"; break; - case REGERR_END_PATTERN_IN_GROUP: + case ONIGERR_END_PATTERN_IN_GROUP: p = "end pattern in group"; break; - case REGERR_UNDEFINED_GROUP_OPTION: + case ONIGERR_UNDEFINED_GROUP_OPTION: p = "undefined group option"; break; - case REGERR_INVALID_POSIX_BRACKET_TYPE: + case ONIGERR_INVALID_POSIX_BRACKET_TYPE: p = "invalid POSIX bracket type"; break; - case REGERR_INVALID_LOOK_BEHIND_PATTERN: + case ONIGERR_INVALID_LOOK_BEHIND_PATTERN: p = "invalid pattern in look-behind"; break; - case REGERR_INVALID_REPEAT_RANGE_PATTERN: + case ONIGERR_INVALID_REPEAT_RANGE_PATTERN: p = "invalid repeat range {lower,upper}"; break; - case REGERR_TOO_BIG_NUMBER: + case ONIGERR_TOO_BIG_NUMBER: p = "too big number"; break; - case REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: + case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: p = "too big number for repeat range"; break; - case REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: + case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: p = "upper is smaller than lower in repeat range"; break; - case REGERR_EMPTY_RANGE_IN_CHAR_CLASS: + case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: p = "empty range in char class"; break; - case REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: + case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: p = "mismatch multibyte code length in char-class range"; break; - case REGERR_TOO_MANY_MULTI_BYTE_RANGES: + case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: p = "too many multibyte code ranges are specified"; break; - case REGERR_TOO_SHORT_MULTI_BYTE_STRING: + case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: p = "too short multibyte code string"; break; - case REGERR_TOO_BIG_BACKREF_NUMBER: + case ONIGERR_TOO_BIG_BACKREF_NUMBER: p = "too big backref number"; break; - case REGERR_INVALID_BACKREF: -#ifdef USE_NAMED_SUBEXP + case ONIGERR_INVALID_BACKREF: +#ifdef USE_NAMED_GROUP p = "invalid backref number/name"; break; #else p = "invalid backref number"; break; #endif - case REGERR_TOO_BIG_WIDE_CHAR_VALUE: + case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED: + p = "numbered backref/call is not allowed. (use name)"; break; + case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE: p = "too big wide-char value"; break; - case REGERR_TOO_LONG_WIDE_CHAR_VALUE: + case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: p = "too long wide-char value"; break; - case REGERR_INVALID_WIDE_CHAR_VALUE: + case ONIGERR_INVALID_WIDE_CHAR_VALUE: p = "invalid wide-char value"; break; - case REGERR_INVALID_SUBEXP_NAME: - p = "invalid subexp name"; break; - case REGERR_UNDEFINED_NAME_REFERENCE: + case ONIGERR_EMPTY_GROUP_NAME: + p = "group name is empty"; break; + case ONIGERR_INVALID_GROUP_NAME: + p = "invalid group name <%n>"; break; + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: +#ifdef USE_NAMED_GROUP + p = "invalid char in group name <%n>"; break; +#else + p = "invalid char in group number <%n>"; break; +#endif + case ONIGERR_UNDEFINED_NAME_REFERENCE: p = "undefined name <%n> reference"; break; - case REGERR_UNDEFINED_GROUP_REFERENCE: - p = "undefined group reference"; break; - case REGERR_MULTIPLEX_DEFINITION_NAME_CALL: + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + p = "undefined group <%n> reference"; break; + case ONIGERR_MULTIPLEX_DEFINED_NAME: + p = "multiplex defined name <%n>"; break; + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: p = "multiplex definition name <%n> call"; break; - case REGERR_NEVER_ENDING_RECURSION: + case ONIGERR_NEVER_ENDING_RECURSION: p = "never ending recursion"; break; - case REGERR_OVER_THREAD_PASS_LIMIT_COUNT: + case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: + p = "group number is too big for capture history"; break; + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + p = "invalid character property name"; break; + case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: p = "over thread pass limit count"; break; default: @@ -139,31 +157,36 @@ regex_error_code_to_format(int code) } -/* for REG_MAX_ERROR_MESSAGE_LEN */ +/* for ONIG_MAX_ERROR_MESSAGE_LEN */ #define MAX_ERROR_PAR_LEN 30 extern int #ifdef HAVE_STDARG_PROTOTYPES -regex_error_code_to_str(UChar* s, int code, ...) +onig_error_code_to_str(UChar* s, int code, ...) #else -regex_error_code_to_str(UChar* s, code, va_alist) +onig_error_code_to_str(s, code, va_alist) + UChar* s; int code; va_dcl #endif { UChar *p, *q; - RegErrorInfo* einfo; + OnigErrorInfo* einfo; int len; va_list vargs; va_init_list(vargs, code); switch (code) { - case REGERR_UNDEFINED_NAME_REFERENCE: - case REGERR_MULTIPLEX_DEFINITION_NAME_CALL: - einfo = va_arg(vargs, RegErrorInfo*); + case ONIGERR_UNDEFINED_NAME_REFERENCE: + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + case ONIGERR_MULTIPLEX_DEFINED_NAME: + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + case ONIGERR_INVALID_GROUP_NAME: + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: + einfo = va_arg(vargs, OnigErrorInfo*); len = einfo->par_end - einfo->par; - q = regex_error_code_to_format(code); + q = onig_error_code_to_format(code); p = s; while (*q != '\0') { if (*q == '%') { @@ -194,7 +217,7 @@ regex_error_code_to_str(UChar* s, code, va_alist) break; default: - q = regex_error_code_to_format(code); + q = onig_error_code_to_format(code); len = strlen(q); xmemcpy(s, q, len); s[len] = '\0'; @@ -208,13 +231,13 @@ regex_error_code_to_str(UChar* s, code, va_alist) void #ifdef HAVE_STDARG_PROTOTYPES -regex_snprintf_with_pattern(char buf[], int bufsize, RegCharEncoding enc, +onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...) #else -regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) +onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) char buf[]; int bufsize; - RegCharEncoding enc; + OnigEncoding enc; char* pat; char* pat_end; const char *fmt; @@ -222,7 +245,7 @@ regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) #endif { int n, need, len; - char *p, *s; + UChar *p, *s; va_list args; va_init_list(args, fmt); @@ -236,21 +259,22 @@ regex_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) s = buf + strlen(buf); p = pat; - while (p < pat_end) { - if (*p == '\\') { + while (p < (UChar* )pat_end) { + if (*p == MC_ESC) { *s++ = *p++; - len = mblen(enc, *p); + len = enc_len(enc, *p); while (len-- > 0) *s++ = *p++; } else if (*p == '/') { - *s++ = '\\'; + *s++ = MC_ESC; *s++ = *p++; } - else if (ismb(enc, *p)) { - len = mblen(enc, *p); + else if (ONIGENC_IS_MBC_HEAD(enc, *p)) { + len = enc_len(enc, *p); while (len-- > 0) *s++ = *p++; } - else if (!IS_PRINT(*p) && !IS_SPACE(*p)) { + else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && + !ONIGENC_IS_CODE_SPACE(enc, *p)) { char b[5]; sprintf(b, "\\%03o", *p & 0377); len = strlen(b); |
