diff options
Diffstat (limited to 'ext/mbstring/oniguruma/regexec.c')
| -rw-r--r-- | ext/mbstring/oniguruma/regexec.c | 1252 |
1 files changed, 748 insertions, 504 deletions
diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c index 2ded602e15..2c082de423 100644 --- a/ext/mbstring/oniguruma/regexec.c +++ b/ext/mbstring/oniguruma/regexec.c @@ -1,53 +1,152 @@ /********************************************************************** - regexec.c - Oniguruma (regular expression library) - - Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp) - **********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + #include "regint.h" +#ifdef USE_CAPTURE_HISTORY +static void history_tree_free(OnigCaptureTreeNode* node); + static void -region_list_clear(OnigRegion** list) +history_tree_clear(OnigCaptureTreeNode* node) { int i; - if (IS_NOT_NULL(list)) { - for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (IS_NOT_NULL(list[i])) { - xfree(list[i]); - list[i] = (OnigRegion* )0; + if (IS_NOT_NULL(node)) { + for (i = 0; i < node->num_childs; i++) { + if (IS_NOT_NULL(node->childs[i])) { + history_tree_free(node->childs[i]); } } + for (i = 0; i < node->allocated; i++) { + node->childs[i] = (OnigCaptureTreeNode* )0; + } + node->num_childs = 0; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + node->group = -1; } } static void -region_list_free(OnigRegion* r) +history_tree_free(OnigCaptureTreeNode* node) +{ + history_tree_clear(node); + xfree(node); +} + +static void +history_root_free(OnigRegion* r) { - if (IS_NOT_NULL(r->list)) { - region_list_clear(r->list); - xfree(r->list); - r->list = (OnigRegion** )0; + if (IS_NOT_NULL(r->history_root)) { + history_tree_free(r->history_root); + r->history_root = (OnigCaptureTreeNode* )0; } } -static OnigRegion** -region_list_new() +static OnigCaptureTreeNode* +history_node_new() { - int i; - OnigRegion** list; + OnigCaptureTreeNode* node; + + node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); + CHECK_NULL_RETURN(node); + node->childs = (OnigCaptureTreeNode** )0; + node->allocated = 0; + node->num_childs = 0; + node->group = -1; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + + return node; +} - list = (OnigRegion** )xmalloc(sizeof(OnigRegion*) - * (ONIG_MAX_CAPTURE_HISTORY_GROUP + 1)); - CHECK_NULL_RETURN(list); - for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - list[i] = (OnigRegion* )0; +static int +history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) +{ +#define HISTORY_TREE_INIT_ALLOC_SIZE 8 + + if (parent->num_childs >= parent->allocated) { + int n, i; + + if (IS_NULL(parent->childs)) { + n = HISTORY_TREE_INIT_ALLOC_SIZE; + parent->childs = + (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); + } + else { + n = parent->allocated * 2; + parent->childs = + (OnigCaptureTreeNode** )xrealloc(parent->childs, + sizeof(OnigCaptureTreeNode*) * n); + } + CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY); + for (i = parent->allocated; i < n; i++) { + parent->childs[i] = (OnigCaptureTreeNode* )0; + } + parent->allocated = n; + } + + parent->childs[parent->num_childs] = child; + parent->num_childs++; + return 0; +} + +static OnigCaptureTreeNode* +history_tree_clone(OnigCaptureTreeNode* node) +{ + int i; + OnigCaptureTreeNode *clone, *child; + + clone = history_node_new(); + CHECK_NULL_RETURN(clone); + + clone->beg = node->beg; + clone->end = node->end; + for (i = 0; i < node->num_childs; i++) { + child = history_tree_clone(node->childs[i]); + if (IS_NULL(child)) { + history_tree_free(clone); + return (OnigCaptureTreeNode* )0; + } + history_tree_add_child(clone, child); } - return list; + return clone; } +extern OnigCaptureTreeNode* +onig_get_capture_tree(OnigRegion* region) +{ + return region->history_root; +} +#endif /* USE_CAPTURE_HISTORY */ + extern void onig_region_clear(OnigRegion* region) { @@ -56,14 +155,14 @@ onig_region_clear(OnigRegion* region) for (i = 0; i < region->num_regs; i++) { region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; } - region_list_clear(region->list); +#ifdef USE_CAPTURE_HISTORY + history_root_free(region); +#endif } extern int onig_region_resize(OnigRegion* region, int n) { - int i; - region->num_regs = n; if (n < ONIG_NREGION) @@ -88,92 +187,43 @@ onig_region_resize(OnigRegion* region, int n) region->allocated = n; } - for (i = 0; i < region->num_regs; i++) { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } - - if (IS_NOT_NULL(region->list)) - region_list_clear(region->list); - return 0; } -static int -region_ensure_size(OnigRegion* region, int n) +extern int +onig_region_resize_clear(OnigRegion* region, int n) { - int i, new_size; - - if (region->allocated >= n) - return 0; - - new_size = region->allocated; - if (new_size == 0) - new_size = ONIG_NREGION; - while (new_size < n) - new_size *= 2; - - if (region->allocated == 0) { - region->beg = (int* )xmalloc(new_size * sizeof(int)); - region->end = (int* )xmalloc(new_size * sizeof(int)); - if (region->beg == 0 || region->end == 0) - return ONIGERR_MEMORY; - - region->allocated = new_size; - } - else if (region->allocated < new_size) { - region->beg = (int* )xrealloc(region->beg, new_size * sizeof(int)); - region->end = (int* )xrealloc(region->end, new_size * sizeof(int)); - if (region->beg == 0 || region->end == 0) - return ONIGERR_MEMORY; - - region->allocated = new_size; - } - - for (i = region->num_regs; i < n; i++) { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } + int r; + + r = onig_region_resize(region, n); + if (r != 0) return r; + onig_region_clear(region); return 0; } - -static int -region_list_add_entry(OnigRegion* region, int group, int start, int end) + +extern int +onig_region_set(OnigRegion* region, int at, int beg, int end) { - int r, pos; - OnigRegion** list; - - if (group > ONIG_MAX_CAPTURE_HISTORY_GROUP) - return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + if (at < 0) return ONIGERR_INVALID_ARGUMENT; - if (IS_NULL(region->list)) { - region->list = region_list_new(); - CHECK_NULL_RETURN_VAL(region->list, ONIGERR_MEMORY); + if (at >= region->allocated) { + int r = onig_region_resize(region, at + 1); + if (r < 0) return r; } - - list = region->list; - if (IS_NULL(list[group])) { - list[group] = onig_region_new(); - CHECK_NULL_RETURN_VAL(list[group], ONIGERR_MEMORY); - } - - r = region_ensure_size(list[group], list[group]->num_regs + 1); - if (r != 0) return r; - - pos = list[group]->num_regs; - list[group]->beg[pos] = start; - list[group]->end[pos] = end; - list[group]->num_regs++; - + + region->beg[at] = beg; + region->end[at] = end; return 0; } -static void +extern void onig_region_init(OnigRegion* region) { - region->num_regs = 0; - region->allocated = 0; - region->beg = (int* )0; - region->end = (int* )0; - region->list = (OnigRegion** )0; + region->num_regs = 0; + region->allocated = 0; + region->beg = (int* )0; + region->end = (int* )0; + region->history_root = (OnigCaptureTreeNode* )0; } extern OnigRegion* @@ -195,7 +245,9 @@ onig_region_free(OnigRegion* r, int free_self) if (r->end) xfree(r->end); r->allocated = 0; } - region_list_free(r); +#ifdef USE_CAPTURE_HISTORY + history_root_free(r); +#endif if (free_self) xfree(r); } } @@ -227,34 +279,19 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) } to->num_regs = from->num_regs; - if (IS_NOT_NULL(from->list)) { - if (IS_NULL(to->list)) { - to->list = region_list_new(); - } - - for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (IS_NOT_NULL(from->list[i])) { - if (IS_NULL(to->list[i])) - to->list[i] = onig_region_new(); +#ifdef USE_CAPTURE_HISTORY + history_root_free(to); - onig_region_copy(to->list[i], from->list[i]); - } - else { - if (IS_NOT_NULL(to->list[i])) { - xfree(to->list[i]); - to->list[i] = (OnigRegion* )0; - } - } - } + if (IS_NOT_NULL(from->history_root)) { + to->history_root = history_tree_clone(from->history_root); } - else - region_list_free(to); +#endif } /** stack **/ #define INVALID_STACK_INDEX -1 -typedef int StackIndex; +typedef long StackIndex; typedef struct _StackType { unsigned int type; @@ -324,7 +361,7 @@ typedef struct { int stack_n; OnigOptionType options; OnigRegion* region; - UChar* start; /* search start position (for \G: BEGIN_POSITION) */ + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ } MatchArg; #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ @@ -362,11 +399,26 @@ typedef struct { };\ } while(0) +static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; + +extern unsigned int +onig_get_match_stack_limit_size(void) +{ + return MatchStackLimitSize; +} + +extern int +onig_set_match_stack_limit_size(unsigned int size) +{ + MatchStackLimitSize = size; + return 0; +} + static int stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk, StackType* stk_alloc, MatchArg* msa) { - int n; + unsigned int n; StackType *x, *stk_base, *stk_end, *stk; stk_base = *arg_stk_base; @@ -385,7 +437,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, } else { n *= 2; - if (n > MATCH_STACK_LIMIT_SIZE) return ONIGERR_MATCH_STACK_LIMIT_OVER; + if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { + if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) + return ONIGERR_MATCH_STACK_LIMIT_OVER; + else + n = MatchStackLimitSize; + } x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n); if (IS_NULL(x)) { STACK_SAVE; @@ -831,24 +888,25 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, }\ } while(0) -#define STRING_CMP_IC(s1,ps2,len) do {\ - if (string_cmp_ic(encode, s1, ps2, len) == 0) \ +#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\ + if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \ goto fail; \ } while(0) -static int string_cmp_ic(OnigEncoding enc, +static int string_cmp_ic(OnigEncoding enc, int ambig_flag, UChar* s1, UChar** ps2, int mblen) { - UChar buf1[ONIGENC_MBC_TO_LOWER_MAXLEN]; - UChar buf2[ONIGENC_MBC_TO_LOWER_MAXLEN]; - UChar *p1, *p2, *end, *s2; + UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN]; + UChar *p1, *p2, *end, *s2, *end2; int len1, len2; - s2 = *ps2; - end = s1 + mblen; + s2 = *ps2; + end = s1 + mblen; + end2 = s2 + mblen; while (s1 < end) { - len1 = ONIGENC_MBC_TO_LOWER(enc, s1, buf1); - len2 = ONIGENC_MBC_TO_LOWER(enc, s2, buf2); + len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1); + len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2); if (len1 != len2) return 0; p1 = buf1; p2 = buf2; @@ -857,9 +915,6 @@ static int string_cmp_ic(OnigEncoding enc, p1++; p2++; } - - s1 += enc_len(enc, *s1); - s2 += enc_len(enc, *s2); } *ps2 = s2; @@ -875,8 +930,8 @@ static int string_cmp_ic(OnigEncoding enc, }\ } while(0) -#define STRING_CMP_VALUE_IC(s1,ps2,len,is_fail) do {\ - if (string_cmp_ic(encode, s1, ps2, len) == 0) \ +#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\ + if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \ is_fail = 1; \ else \ is_fail = 0; \ @@ -891,6 +946,110 @@ static int string_cmp_ic(OnigEncoding enc, #define DATA_ENSURE_CHECK(n) (s + (n) <= end) +#ifdef USE_CAPTURE_HISTORY +static int +make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, + StackType* stk_top, UChar* str, regex_t* reg) +{ + int n, r; + OnigCaptureTreeNode* child; + StackType* k = *kp; + + while (k < stk_top) { + if (k->type == STK_MEM_START) { + n = k->u.mem.num; + if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && + BIT_STATUS_AT(reg->capture_history, n) != 0) { + child = history_node_new(); + CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY); + child->group = n; + child->beg = (int )(k->u.mem.pstr - str); + r = history_tree_add_child(node, child); + if (r != 0) return r; + *kp = (k + 1); + r = make_capture_history_tree(child, kp, stk_top, str, reg); + if (r != 0) return r; + + k = *kp; + child->end = (int )(k->u.mem.pstr - str); + } + } + else if (k->type == STK_MEM_END) { + if (k->u.mem.num == node->group) { + node->end = (int )(k->u.mem.pstr - str); + *kp = k; + return 0; + } + } + k++; + } + + return 1; /* 1: root node ending. */ +} +#endif + +#ifdef RUBY_PLATFORM + +typedef struct { + int state; + regex_t* reg; + MatchArg* msa; + StackType* stk_base; +} TrapEnsureArg; + +static VALUE +trap_ensure(VALUE arg) +{ + TrapEnsureArg* ta = (TrapEnsureArg* )arg; + + if (ta->state == 0) { /* trap_exec() is not normal return */ + ONIG_STATE_DEC(ta->reg); + if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p) + xfree(ta->stk_base); + + MATCH_ARG_FREE(*(ta->msa)); + } + + return Qnil; +} + +static VALUE +trap_exec(VALUE arg) +{ + TrapEnsureArg* ta; + + rb_trap_exec(); + + ta = (TrapEnsureArg* )arg; + ta->state = 1; /* normal return */ + return Qnil; +} + +extern void +onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base) +{ + VALUE arg; + TrapEnsureArg ta; + + ta.state = 0; + ta.reg = reg; + ta.msa = msa; + ta.stk_base = stk_base; + arg = (VALUE )(&ta); + rb_ensure(trap_exec, arg, trap_ensure, arg); +} + +#define CHECK_INTERRUPT_IN_MATCH_AT do {\ + if (rb_trap_pending) {\ + if (! rb_prohibit_interrupt) {\ + onig_exec_trap(reg, msa, stk_base);\ + }\ + }\ +} while (0) +#else +#define CHECK_INTERRUPT_IN_MATCH_AT +#endif /* RUBY_PLATFORM */ + #ifdef ONIG_DEBUG_STATISTICS #define USE_TIMEOFDAY @@ -935,6 +1094,7 @@ static int MaxStackDepth = 0; } while (0) #ifdef RUBY_PLATFORM + /* * :nodoc: */ @@ -984,7 +1144,7 @@ onig_print_statistics(FILE* f) #endif extern int -onig_is_in_code_range(UChar* p, OnigCodePoint code) +onig_is_in_code_range(const UChar* p, OnigCodePoint code) { OnigCodePoint n, *data; OnigCodePoint low, high, x; @@ -1004,6 +1164,27 @@ onig_is_in_code_range(UChar* p, OnigCodePoint code) return ((low < n && code >= data[low * 2]) ? 1 : 0); } +static int +code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen) +{ + unsigned int in_cc; + CClassNode* cc = (CClassNode* )node; + + if (enclen == 1) { + in_cc = BITSET_AT(cc->bs, code); + } + else { + UChar* p = ((BBuf* )(cc->mbuf))->p; + in_cc = onig_is_in_code_range(p, code); + } + + if (IS_CCLASS_NOT(cc)) { + return (in_cc ? 0 : 1); + } + else { + return (in_cc ? 1 : 0); + } +} /* matching region of POSIX API */ typedef int regoff_t; @@ -1016,7 +1197,7 @@ typedef struct { /* match data(str - end) from position (sstart). */ /* if sstart == str then set sprev to NULL. */ static int -match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, +match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, UChar* sprev, MatchArg* msa) { static UChar FinishCode[] = { OP_FINISH }; @@ -1027,18 +1208,18 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, RelAddrType addr; OnigOptionType option = reg->options; OnigEncoding encode = reg->enc; - int ignore_case; + OnigAmbigType ambig_flag = reg->ambig_flag; UChar *s, *q, *sbegin; UChar *p = reg->p; char *alloca_base; StackType *stk_alloc, *stk_base, *stk, *stk_end; StackType *stkp; /* used as any purpose. */ + StackIndex si; StackIndex *repeat_stk; StackIndex *mem_start_stk, *mem_end_stk; n = reg->num_repeat + reg->num_mem * 2; STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); - ignore_case = IS_IGNORECASE(option); pop_level = reg->stack_pop_level; num_mem = reg->num_mem; repeat_stk = (StackIndex* )alloca_base; @@ -1062,7 +1243,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ best_len = ONIG_MISMATCH; - s = sstart; + s = (UChar* )sstart; while (1) { #ifdef ONIG_DEBUG_MATCH { @@ -1071,7 +1252,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, fprintf(stderr, "%4d> \"", (int )(s - str)); bp = buf; for (i = 0, q = s; i < 7 && q < end; i++) { - len = enc_len(encode, *q); + len = enc_len(encode, q); while (len-- > 0) *bp++ = *q++; } if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } @@ -1079,7 +1260,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, *bp = 0; fputs(buf, stderr); for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - onig_print_compiled_byte_code(stderr, p, NULL); + onig_print_compiled_byte_code(stderr, p, NULL, encode); fprintf(stderr, "\n"); } #endif @@ -1134,27 +1315,33 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, } } +#ifdef USE_CAPTURE_HISTORY if (reg->capture_history != 0) { - UChar *pstart, *pend; - for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (BIT_STATUS_AT(reg->capture_history, i) != 0) { - stkp = stk_base; - do { - STACK_GET_MEM_RANGE(stkp, i, pstart, pend); - if (stkp < stk) { - int r; - r = region_list_add_entry(region, i, - pstart - str, pend - str); - if (r) { - STACK_SAVE; - return r; - } - } - stkp++; - } while (stkp < stk); - } - } - } /* list of captures */ + int r; + OnigCaptureTreeNode* node; + + if (IS_NULL(region->history_root)) { + region->history_root = node = history_node_new(); + CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY); + } + else { + node = region->history_root; + history_tree_clear(node); + } + + node->group = 0; + node->beg = sstart - str; + node->end = s - str; + + stkp = stk_base; + r = make_capture_history_tree(region->history_root, &stkp, + stk, (UChar* )str, reg); + if (r < 0) { + best_len = r; /* error code */ + goto finish; + } + } +#endif /* USE_CAPTURE_HISTORY */ #ifdef USE_POSIX_REGION_OPTION } /* else IS_POSIX_REGION() */ #endif @@ -1171,10 +1358,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, goto fail; /* for retry */ } } - else { - /* default behavior: return first-matching result. */ - goto finish; - } + + /* default behavior: return first-matching result. */ + goto finish; break; case OP_EXACT1: STAT_OP_IN(OP_EXACT1); @@ -1192,14 +1378,31 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC); { int len; - UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; + UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; - len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf); - DATA_ENSURE(len); + DATA_ENSURE(1); + ss = s; + sp = p; + + exact1_ic_retry: + len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); + DATA_ENSURE(0); q = lowbuf; - s += enc_len(encode, *s); while (len-- > 0) { - if (*p != *q) goto fail; + if (*p != *q) { +#if 1 + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + s = ss; + p = sp; + goto exact1_ic_retry; + } + else + goto fail; +#else + goto fail; +#endif + } p++; q++; } } @@ -1276,19 +1479,36 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC); { int len; - UChar *q, *endp, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; + UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; GET_LENGTH_INC(tlen, p); endp = p + tlen; while (p < endp) { - len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf); - DATA_ENSURE(len); sprev = s; - s += enc_len(encode, *s); + DATA_ENSURE(1); + ss = s; + sp = p; + + exactn_ic_retry: + len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); + DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { - if (*p != *q) goto fail; + if (*p != *q) { +#if 1 + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + s = ss; + p = sp; + goto exactn_ic_retry; + } + else + goto fail; +#else + goto fail; +#endif + } p++; q++; } } @@ -1389,20 +1609,22 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; - s += enc_len(encode, *s); /* OP_CCLASS can match mb-code. \D, \S */ + s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ STAT_OP_OUT; break; case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB); - if (! ONIGENC_IS_MBC_HEAD(encode, *s)) goto fail; + if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; cclass_mb: GET_LENGTH_INC(tlen, p); { OnigCodePoint code; UChar *ss; - int mb_len = enc_len(encode, *s); + int mb_len; + DATA_ENSURE(1); + mb_len = enc_len(encode, s); DATA_ENSURE(mb_len); ss = s; s += mb_len; @@ -1422,7 +1644,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); - if (ONIGENC_IS_MBC_HEAD(encode, *s)) { + if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; goto cclass_mb; } @@ -1442,13 +1664,13 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; - s += enc_len(encode, *s); + s += enc_len(encode, s); STAT_OP_OUT; break; case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT); - if (! ONIGENC_IS_MBC_HEAD(encode, *s)) { - DATA_ENSURE(1); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_HEAD(encode, s)) { s++; GET_LENGTH_INC(tlen, p); p += tlen; @@ -1460,10 +1682,11 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, { OnigCodePoint code; UChar *ss; - int mb_len = enc_len(encode, *s); + int mb_len = enc_len(encode, s); if (s + mb_len > end) { - s = end; + DATA_ENSURE(1); + s = (UChar* )end; p += tlen; goto cc_mb_not_success; } @@ -1488,7 +1711,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); - if (ONIGENC_IS_MBC_HEAD(encode, *s)) { + if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; goto cclass_mb_not; } @@ -1504,22 +1727,36 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, STAT_OP_OUT; break; - case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); - n = enc_len(encode, *s); - if (n > 1) { - DATA_ENSURE(n); - s += n; - } - else { - DATA_ENSURE(1); - if (ONIG_IS_NEWLINE(*s)) goto fail; - s++; + case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE); + { + OnigCodePoint code; + void *node; + int mb_len; + UChar *ss; + + DATA_ENSURE(1); + GET_POINTER_INC(node, p); + mb_len = enc_len(encode, s); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail; } STAT_OP_OUT; break; + case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR); + DATA_ENSURE(1); + n = enc_len(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + s += n; + STAT_OP_OUT; + break; + case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML); - n = enc_len(encode, *s); + DATA_ENSURE(1); + n = enc_len(encode, s); DATA_ENSURE(n); s += n; STAT_OP_OUT; @@ -1528,17 +1765,11 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR); while (s < end) { STACK_PUSH_ALT(p, s, sprev); - n = enc_len(encode, *s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - if (ONIG_IS_NEWLINE(*s)) goto fail; - sprev = s; - s++; - } + n = enc_len(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; } STAT_OP_OUT; break; @@ -1546,7 +1777,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR); while (s < end) { STACK_PUSH_ALT(p, s, sprev); - n = enc_len(encode, *s); + n = enc_len(encode, s); if (n > 1) { DATA_ENSURE(n); sprev = s; @@ -1565,17 +1796,11 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } - n = enc_len(encode, *s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - if (ONIG_IS_NEWLINE(*s)) goto fail; - sprev = s; - s++; - } + n = enc_len(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; } p++; STAT_OP_OUT; @@ -1586,7 +1811,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } - n = enc_len(encode, *s); + n = enc_len(encode, s); if (n >1) { DATA_ENSURE(n); sprev = s; @@ -1606,7 +1831,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - s += enc_len(encode, *s); + s += enc_len(encode, s); STAT_OP_OUT; break; @@ -1615,7 +1840,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - s += enc_len(encode, *s); + s += enc_len(encode, s); STAT_OP_OUT; break; @@ -1698,7 +1923,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, STAT_OP_OUT; continue; } - else if (ONIG_IS_NEWLINE(*sprev) && !ON_STR_END(s)) { + else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { STAT_OP_OUT; continue; } @@ -1708,7 +1933,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_END_LINE: STAT_OP_IN(OP_END_LINE); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) { + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; STAT_OP_OUT; @@ -1717,7 +1942,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, } #endif } - else if (ONIG_IS_NEWLINE(*s)) { + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { STAT_OP_OUT; continue; } @@ -1727,7 +1952,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) { + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */ STAT_OP_OUT; @@ -1736,7 +1961,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, } #endif } - if (ONIG_IS_NEWLINE(*s) && ON_STR_END(s+1)) { + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && + ON_STR_END(s + enc_len(encode, s))) { STAT_OP_OUT; continue; } @@ -1845,7 +2071,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, DATA_ENSURE(n); sprev = s; STRING_CMP(pstart, s, n); - while (sprev + (len = enc_len(encode, *sprev)) < s) + while (sprev + (len = enc_len(encode, sprev)) < s) sprev += len; STAT_OP_OUT; @@ -1876,8 +2102,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, n = pend - pstart; DATA_ENSURE(n); sprev = s; - STRING_CMP_IC(pstart, &s, n); - while (sprev + (len = enc_len(encode, *sprev)) < s) + STRING_CMP_IC(ambig_flag, pstart, &s, n); + while (sprev + (len = enc_len(encode, sprev)) < s) sprev += len; STAT_OP_OUT; @@ -1912,7 +2138,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, STRING_CMP_VALUE(pstart, swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enc_len(encode, *sprev)) < s) + while (sprev + (len = enc_len(encode, sprev)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); @@ -1948,10 +2174,10 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, DATA_ENSURE(n); sprev = s; swork = s; - STRING_CMP_VALUE_IC(pstart, &swork, n, is_fail); + STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enc_len(encode, *sprev)) < s) + while (sprev + (len = enc_len(encode, sprev)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); @@ -1965,7 +2191,6 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH); GET_OPTION_INC(option, p); - ignore_case = IS_IGNORECASE(option); STACK_PUSH_ALT(p, s, sprev); p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; STAT_OP_OUT; @@ -1974,7 +2199,6 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION); GET_OPTION_INC(option, p); - ignore_case = IS_IGNORECASE(option); STAT_OP_OUT; continue; break; @@ -2006,6 +2230,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, break; case OP_REPEAT_INC: case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: p += SIZE_MEMNUM; break; default: @@ -2072,6 +2298,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, GET_RELADDR_INC(addr, p); p += addr; STAT_OP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; continue; break; @@ -2150,79 +2377,70 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, break; case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC); - { - StackIndex si; + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ -#ifdef USE_SUBEXP_CALL - if (reg->num_call > 0) { - STACK_GET_REPEAT(mem, stkp); - si = GET_STACK_INDEX(stkp); - } - else { - si = repeat_stk[mem]; - stkp = STACK_AT(si); - } -#else - si = repeat_stk[mem]; - stkp = STACK_AT(si); -#endif - stkp->u.repeat.count++; - if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { - /* end of repeat. Nothing to do. */ - } - else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - STACK_PUSH_ALT(p, s, sprev); - p = stkp->u.repeat.pcode; - } - else { - p = stkp->u.repeat.pcode; - } - STACK_PUSH_REPEAT_INC(si); + repeat_inc: + stkp->u.repeat.count++; + if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { + /* end of repeat. Nothing to do. */ } + else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + STACK_PUSH_ALT(p, s, sprev); + p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ + } + else { + p = stkp->u.repeat.pcode; + } + STACK_PUSH_REPEAT_INC(si); STAT_OP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; continue; break; - case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG); - { - StackIndex si; - - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ -#ifdef USE_SUBEXP_CALL - if (reg->num_call > 0) { - STACK_GET_REPEAT(mem, stkp); - si = GET_STACK_INDEX(stkp); - } - else { - si = repeat_stk[mem]; - stkp = STACK_AT(si); - } -#else - si = repeat_stk[mem]; - stkp = STACK_AT(si); -#endif - stkp->u.repeat.count++; - if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { - if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - UChar* pcode = stkp->u.repeat.pcode; + case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc; + break; - STACK_PUSH_REPEAT_INC(si); - STACK_PUSH_ALT(pcode, s, sprev); - } - else { - p = stkp->u.repeat.pcode; - STACK_PUSH_REPEAT_INC(si); - } - } - else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { - STACK_PUSH_REPEAT_INC(si); - } - } - STAT_OP_OUT; + case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc_ng: + stkp->u.repeat.count++; + if (stkp->u.repeat.count < reg->repeat_range[mem].upper || + IS_REPEAT_INFINITE(reg->repeat_range[mem].upper)) { + if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + UChar* pcode = stkp->u.repeat.pcode; + + STACK_PUSH_REPEAT_INC(si); + STACK_PUSH_ALT(pcode, s, sprev); + } + else { + p = stkp->u.repeat.pcode; + STACK_PUSH_REPEAT_INC(si); + } + } + else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { + STACK_PUSH_REPEAT_INC(si); + } + STAT_OP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; continue; break; + case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc_ng; + break; + case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS); STACK_PUSH_POS(s, sprev); STAT_OP_OUT; @@ -2265,9 +2483,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND); GET_LENGTH_INC(tlen, p); - s = ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; - sprev = onigenc_get_prev_char_head(encode, str, s); + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); STAT_OP_OUT; continue; break; @@ -2275,7 +2493,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT); GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); - q = ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(q)) { /* too short case -> success. ex. /(?<!XXX)a/.match("a") If you want to change to fail, replace following line. */ @@ -2285,7 +2503,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, else { STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); s = q; - sprev = onigenc_get_prev_char_head(encode, str, s); + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } STAT_OP_OUT; continue; @@ -2358,15 +2576,16 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart, static UChar* slow_search(OnigEncoding enc, UChar* target, UChar* target_end, - UChar* text, UChar* text_end, UChar* text_range) + const UChar* text, const UChar* text_end, UChar* text_range) { UChar *t, *p, *s, *end; - end = text_end - (target_end - target) + 1; + end = (UChar* )text_end; + end -= target_end - target - 1; if (end > text_range) end = text_range; - s = text; + s = (UChar* )text; while (s < end) { if (*s == *target) { @@ -2380,97 +2599,66 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, if (t == target_end) return s; } - s += enc_len(enc, *s); + s += enc_len(enc, s); } return (UChar* )NULL; } -#if 0 static int -str_trans_match_after_head_byte(OnigEncoding enc, - int len, UChar* t, UChar* tend, UChar* p) +str_lower_case_match(OnigEncoding enc, int ambig_flag, + const UChar* t, const UChar* tend, + const UChar* p, const UChar* end) { - while (--len > 0) { - if (*t != *p) break; - t++; p++; - } - - if (len == 0) { - int lowlen; - UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; - - while (t < tend) { - len = enc_len(enc, *p); - lowlen = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf); - q = lowbuf; - while (lowlen > 0) { - if (*t++ != *q++) break; - lowlen--; - } - if (lowlen > 0) break; - p += len; - } - if (t == tend) - return 1; - } - - return 0; -} -#endif + int lowlen; + UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN]; + const UChar* tsave; + const UChar* psave; -static int -str_lower_case_match(OnigEncoding enc, UChar* t, UChar* tend, UChar* p) -{ - int len, lowlen; - UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; + tsave = t; + psave = p; + retry: while (t < tend) { - len = enc_len(enc, *p); - lowlen = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf); + lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf); q = lowbuf; while (lowlen > 0) { - if (*t++ != *q++) return 0; + if (*t++ != *q++) { + if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { + ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; + t = tsave; + p = psave; + goto retry; + } + else + return 0; + } lowlen--; } - p += len; } return 1; } static UChar* -slow_search_ic(OnigEncoding enc, +slow_search_ic(OnigEncoding enc, int ambig_flag, UChar* target, UChar* target_end, - UChar* text, UChar* text_end, UChar* text_range) + const UChar* text, const UChar* text_end, UChar* text_range) { - int len, lowlen; - UChar *t, *p, *s, *end; - UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; + UChar *s, *end; - end = text_end - (target_end - target) + 1; + end = (UChar* )text_end; + end -= target_end - target - 1; if (end > text_range) end = text_range; - s = text; + s = (UChar* )text; while (s < end) { - len = enc_len(enc, *s); - lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf); - if (*target == *lowbuf) { - p = lowbuf + 1; - t = target + 1; - while (--lowlen > 0) { - if (*p != *t) break; - p++; *t++; - } - if (lowlen == 0) { - if (str_lower_case_match(enc, t, target_end, s + len)) - return s; - } - } + if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end)) + return s; - s += len; + s += enc_len(enc, s); } return (UChar* )NULL; @@ -2478,13 +2666,15 @@ slow_search_ic(OnigEncoding enc, static UChar* slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, - UChar* text, UChar* adjust_text, UChar* text_end, UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { UChar *t, *p, *s; - s = text_end - (target_end - target); + s = (UChar* )text_end; + s -= (target_end - target); if (s > text_start) - s = text_start; + s = (UChar* )text_start; else s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); @@ -2500,58 +2690,52 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, if (t == target_end) return s; } - s = onigenc_get_prev_char_head(enc, adjust_text, s); + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); } return (UChar* )NULL; } static UChar* -slow_search_backward_ic(OnigEncoding enc, - UChar* target,UChar* target_end, - UChar* text, UChar* adjust_text, - UChar* text_end, UChar* text_start) +slow_search_backward_ic(OnigEncoding enc, int ambig_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { - int len, lowlen; - UChar *t, *p, *s; - UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; + UChar *s; - s = text_end - (target_end - target); + s = (UChar* )text_end; + s -= (target_end - target); if (s > text_start) - s = text_start; + s = (UChar* )text_start; else s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); while (s >= text) { - len = enc_len(enc, *s); - lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf); - if (*target == *lowbuf) { - p = lowbuf + 1; - t = target + 1; - while (--lowlen > 0) { - if (*p != *t) break; - p++; *t++; - } - if (lowlen == 0) { - if (str_lower_case_match(enc, t, target_end, s + len)) - return s; - } - } + if (str_lower_case_match(enc, ambig_flag, + target, target_end, s, text_end)) + return s; - s = onigenc_get_prev_char_head(enc, adjust_text, s); + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); } return (UChar* )NULL; } static UChar* -bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, - UChar* text, UChar* text_end, UChar* text_range) +bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) { - UChar *s, *t, *p, *end; - UChar *tail; + const UChar *s, *t, *p, *end; + const UChar *tail; int skip; +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", + (int )text, (int )text_end, (int )text_range); +#endif + end = text_range + (target_end - target) - 1; if (end > text_end) end = text_end; @@ -2559,7 +2743,7 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, tail = target_end - 1; s = text; while ((s - text) < target_end - target) { - s += enc_len(reg->enc, *s); + s += enc_len(reg->enc, s); } s--; /* set to text check tail position. */ @@ -2570,14 +2754,16 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, while (t >= target && *p == *t) { p--; t--; } - if (t < target) return p + 1; + if (t < target) return (UChar* )(p + 1); skip = reg->map[*s]; - p++; + p = s + 1; + if (p >= text_end) return (UChar* )NULL; t = p; - while ((p - t) < skip) { - p += enc_len(reg->enc, *p); - } + do { + p += enc_len(reg->enc, p); + } while ((p - t) < skip && p < text_end); + s += (p - t); } } @@ -2588,14 +2774,16 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, while (t >= target && *p == *t) { p--; t--; } - if (t < target) return p + 1; + if (t < target) return (UChar* )(p + 1); skip = reg->int_map[*s]; - p++; + p = s + 1; + if (p >= text_end) return (UChar* )NULL; t = p; - while ((p - t) < skip) { - p += enc_len(reg->enc, *p); - } + do { + p += enc_len(reg->enc, p); + } while ((p - t) < skip && p < text_end); + s += (p - t); } } @@ -2603,11 +2791,11 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end, } static UChar* -bm_search(regex_t* reg, UChar* target, UChar* target_end, - UChar* text, UChar* text_end, UChar* text_range) +bm_search(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, const UChar* text_range) { - UChar *s, *t, *p, *end; - UChar *tail; + const UChar *s, *t, *p, *end; + const UChar *tail; end = text_range + (target_end - target) - 1; if (end > text_end) @@ -2622,7 +2810,7 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end, while (t >= target && *p == *t) { p--; t--; } - if (t < target) return p + 1; + if (t < target) return (UChar* )(p + 1); s += reg->map[*s]; } } @@ -2633,7 +2821,7 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end, while (t >= target && *p == *t) { p--; t--; } - if (t < target) return p + 1; + if (t < target) return (UChar* )(p + 1); s += reg->int_map[*s]; } } @@ -2641,11 +2829,10 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end, } static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, - int ignore_case, int** skip) +set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip) + { int i, len; - UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN]; if (IS_NULL(*skip)) { *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); @@ -2656,24 +2843,18 @@ set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*skip)[i] = len; - if (ignore_case) { - for (i = len - 1; i > 0; i--) { - ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf); - (*skip)[*lowbuf] = i; - } - } - else { - for (i = len - 1; i > 0; i--) - (*skip)[s[i]] = i; - } + for (i = len - 1; i > 0; i--) + (*skip)[s[i]] = i; + return 0; } static UChar* -bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text, - UChar* adjust_text, UChar* text_end, UChar* text_start) +bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) { - UChar *s, *t, *p; + const UChar *s, *t, *p; s = text_end - (target_end - target); if (text_start < s) @@ -2688,7 +2869,7 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text, p++; t++; } if (t == target_end) - return s; + return (UChar* )s; s -= reg->int_map_backward[*s]; s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); @@ -2698,26 +2879,28 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text, } static UChar* -map_search(OnigEncoding enc, UChar map[], UChar* text, UChar* text_range) +map_search(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* text_range) { - UChar *s = text; + const UChar *s = text; while (s < text_range) { - if (map[*s]) return s; + if (map[*s]) return (UChar* )s; - s += enc_len(enc, *s); + s += enc_len(enc, s); } return (UChar* )NULL; } static UChar* map_search_backward(OnigEncoding enc, UChar map[], - UChar* text, UChar* adjust_text, UChar* text_start) + const UChar* text, const UChar* adjust_text, + const UChar* text_start) { - UChar *s = text_start; + const UChar *s = text_start; while (s >= text) { - if (map[*s]) return s; + if (map[*s]) return (UChar* )s; s = onigenc_get_prev_char_head(enc, adjust_text, s); } @@ -2725,13 +2908,32 @@ map_search_backward(OnigEncoding enc, UChar map[], } extern int -onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, +onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option) { int r; UChar *prev; MatchArg msa; +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(reg); + ONIG_STATE_INC(reg); + } + } + else { + int n = 0; + while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + ONIG_STATE_INC(reg); + } +#endif /* USE_MULTI_THREAD_SYSTEM */ + MATCH_ARG_INIT(msa, option, region, at); if (region @@ -2739,21 +2941,23 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region, && !IS_POSIX_REGION(option) #endif ) { - r = onig_region_resize(region, reg->num_mem + 1); + r = onig_region_resize_clear(region, reg->num_mem + 1); } else r = 0; if (r == 0) { - prev = onigenc_get_prev_char_head(reg->enc, str, at); + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); r = match_at(reg, str, end, at, prev, &msa); } + MATCH_ARG_FREE(msa); + ONIG_STATE_DEC(reg); return r; } static int -forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, +forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, UChar* range, UChar** low, UChar** high, UChar** low_prev) { UChar *p, *pprev = (UChar* )NULL; @@ -2770,7 +2974,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, } else { UChar *q = p + reg->dmin; - while (p < q) p += enc_len(reg->enc, *p); + while (p < q) p += enc_len(reg->enc, p); } } @@ -2780,7 +2984,8 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); break; case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_ic(reg->enc, reg->exact, reg->exact_end, p, end, range); + p = slow_search_ic(reg->enc, reg->ambig_flag, + reg->exact, reg->exact_end, p, end, range); break; case ONIG_OPTIMIZE_EXACT_BM: @@ -2800,7 +3005,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, if (p - reg->dmin < s) { retry_gate: pprev = p; - p += enc_len(reg->enc, *p); + p += enc_len(reg->enc, p); goto retry; } @@ -2812,19 +3017,19 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, if (!ON_STR_BEGIN(p)) { prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); - if (!ONIG_IS_NEWLINE(*prev)) + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) goto retry_gate; } break; case ANCHOR_END_LINE: if (ON_STR_END(p)) { - prev = onigenc_get_prev_char_head(reg->enc, + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); - if (prev && ONIG_IS_NEWLINE(*prev)) + if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) goto retry_gate; } - else if (!ONIG_IS_NEWLINE(*p)) + else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) goto retry_gate; break; } @@ -2845,7 +3050,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, *low = p - reg->dmax; if (*low > s) { *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, - *low, low_prev); + *low, (const UChar** )low_prev); if (low_prev && IS_NULL(*low_prev)) *low_prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : s), *low); @@ -2872,13 +3077,14 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, } static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, - int ignore_case, int** skip)); + int** skip)); #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 static int -backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, - UChar* range, UChar* adjrange, UChar** low, UChar** high) +backward_search_range(regex_t* reg, const UChar* str, const UChar* end, + UChar* s, const UChar* range, UChar* adjrange, + UChar** low, UChar** high) { int r; UChar *p; @@ -2895,8 +3101,9 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, break; case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_backward_ic(reg->enc, reg->exact, - reg->exact_end, range, adjrange, end, p); + p = slow_search_backward_ic(reg->enc, reg->ambig_flag, + reg->exact, reg->exact_end, + range, adjrange, end, p); break; case ONIG_OPTIMIZE_EXACT_BM: @@ -2905,7 +3112,7 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) goto exact_method; - r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, 0, + r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, &(reg->int_map_backward)); if (r) return r; } @@ -2926,7 +3133,7 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, case ANCHOR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); - if (!ONIG_IS_NEWLINE(*prev)) { + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { p = prev; goto retry; } @@ -2937,12 +3144,12 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, if (ON_STR_END(p)) { prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); if (IS_NULL(prev)) goto fail; - if (ONIG_IS_NEWLINE(*prev)) { + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { p = prev; goto retry; } } - else if (!ONIG_IS_NEWLINE(*p)) { + else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) { p = onigenc_get_prev_char_head(reg->enc, adjrange, p); if (IS_NULL(p)) goto fail; goto retry; @@ -2974,18 +3181,19 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s, extern int -onig_search(regex_t* reg, UChar* str, UChar* end, - UChar* start, UChar* range, OnigRegion* region, OnigOptionType option) +onig_search(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) { int r; UChar *s, *prev; MatchArg msa; - if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - reg->state++; /* increment as search counter */ - if (IS_NOT_NULL(reg->chain)) { +#ifdef USE_MULTI_THREAD_SYSTEM + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { onig_chain_reduce(reg); - reg->state++; + ONIG_STATE_INC(reg); } } else { @@ -2995,12 +3203,14 @@ onig_search(regex_t* reg, UChar* str, UChar* end, return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - reg->state++; /* increment as search counter */ + ONIG_STATE_INC(reg); } +#endif /* USE_MULTI_THREAD_SYSTEM */ #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", - (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); + fprintf(stderr, + "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", + (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); #endif if (region @@ -3008,7 +3218,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, && !IS_POSIX_REGION(option) #endif ) { - r = onig_region_resize(region, reg->num_mem + 1); + r = onig_region_resize_clear(region, reg->num_mem + 1); if (r) goto finish_no_msa; } @@ -3049,7 +3259,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, } } else if (reg->anchor & ANCHOR_END_BUF) { - semi_end = end; + semi_end = (UChar* )end; end_buf: if ((OnigDistance )(semi_end - str) < reg->anchor_dmin) @@ -3082,14 +3292,16 @@ onig_search(regex_t* reg, UChar* str, UChar* end, } } else if (reg->anchor & ANCHOR_SEMI_END_BUF) { - if (ONIG_IS_NEWLINE(end[-1])) { - semi_end = end - 1; + UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1); + + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { + semi_end = pre_end; if (semi_end > str && start <= semi_end) { goto end_buf; } } else { - semi_end = end; + semi_end = (UChar* )end; goto end_buf; } } @@ -3098,14 +3310,15 @@ onig_search(regex_t* reg, UChar* str, UChar* end, } } else if (str == end) { /* empty string */ - static UChar* address_for_empty_string = ""; + static const UChar* address_for_empty_string = ""; #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "onig_search: empty string.\n"); #endif if (reg->threshold_len == 0) { - s = start = end = str = address_for_empty_string; + start = end = str = address_for_empty_string; + s = (UChar* )start; prev = (UChar* )NULL; MATCH_ARG_INIT(msa, option, region, start); @@ -3122,7 +3335,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, MATCH_ARG_INIT(msa, option, region, start); - s = start; + s = (UChar* )start; if (range > start) { /* forward search */ if (s > str) prev = onigenc_get_prev_char_head(reg->enc, str, s); @@ -3132,13 +3345,13 @@ onig_search(regex_t* reg, UChar* str, UChar* end, if (reg->optimize != ONIG_OPTIMIZE_NONE) { UChar *sch_range, *low, *high, *low_prev; - sch_range = range; + sch_range = (UChar* )range; if (reg->dmax != 0) { if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_range = end; + sch_range = (UChar* )end; else { sch_range += reg->dmax; - if (sch_range > end) sch_range = end; + if (sch_range > end) sch_range = (UChar* )end; } } if (reg->dmax != ONIG_INFINITE_DISTANCE && @@ -3153,13 +3366,14 @@ onig_search(regex_t* reg, UChar* str, UChar* end, while (s <= high) { MATCH_AND_RETURN_CHECK; prev = s; - s += enc_len(reg->enc, *s); + s += enc_len(reg->enc, s); } if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { if (IS_NOT_NULL(prev)) { - while (!ONIG_IS_NEWLINE(*prev) && s < range) { + while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && + s < range) { prev = s; - s += enc_len(reg->enc, *s); + s += enc_len(reg->enc, s); } } } @@ -3176,19 +3390,23 @@ onig_search(regex_t* reg, UChar* str, UChar* end, do { MATCH_AND_RETURN_CHECK; prev = s; - s += enc_len(reg->enc, *s); + s += enc_len(reg->enc, s); } while (s <= range); /* exec s == range, because empty match with /$/. */ } else { /* backward search */ if (reg->optimize != ONIG_OPTIMIZE_NONE) { UChar *low, *high, *adjrange, *sch_start; - adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); + if (range < end) + adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); + else + adjrange = (UChar* )end; + if (reg->dmax != ONIG_INFINITE_DISTANCE && (end - range) >= reg->threshold_len) { do { sch_start = s + reg->dmax; - if (sch_start > end) sch_start = end; + if (sch_start > end) sch_start = (UChar* )end; if (backward_search_range(reg, str, end, sch_start, range, adjrange, &low, &high) <= 0) goto mismatch; @@ -3210,10 +3428,10 @@ onig_search(regex_t* reg, UChar* str, UChar* end, sch_start = s; if (reg->dmax != 0) { if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_start = end; + sch_start = (UChar* )end; else { sch_start += reg->dmax; - if (sch_start > end) sch_start = end; + if (sch_start > end) sch_start = (UChar* )end; else sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, start, sch_start); @@ -3236,7 +3454,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, finish: MATCH_ARG_FREE(msa); - reg->state--; /* decrement as search counter */ + ONIG_STATE_DEC(reg); /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not setted in match_at(). */ @@ -3257,7 +3475,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, mismatch_no_msa: r = ONIG_MISMATCH; finish_no_msa: - reg->state--; /* decrement as search counter */ + ONIG_STATE_DEC(reg); #ifdef ONIG_DEBUG if (r != ONIG_MISMATCH) fprintf(stderr, "onig_search: error %d\n", r); @@ -3265,7 +3483,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end, return r; match: - reg->state--; /* decrement as search counter */ + ONIG_STATE_DEC(reg); MATCH_ARG_FREE(msa); return s - str; } @@ -3282,18 +3500,44 @@ onig_get_options(regex_t* reg) return reg->options; } +extern OnigAmbigType +onig_get_ambig_flag(regex_t* reg) +{ + return reg->ambig_flag; +} + extern OnigSyntaxType* onig_get_syntax(regex_t* reg) { return reg->syntax; } -extern const char* -onig_version(void) +extern int +onig_number_of_captures(regex_t* reg) +{ + return reg->num_mem; +} + +extern int +onig_number_of_capture_histories(regex_t* reg) { -#define MSTR(a) # a +#ifdef USE_CAPTURE_HISTORY + int i, n; - return (MSTR(ONIGURUMA_VERSION_MAJOR) "." - MSTR(ONIGURUMA_VERSION_MINOR) "." - MSTR(ONIGURUMA_VERSION_TEENY)); + n = 0; + for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(reg->capture_history, i) != 0) + n++; + } + return n; +#else + return 0; +#endif } + +extern void +onig_copy_encoding(OnigEncoding to, OnigEncoding from) +{ + *to = *from; +} + |
