summaryrefslogtreecommitdiff
path: root/ext/mbstring/oniguruma/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/oniguruma/regexec.c')
-rw-r--r--ext/mbstring/oniguruma/regexec.c1252
1 files changed, 748 insertions, 504 deletions
diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c
index 2ded602e15..2c082de423 100644
--- a/ext/mbstring/oniguruma/regexec.c
+++ b/ext/mbstring/oniguruma/regexec.c
@@ -1,53 +1,152 @@
/**********************************************************************
-
regexec.c - Oniguruma (regular expression library)
-
- Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regint.h"
+#ifdef USE_CAPTURE_HISTORY
+static void history_tree_free(OnigCaptureTreeNode* node);
+
static void
-region_list_clear(OnigRegion** list)
+history_tree_clear(OnigCaptureTreeNode* node)
{
int i;
- if (IS_NOT_NULL(list)) {
- for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (IS_NOT_NULL(list[i])) {
- xfree(list[i]);
- list[i] = (OnigRegion* )0;
+ if (IS_NOT_NULL(node)) {
+ for (i = 0; i < node->num_childs; i++) {
+ if (IS_NOT_NULL(node->childs[i])) {
+ history_tree_free(node->childs[i]);
}
}
+ for (i = 0; i < node->allocated; i++) {
+ node->childs[i] = (OnigCaptureTreeNode* )0;
+ }
+ node->num_childs = 0;
+ node->beg = ONIG_REGION_NOTPOS;
+ node->end = ONIG_REGION_NOTPOS;
+ node->group = -1;
}
}
static void
-region_list_free(OnigRegion* r)
+history_tree_free(OnigCaptureTreeNode* node)
+{
+ history_tree_clear(node);
+ xfree(node);
+}
+
+static void
+history_root_free(OnigRegion* r)
{
- if (IS_NOT_NULL(r->list)) {
- region_list_clear(r->list);
- xfree(r->list);
- r->list = (OnigRegion** )0;
+ if (IS_NOT_NULL(r->history_root)) {
+ history_tree_free(r->history_root);
+ r->history_root = (OnigCaptureTreeNode* )0;
}
}
-static OnigRegion**
-region_list_new()
+static OnigCaptureTreeNode*
+history_node_new()
{
- int i;
- OnigRegion** list;
+ OnigCaptureTreeNode* node;
+
+ node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
+ CHECK_NULL_RETURN(node);
+ node->childs = (OnigCaptureTreeNode** )0;
+ node->allocated = 0;
+ node->num_childs = 0;
+ node->group = -1;
+ node->beg = ONIG_REGION_NOTPOS;
+ node->end = ONIG_REGION_NOTPOS;
+
+ return node;
+}
- list = (OnigRegion** )xmalloc(sizeof(OnigRegion*)
- * (ONIG_MAX_CAPTURE_HISTORY_GROUP + 1));
- CHECK_NULL_RETURN(list);
- for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- list[i] = (OnigRegion* )0;
+static int
+history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
+{
+#define HISTORY_TREE_INIT_ALLOC_SIZE 8
+
+ if (parent->num_childs >= parent->allocated) {
+ int n, i;
+
+ if (IS_NULL(parent->childs)) {
+ n = HISTORY_TREE_INIT_ALLOC_SIZE;
+ parent->childs =
+ (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
+ }
+ else {
+ n = parent->allocated * 2;
+ parent->childs =
+ (OnigCaptureTreeNode** )xrealloc(parent->childs,
+ sizeof(OnigCaptureTreeNode*) * n);
+ }
+ CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY);
+ for (i = parent->allocated; i < n; i++) {
+ parent->childs[i] = (OnigCaptureTreeNode* )0;
+ }
+ parent->allocated = n;
+ }
+
+ parent->childs[parent->num_childs] = child;
+ parent->num_childs++;
+ return 0;
+}
+
+static OnigCaptureTreeNode*
+history_tree_clone(OnigCaptureTreeNode* node)
+{
+ int i;
+ OnigCaptureTreeNode *clone, *child;
+
+ clone = history_node_new();
+ CHECK_NULL_RETURN(clone);
+
+ clone->beg = node->beg;
+ clone->end = node->end;
+ for (i = 0; i < node->num_childs; i++) {
+ child = history_tree_clone(node->childs[i]);
+ if (IS_NULL(child)) {
+ history_tree_free(clone);
+ return (OnigCaptureTreeNode* )0;
+ }
+ history_tree_add_child(clone, child);
}
- return list;
+ return clone;
}
+extern OnigCaptureTreeNode*
+onig_get_capture_tree(OnigRegion* region)
+{
+ return region->history_root;
+}
+#endif /* USE_CAPTURE_HISTORY */
+
extern void
onig_region_clear(OnigRegion* region)
{
@@ -56,14 +155,14 @@ onig_region_clear(OnigRegion* region)
for (i = 0; i < region->num_regs; i++) {
region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
}
- region_list_clear(region->list);
+#ifdef USE_CAPTURE_HISTORY
+ history_root_free(region);
+#endif
}
extern int
onig_region_resize(OnigRegion* region, int n)
{
- int i;
-
region->num_regs = n;
if (n < ONIG_NREGION)
@@ -88,92 +187,43 @@ onig_region_resize(OnigRegion* region, int n)
region->allocated = n;
}
- for (i = 0; i < region->num_regs; i++) {
- region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
- }
-
- if (IS_NOT_NULL(region->list))
- region_list_clear(region->list);
-
return 0;
}
-static int
-region_ensure_size(OnigRegion* region, int n)
+extern int
+onig_region_resize_clear(OnigRegion* region, int n)
{
- int i, new_size;
-
- if (region->allocated >= n)
- return 0;
-
- new_size = region->allocated;
- if (new_size == 0)
- new_size = ONIG_NREGION;
- while (new_size < n)
- new_size *= 2;
-
- if (region->allocated == 0) {
- region->beg = (int* )xmalloc(new_size * sizeof(int));
- region->end = (int* )xmalloc(new_size * sizeof(int));
- if (region->beg == 0 || region->end == 0)
- return ONIGERR_MEMORY;
-
- region->allocated = new_size;
- }
- else if (region->allocated < new_size) {
- region->beg = (int* )xrealloc(region->beg, new_size * sizeof(int));
- region->end = (int* )xrealloc(region->end, new_size * sizeof(int));
- if (region->beg == 0 || region->end == 0)
- return ONIGERR_MEMORY;
-
- region->allocated = new_size;
- }
-
- for (i = region->num_regs; i < n; i++) {
- region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
- }
+ int r;
+
+ r = onig_region_resize(region, n);
+ if (r != 0) return r;
+ onig_region_clear(region);
return 0;
}
-
-static int
-region_list_add_entry(OnigRegion* region, int group, int start, int end)
+
+extern int
+onig_region_set(OnigRegion* region, int at, int beg, int end)
{
- int r, pos;
- OnigRegion** list;
-
- if (group > ONIG_MAX_CAPTURE_HISTORY_GROUP)
- return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+ if (at < 0) return ONIGERR_INVALID_ARGUMENT;
- if (IS_NULL(region->list)) {
- region->list = region_list_new();
- CHECK_NULL_RETURN_VAL(region->list, ONIGERR_MEMORY);
+ if (at >= region->allocated) {
+ int r = onig_region_resize(region, at + 1);
+ if (r < 0) return r;
}
-
- list = region->list;
- if (IS_NULL(list[group])) {
- list[group] = onig_region_new();
- CHECK_NULL_RETURN_VAL(list[group], ONIGERR_MEMORY);
- }
-
- r = region_ensure_size(list[group], list[group]->num_regs + 1);
- if (r != 0) return r;
-
- pos = list[group]->num_regs;
- list[group]->beg[pos] = start;
- list[group]->end[pos] = end;
- list[group]->num_regs++;
-
+
+ region->beg[at] = beg;
+ region->end[at] = end;
return 0;
}
-static void
+extern void
onig_region_init(OnigRegion* region)
{
- region->num_regs = 0;
- region->allocated = 0;
- region->beg = (int* )0;
- region->end = (int* )0;
- region->list = (OnigRegion** )0;
+ region->num_regs = 0;
+ region->allocated = 0;
+ region->beg = (int* )0;
+ region->end = (int* )0;
+ region->history_root = (OnigCaptureTreeNode* )0;
}
extern OnigRegion*
@@ -195,7 +245,9 @@ onig_region_free(OnigRegion* r, int free_self)
if (r->end) xfree(r->end);
r->allocated = 0;
}
- region_list_free(r);
+#ifdef USE_CAPTURE_HISTORY
+ history_root_free(r);
+#endif
if (free_self) xfree(r);
}
}
@@ -227,34 +279,19 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
}
to->num_regs = from->num_regs;
- if (IS_NOT_NULL(from->list)) {
- if (IS_NULL(to->list)) {
- to->list = region_list_new();
- }
-
- for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (IS_NOT_NULL(from->list[i])) {
- if (IS_NULL(to->list[i]))
- to->list[i] = onig_region_new();
+#ifdef USE_CAPTURE_HISTORY
+ history_root_free(to);
- onig_region_copy(to->list[i], from->list[i]);
- }
- else {
- if (IS_NOT_NULL(to->list[i])) {
- xfree(to->list[i]);
- to->list[i] = (OnigRegion* )0;
- }
- }
- }
+ if (IS_NOT_NULL(from->history_root)) {
+ to->history_root = history_tree_clone(from->history_root);
}
- else
- region_list_free(to);
+#endif
}
/** stack **/
#define INVALID_STACK_INDEX -1
-typedef int StackIndex;
+typedef long StackIndex;
typedef struct _StackType {
unsigned int type;
@@ -324,7 +361,7 @@ typedef struct {
int stack_n;
OnigOptionType options;
OnigRegion* region;
- UChar* start; /* search start position (for \G: BEGIN_POSITION) */
+ const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
} MatchArg;
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
@@ -362,11 +399,26 @@ typedef struct {
};\
} while(0)
+static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
+
+extern unsigned int
+onig_get_match_stack_limit_size(void)
+{
+ return MatchStackLimitSize;
+}
+
+extern int
+onig_set_match_stack_limit_size(unsigned int size)
+{
+ MatchStackLimitSize = size;
+ return 0;
+}
+
static int
stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
{
- int n;
+ unsigned int n;
StackType *x, *stk_base, *stk_end, *stk;
stk_base = *arg_stk_base;
@@ -385,7 +437,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
}
else {
n *= 2;
- if (n > MATCH_STACK_LIMIT_SIZE) return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
+ if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
+ return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ else
+ n = MatchStackLimitSize;
+ }
x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
if (IS_NULL(x)) {
STACK_SAVE;
@@ -831,24 +888,25 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
}\
} while(0)
-#define STRING_CMP_IC(s1,ps2,len) do {\
- if (string_cmp_ic(encode, s1, ps2, len) == 0) \
+#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\
+ if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
goto fail; \
} while(0)
-static int string_cmp_ic(OnigEncoding enc,
+static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
UChar* s1, UChar** ps2, int mblen)
{
- UChar buf1[ONIGENC_MBC_TO_LOWER_MAXLEN];
- UChar buf2[ONIGENC_MBC_TO_LOWER_MAXLEN];
- UChar *p1, *p2, *end, *s2;
+ UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar *p1, *p2, *end, *s2, *end2;
int len1, len2;
- s2 = *ps2;
- end = s1 + mblen;
+ s2 = *ps2;
+ end = s1 + mblen;
+ end2 = s2 + mblen;
while (s1 < end) {
- len1 = ONIGENC_MBC_TO_LOWER(enc, s1, buf1);
- len2 = ONIGENC_MBC_TO_LOWER(enc, s2, buf2);
+ len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1);
+ len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2);
if (len1 != len2) return 0;
p1 = buf1;
p2 = buf2;
@@ -857,9 +915,6 @@ static int string_cmp_ic(OnigEncoding enc,
p1++;
p2++;
}
-
- s1 += enc_len(enc, *s1);
- s2 += enc_len(enc, *s2);
}
*ps2 = s2;
@@ -875,8 +930,8 @@ static int string_cmp_ic(OnigEncoding enc,
}\
} while(0)
-#define STRING_CMP_VALUE_IC(s1,ps2,len,is_fail) do {\
- if (string_cmp_ic(encode, s1, ps2, len) == 0) \
+#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\
+ if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
is_fail = 1; \
else \
is_fail = 0; \
@@ -891,6 +946,110 @@ static int string_cmp_ic(OnigEncoding enc,
#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
+#ifdef USE_CAPTURE_HISTORY
+static int
+make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
+ StackType* stk_top, UChar* str, regex_t* reg)
+{
+ int n, r;
+ OnigCaptureTreeNode* child;
+ StackType* k = *kp;
+
+ while (k < stk_top) {
+ if (k->type == STK_MEM_START) {
+ n = k->u.mem.num;
+ if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
+ BIT_STATUS_AT(reg->capture_history, n) != 0) {
+ child = history_node_new();
+ CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY);
+ child->group = n;
+ child->beg = (int )(k->u.mem.pstr - str);
+ r = history_tree_add_child(node, child);
+ if (r != 0) return r;
+ *kp = (k + 1);
+ r = make_capture_history_tree(child, kp, stk_top, str, reg);
+ if (r != 0) return r;
+
+ k = *kp;
+ child->end = (int )(k->u.mem.pstr - str);
+ }
+ }
+ else if (k->type == STK_MEM_END) {
+ if (k->u.mem.num == node->group) {
+ node->end = (int )(k->u.mem.pstr - str);
+ *kp = k;
+ return 0;
+ }
+ }
+ k++;
+ }
+
+ return 1; /* 1: root node ending. */
+}
+#endif
+
+#ifdef RUBY_PLATFORM
+
+typedef struct {
+ int state;
+ regex_t* reg;
+ MatchArg* msa;
+ StackType* stk_base;
+} TrapEnsureArg;
+
+static VALUE
+trap_ensure(VALUE arg)
+{
+ TrapEnsureArg* ta = (TrapEnsureArg* )arg;
+
+ if (ta->state == 0) { /* trap_exec() is not normal return */
+ ONIG_STATE_DEC(ta->reg);
+ if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
+ xfree(ta->stk_base);
+
+ MATCH_ARG_FREE(*(ta->msa));
+ }
+
+ return Qnil;
+}
+
+static VALUE
+trap_exec(VALUE arg)
+{
+ TrapEnsureArg* ta;
+
+ rb_trap_exec();
+
+ ta = (TrapEnsureArg* )arg;
+ ta->state = 1; /* normal return */
+ return Qnil;
+}
+
+extern void
+onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base)
+{
+ VALUE arg;
+ TrapEnsureArg ta;
+
+ ta.state = 0;
+ ta.reg = reg;
+ ta.msa = msa;
+ ta.stk_base = stk_base;
+ arg = (VALUE )(&ta);
+ rb_ensure(trap_exec, arg, trap_ensure, arg);
+}
+
+#define CHECK_INTERRUPT_IN_MATCH_AT do {\
+ if (rb_trap_pending) {\
+ if (! rb_prohibit_interrupt) {\
+ onig_exec_trap(reg, msa, stk_base);\
+ }\
+ }\
+} while (0)
+#else
+#define CHECK_INTERRUPT_IN_MATCH_AT
+#endif /* RUBY_PLATFORM */
+
#ifdef ONIG_DEBUG_STATISTICS
#define USE_TIMEOFDAY
@@ -935,6 +1094,7 @@ static int MaxStackDepth = 0;
} while (0)
#ifdef RUBY_PLATFORM
+
/*
* :nodoc:
*/
@@ -984,7 +1144,7 @@ onig_print_statistics(FILE* f)
#endif
extern int
-onig_is_in_code_range(UChar* p, OnigCodePoint code)
+onig_is_in_code_range(const UChar* p, OnigCodePoint code)
{
OnigCodePoint n, *data;
OnigCodePoint low, high, x;
@@ -1004,6 +1164,27 @@ onig_is_in_code_range(UChar* p, OnigCodePoint code)
return ((low < n && code >= data[low * 2]) ? 1 : 0);
}
+static int
+code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen)
+{
+ unsigned int in_cc;
+ CClassNode* cc = (CClassNode* )node;
+
+ if (enclen == 1) {
+ in_cc = BITSET_AT(cc->bs, code);
+ }
+ else {
+ UChar* p = ((BBuf* )(cc->mbuf))->p;
+ in_cc = onig_is_in_code_range(p, code);
+ }
+
+ if (IS_CCLASS_NOT(cc)) {
+ return (in_cc ? 0 : 1);
+ }
+ else {
+ return (in_cc ? 1 : 0);
+ }
+}
/* matching region of POSIX API */
typedef int regoff_t;
@@ -1016,7 +1197,7 @@ typedef struct {
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
static int
-match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
+match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
UChar* sprev, MatchArg* msa)
{
static UChar FinishCode[] = { OP_FINISH };
@@ -1027,18 +1208,18 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
RelAddrType addr;
OnigOptionType option = reg->options;
OnigEncoding encode = reg->enc;
- int ignore_case;
+ OnigAmbigType ambig_flag = reg->ambig_flag;
UChar *s, *q, *sbegin;
UChar *p = reg->p;
char *alloca_base;
StackType *stk_alloc, *stk_base, *stk, *stk_end;
StackType *stkp; /* used as any purpose. */
+ StackIndex si;
StackIndex *repeat_stk;
StackIndex *mem_start_stk, *mem_end_stk;
n = reg->num_repeat + reg->num_mem * 2;
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
- ignore_case = IS_IGNORECASE(option);
pop_level = reg->stack_pop_level;
num_mem = reg->num_mem;
repeat_stk = (StackIndex* )alloca_base;
@@ -1062,7 +1243,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
best_len = ONIG_MISMATCH;
- s = sstart;
+ s = (UChar* )sstart;
while (1) {
#ifdef ONIG_DEBUG_MATCH
{
@@ -1071,7 +1252,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
fprintf(stderr, "%4d> \"", (int )(s - str));
bp = buf;
for (i = 0, q = s; i < 7 && q < end; i++) {
- len = enc_len(encode, *q);
+ len = enc_len(encode, q);
while (len-- > 0) *bp++ = *q++;
}
if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
@@ -1079,7 +1260,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
*bp = 0;
fputs(buf, stderr);
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
- onig_print_compiled_byte_code(stderr, p, NULL);
+ onig_print_compiled_byte_code(stderr, p, NULL, encode);
fprintf(stderr, "\n");
}
#endif
@@ -1134,27 +1315,33 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
}
}
+#ifdef USE_CAPTURE_HISTORY
if (reg->capture_history != 0) {
- UChar *pstart, *pend;
- for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (BIT_STATUS_AT(reg->capture_history, i) != 0) {
- stkp = stk_base;
- do {
- STACK_GET_MEM_RANGE(stkp, i, pstart, pend);
- if (stkp < stk) {
- int r;
- r = region_list_add_entry(region, i,
- pstart - str, pend - str);
- if (r) {
- STACK_SAVE;
- return r;
- }
- }
- stkp++;
- } while (stkp < stk);
- }
- }
- } /* list of captures */
+ int r;
+ OnigCaptureTreeNode* node;
+
+ if (IS_NULL(region->history_root)) {
+ region->history_root = node = history_node_new();
+ CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
+ }
+ else {
+ node = region->history_root;
+ history_tree_clear(node);
+ }
+
+ node->group = 0;
+ node->beg = sstart - str;
+ node->end = s - str;
+
+ stkp = stk_base;
+ r = make_capture_history_tree(region->history_root, &stkp,
+ stk, (UChar* )str, reg);
+ if (r < 0) {
+ best_len = r; /* error code */
+ goto finish;
+ }
+ }
+#endif /* USE_CAPTURE_HISTORY */
#ifdef USE_POSIX_REGION_OPTION
} /* else IS_POSIX_REGION() */
#endif
@@ -1171,10 +1358,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
goto fail; /* for retry */
}
}
- else {
- /* default behavior: return first-matching result. */
- goto finish;
- }
+
+ /* default behavior: return first-matching result. */
+ goto finish;
break;
case OP_EXACT1: STAT_OP_IN(OP_EXACT1);
@@ -1192,14 +1378,31 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
{
int len;
- UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
- len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf);
- DATA_ENSURE(len);
+ DATA_ENSURE(1);
+ ss = s;
+ sp = p;
+
+ exact1_ic_retry:
+ len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+ DATA_ENSURE(0);
q = lowbuf;
- s += enc_len(encode, *s);
while (len-- > 0) {
- if (*p != *q) goto fail;
+ if (*p != *q) {
+#if 1
+ if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
+ s = ss;
+ p = sp;
+ goto exact1_ic_retry;
+ }
+ else
+ goto fail;
+#else
+ goto fail;
+#endif
+ }
p++; q++;
}
}
@@ -1276,19 +1479,36 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
{
int len;
- UChar *q, *endp, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
GET_LENGTH_INC(tlen, p);
endp = p + tlen;
while (p < endp) {
- len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf);
- DATA_ENSURE(len);
sprev = s;
- s += enc_len(encode, *s);
+ DATA_ENSURE(1);
+ ss = s;
+ sp = p;
+
+ exactn_ic_retry:
+ len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+ DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
- if (*p != *q) goto fail;
+ if (*p != *q) {
+#if 1
+ if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
+ s = ss;
+ p = sp;
+ goto exactn_ic_retry;
+ }
+ else
+ goto fail;
+#else
+ goto fail;
+#endif
+ }
p++; q++;
}
}
@@ -1389,20 +1609,22 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
p += SIZE_BITSET;
- s += enc_len(encode, *s); /* OP_CCLASS can match mb-code. \D, \S */
+ s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
STAT_OP_OUT;
break;
case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB);
- if (! ONIGENC_IS_MBC_HEAD(encode, *s)) goto fail;
+ if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
cclass_mb:
GET_LENGTH_INC(tlen, p);
{
OnigCodePoint code;
UChar *ss;
- int mb_len = enc_len(encode, *s);
+ int mb_len;
+ DATA_ENSURE(1);
+ mb_len = enc_len(encode, s);
DATA_ENSURE(mb_len);
ss = s;
s += mb_len;
@@ -1422,7 +1644,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX);
DATA_ENSURE(1);
- if (ONIGENC_IS_MBC_HEAD(encode, *s)) {
+ if (ONIGENC_IS_MBC_HEAD(encode, s)) {
p += SIZE_BITSET;
goto cclass_mb;
}
@@ -1442,13 +1664,13 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
p += SIZE_BITSET;
- s += enc_len(encode, *s);
+ s += enc_len(encode, s);
STAT_OP_OUT;
break;
case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT);
- if (! ONIGENC_IS_MBC_HEAD(encode, *s)) {
- DATA_ENSURE(1);
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
s++;
GET_LENGTH_INC(tlen, p);
p += tlen;
@@ -1460,10 +1682,11 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
{
OnigCodePoint code;
UChar *ss;
- int mb_len = enc_len(encode, *s);
+ int mb_len = enc_len(encode, s);
if (s + mb_len > end) {
- s = end;
+ DATA_ENSURE(1);
+ s = (UChar* )end;
p += tlen;
goto cc_mb_not_success;
}
@@ -1488,7 +1711,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT);
DATA_ENSURE(1);
- if (ONIGENC_IS_MBC_HEAD(encode, *s)) {
+ if (ONIGENC_IS_MBC_HEAD(encode, s)) {
p += SIZE_BITSET;
goto cclass_mb_not;
}
@@ -1504,22 +1727,36 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STAT_OP_OUT;
break;
- case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
- n = enc_len(encode, *s);
- if (n > 1) {
- DATA_ENSURE(n);
- s += n;
- }
- else {
- DATA_ENSURE(1);
- if (ONIG_IS_NEWLINE(*s)) goto fail;
- s++;
+ case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE);
+ {
+ OnigCodePoint code;
+ void *node;
+ int mb_len;
+ UChar *ss;
+
+ DATA_ENSURE(1);
+ GET_POINTER_INC(node, p);
+ mb_len = enc_len(encode, s);
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+ if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail;
}
STAT_OP_OUT;
break;
+ case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
+ DATA_ENSURE(1);
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ s += n;
+ STAT_OP_OUT;
+ break;
+
case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML);
- n = enc_len(encode, *s);
+ DATA_ENSURE(1);
+ n = enc_len(encode, s);
DATA_ENSURE(n);
s += n;
STAT_OP_OUT;
@@ -1528,17 +1765,11 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR);
while (s < end) {
STACK_PUSH_ALT(p, s, sprev);
- n = enc_len(encode, *s);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- if (ONIG_IS_NEWLINE(*s)) goto fail;
- sprev = s;
- s++;
- }
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
}
STAT_OP_OUT;
break;
@@ -1546,7 +1777,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR);
while (s < end) {
STACK_PUSH_ALT(p, s, sprev);
- n = enc_len(encode, *s);
+ n = enc_len(encode, s);
if (n > 1) {
DATA_ENSURE(n);
sprev = s;
@@ -1565,17 +1796,11 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
}
- n = enc_len(encode, *s);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- if (ONIG_IS_NEWLINE(*s)) goto fail;
- sprev = s;
- s++;
- }
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
}
p++;
STAT_OP_OUT;
@@ -1586,7 +1811,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
}
- n = enc_len(encode, *s);
+ n = enc_len(encode, s);
if (n >1) {
DATA_ENSURE(n);
sprev = s;
@@ -1606,7 +1831,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
if (! ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
- s += enc_len(encode, *s);
+ s += enc_len(encode, s);
STAT_OP_OUT;
break;
@@ -1615,7 +1840,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
if (ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
- s += enc_len(encode, *s);
+ s += enc_len(encode, s);
STAT_OP_OUT;
break;
@@ -1698,7 +1923,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STAT_OP_OUT;
continue;
}
- else if (ONIG_IS_NEWLINE(*sprev) && !ON_STR_END(s)) {
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
STAT_OP_OUT;
continue;
}
@@ -1708,7 +1933,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_END_LINE: STAT_OP_IN(OP_END_LINE);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) {
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail;
STAT_OP_OUT;
@@ -1717,7 +1942,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
}
#endif
}
- else if (ONIG_IS_NEWLINE(*s)) {
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
STAT_OP_OUT;
continue;
}
@@ -1727,7 +1952,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) {
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */
STAT_OP_OUT;
@@ -1736,7 +1961,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
}
#endif
}
- if (ONIG_IS_NEWLINE(*s) && ON_STR_END(s+1)) {
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
+ ON_STR_END(s + enc_len(encode, s))) {
STAT_OP_OUT;
continue;
}
@@ -1845,7 +2071,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
DATA_ENSURE(n);
sprev = s;
STRING_CMP(pstart, s, n);
- while (sprev + (len = enc_len(encode, *sprev)) < s)
+ while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
STAT_OP_OUT;
@@ -1876,8 +2102,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
n = pend - pstart;
DATA_ENSURE(n);
sprev = s;
- STRING_CMP_IC(pstart, &s, n);
- while (sprev + (len = enc_len(encode, *sprev)) < s)
+ STRING_CMP_IC(ambig_flag, pstart, &s, n);
+ while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
STAT_OP_OUT;
@@ -1912,7 +2138,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STRING_CMP_VALUE(pstart, swork, n, is_fail);
if (is_fail) continue;
s = swork;
- while (sprev + (len = enc_len(encode, *sprev)) < s)
+ while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
p += (SIZE_MEMNUM * (tlen - i - 1));
@@ -1948,10 +2174,10 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
DATA_ENSURE(n);
sprev = s;
swork = s;
- STRING_CMP_VALUE_IC(pstart, &swork, n, is_fail);
+ STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail);
if (is_fail) continue;
s = swork;
- while (sprev + (len = enc_len(encode, *sprev)) < s)
+ while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
p += (SIZE_MEMNUM * (tlen - i - 1));
@@ -1965,7 +2191,6 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH);
GET_OPTION_INC(option, p);
- ignore_case = IS_IGNORECASE(option);
STACK_PUSH_ALT(p, s, sprev);
p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
STAT_OP_OUT;
@@ -1974,7 +2199,6 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION);
GET_OPTION_INC(option, p);
- ignore_case = IS_IGNORECASE(option);
STAT_OP_OUT;
continue;
break;
@@ -2006,6 +2230,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
break;
case OP_REPEAT_INC:
case OP_REPEAT_INC_NG:
+ case OP_REPEAT_INC_SG:
+ case OP_REPEAT_INC_NG_SG:
p += SIZE_MEMNUM;
break;
default:
@@ -2072,6 +2298,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
GET_RELADDR_INC(addr, p);
p += addr;
STAT_OP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
continue;
break;
@@ -2150,79 +2377,70 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
break;
case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC);
- {
- StackIndex si;
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
-#ifdef USE_SUBEXP_CALL
- if (reg->num_call > 0) {
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- }
- else {
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
- }
-#else
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-#endif
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
- /* end of repeat. Nothing to do. */
- }
- else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- STACK_PUSH_ALT(p, s, sprev);
- p = stkp->u.repeat.pcode;
- }
- else {
- p = stkp->u.repeat.pcode;
- }
- STACK_PUSH_REPEAT_INC(si);
+ repeat_inc:
+ stkp->u.repeat.count++;
+ if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+ /* end of repeat. Nothing to do. */
}
+ else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ STACK_PUSH_ALT(p, s, sprev);
+ p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ }
+ STACK_PUSH_REPEAT_INC(si);
STAT_OP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
continue;
break;
- case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG);
- {
- StackIndex si;
-
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
-#ifdef USE_SUBEXP_CALL
- if (reg->num_call > 0) {
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- }
- else {
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
- }
-#else
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-#endif
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
- if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- UChar* pcode = stkp->u.repeat.pcode;
+ case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ STACK_GET_REPEAT(mem, stkp);
+ si = GET_STACK_INDEX(stkp);
+ goto repeat_inc;
+ break;
- STACK_PUSH_REPEAT_INC(si);
- STACK_PUSH_ALT(pcode, s, sprev);
- }
- else {
- p = stkp->u.repeat.pcode;
- STACK_PUSH_REPEAT_INC(si);
- }
- }
- else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
- STACK_PUSH_REPEAT_INC(si);
- }
- }
- STAT_OP_OUT;
+ case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
+
+ repeat_inc_ng:
+ stkp->u.repeat.count++;
+ if (stkp->u.repeat.count < reg->repeat_range[mem].upper ||
+ IS_REPEAT_INFINITE(reg->repeat_range[mem].upper)) {
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ UChar* pcode = stkp->u.repeat.pcode;
+
+ STACK_PUSH_REPEAT_INC(si);
+ STACK_PUSH_ALT(pcode, s, sprev);
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ STACK_PUSH_REPEAT_INC(si);
+ }
+ }
+ else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+ STACK_PUSH_REPEAT_INC(si);
+ }
+ STAT_OP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
continue;
break;
+ case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ STACK_GET_REPEAT(mem, stkp);
+ si = GET_STACK_INDEX(stkp);
+ goto repeat_inc_ng;
+ break;
+
case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS);
STACK_PUSH_POS(s, sprev);
STAT_OP_OUT;
@@ -2265,9 +2483,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND);
GET_LENGTH_INC(tlen, p);
- s = ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
+ s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(s)) goto fail;
- sprev = onigenc_get_prev_char_head(encode, str, s);
+ sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
STAT_OP_OUT;
continue;
break;
@@ -2275,7 +2493,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT);
GET_RELADDR_INC(addr, p);
GET_LENGTH_INC(tlen, p);
- q = ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
+ q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(q)) {
/* too short case -> success. ex. /(?<!XXX)a/.match("a")
If you want to change to fail, replace following line. */
@@ -2285,7 +2503,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
else {
STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
s = q;
- sprev = onigenc_get_prev_char_head(encode, str, s);
+ sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
}
STAT_OP_OUT;
continue;
@@ -2358,15 +2576,16 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
static UChar*
slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
- UChar* text, UChar* text_end, UChar* text_range)
+ const UChar* text, const UChar* text_end, UChar* text_range)
{
UChar *t, *p, *s, *end;
- end = text_end - (target_end - target) + 1;
+ end = (UChar* )text_end;
+ end -= target_end - target - 1;
if (end > text_range)
end = text_range;
- s = text;
+ s = (UChar* )text;
while (s < end) {
if (*s == *target) {
@@ -2380,97 +2599,66 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
if (t == target_end)
return s;
}
- s += enc_len(enc, *s);
+ s += enc_len(enc, s);
}
return (UChar* )NULL;
}
-#if 0
static int
-str_trans_match_after_head_byte(OnigEncoding enc,
- int len, UChar* t, UChar* tend, UChar* p)
+str_lower_case_match(OnigEncoding enc, int ambig_flag,
+ const UChar* t, const UChar* tend,
+ const UChar* p, const UChar* end)
{
- while (--len > 0) {
- if (*t != *p) break;
- t++; p++;
- }
-
- if (len == 0) {
- int lowlen;
- UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
-
- while (t < tend) {
- len = enc_len(enc, *p);
- lowlen = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf);
- q = lowbuf;
- while (lowlen > 0) {
- if (*t++ != *q++) break;
- lowlen--;
- }
- if (lowlen > 0) break;
- p += len;
- }
- if (t == tend)
- return 1;
- }
-
- return 0;
-}
-#endif
+ int lowlen;
+ UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ const UChar* tsave;
+ const UChar* psave;
-static int
-str_lower_case_match(OnigEncoding enc, UChar* t, UChar* tend, UChar* p)
-{
- int len, lowlen;
- UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ tsave = t;
+ psave = p;
+ retry:
while (t < tend) {
- len = enc_len(enc, *p);
- lowlen = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf);
+ lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
q = lowbuf;
while (lowlen > 0) {
- if (*t++ != *q++) return 0;
+ if (*t++ != *q++) {
+ if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
+ t = tsave;
+ p = psave;
+ goto retry;
+ }
+ else
+ return 0;
+ }
lowlen--;
}
- p += len;
}
return 1;
}
static UChar*
-slow_search_ic(OnigEncoding enc,
+slow_search_ic(OnigEncoding enc, int ambig_flag,
UChar* target, UChar* target_end,
- UChar* text, UChar* text_end, UChar* text_range)
+ const UChar* text, const UChar* text_end, UChar* text_range)
{
- int len, lowlen;
- UChar *t, *p, *s, *end;
- UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ UChar *s, *end;
- end = text_end - (target_end - target) + 1;
+ end = (UChar* )text_end;
+ end -= target_end - target - 1;
if (end > text_range)
end = text_range;
- s = text;
+ s = (UChar* )text;
while (s < end) {
- len = enc_len(enc, *s);
- lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf);
- if (*target == *lowbuf) {
- p = lowbuf + 1;
- t = target + 1;
- while (--lowlen > 0) {
- if (*p != *t) break;
- p++; *t++;
- }
- if (lowlen == 0) {
- if (str_lower_case_match(enc, t, target_end, s + len))
- return s;
- }
- }
+ if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
+ return s;
- s += len;
+ s += enc_len(enc, s);
}
return (UChar* )NULL;
@@ -2478,13 +2666,15 @@ slow_search_ic(OnigEncoding enc,
static UChar*
slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
- UChar* text, UChar* adjust_text, UChar* text_end, UChar* text_start)
+ const UChar* text, const UChar* adjust_text,
+ const UChar* text_end, const UChar* text_start)
{
UChar *t, *p, *s;
- s = text_end - (target_end - target);
+ s = (UChar* )text_end;
+ s -= (target_end - target);
if (s > text_start)
- s = text_start;
+ s = (UChar* )text_start;
else
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
@@ -2500,58 +2690,52 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
if (t == target_end)
return s;
}
- s = onigenc_get_prev_char_head(enc, adjust_text, s);
+ s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
}
return (UChar* )NULL;
}
static UChar*
-slow_search_backward_ic(OnigEncoding enc,
- UChar* target,UChar* target_end,
- UChar* text, UChar* adjust_text,
- UChar* text_end, UChar* text_start)
+slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
+ UChar* target, UChar* target_end,
+ const UChar* text, const UChar* adjust_text,
+ const UChar* text_end, const UChar* text_start)
{
- int len, lowlen;
- UChar *t, *p, *s;
- UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ UChar *s;
- s = text_end - (target_end - target);
+ s = (UChar* )text_end;
+ s -= (target_end - target);
if (s > text_start)
- s = text_start;
+ s = (UChar* )text_start;
else
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
while (s >= text) {
- len = enc_len(enc, *s);
- lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf);
- if (*target == *lowbuf) {
- p = lowbuf + 1;
- t = target + 1;
- while (--lowlen > 0) {
- if (*p != *t) break;
- p++; *t++;
- }
- if (lowlen == 0) {
- if (str_lower_case_match(enc, t, target_end, s + len))
- return s;
- }
- }
+ if (str_lower_case_match(enc, ambig_flag,
+ target, target_end, s, text_end))
+ return s;
- s = onigenc_get_prev_char_head(enc, adjust_text, s);
+ s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
}
return (UChar* )NULL;
}
static UChar*
-bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
- UChar* text, UChar* text_end, UChar* text_range)
+bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
+ const UChar* text, const UChar* text_end,
+ const UChar* text_range)
{
- UChar *s, *t, *p, *end;
- UChar *tail;
+ const UChar *s, *t, *p, *end;
+ const UChar *tail;
int skip;
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
+ (int )text, (int )text_end, (int )text_range);
+#endif
+
end = text_range + (target_end - target) - 1;
if (end > text_end)
end = text_end;
@@ -2559,7 +2743,7 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
tail = target_end - 1;
s = text;
while ((s - text) < target_end - target) {
- s += enc_len(reg->enc, *s);
+ s += enc_len(reg->enc, s);
}
s--; /* set to text check tail position. */
@@ -2570,14 +2754,16 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
while (t >= target && *p == *t) {
p--; t--;
}
- if (t < target) return p + 1;
+ if (t < target) return (UChar* )(p + 1);
skip = reg->map[*s];
- p++;
+ p = s + 1;
+ if (p >= text_end) return (UChar* )NULL;
t = p;
- while ((p - t) < skip) {
- p += enc_len(reg->enc, *p);
- }
+ do {
+ p += enc_len(reg->enc, p);
+ } while ((p - t) < skip && p < text_end);
+
s += (p - t);
}
}
@@ -2588,14 +2774,16 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
while (t >= target && *p == *t) {
p--; t--;
}
- if (t < target) return p + 1;
+ if (t < target) return (UChar* )(p + 1);
skip = reg->int_map[*s];
- p++;
+ p = s + 1;
+ if (p >= text_end) return (UChar* )NULL;
t = p;
- while ((p - t) < skip) {
- p += enc_len(reg->enc, *p);
- }
+ do {
+ p += enc_len(reg->enc, p);
+ } while ((p - t) < skip && p < text_end);
+
s += (p - t);
}
}
@@ -2603,11 +2791,11 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
}
static UChar*
-bm_search(regex_t* reg, UChar* target, UChar* target_end,
- UChar* text, UChar* text_end, UChar* text_range)
+bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
+ const UChar* text, const UChar* text_end, const UChar* text_range)
{
- UChar *s, *t, *p, *end;
- UChar *tail;
+ const UChar *s, *t, *p, *end;
+ const UChar *tail;
end = text_range + (target_end - target) - 1;
if (end > text_end)
@@ -2622,7 +2810,7 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end,
while (t >= target && *p == *t) {
p--; t--;
}
- if (t < target) return p + 1;
+ if (t < target) return (UChar* )(p + 1);
s += reg->map[*s];
}
}
@@ -2633,7 +2821,7 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end,
while (t >= target && *p == *t) {
p--; t--;
}
- if (t < target) return p + 1;
+ if (t < target) return (UChar* )(p + 1);
s += reg->int_map[*s];
}
}
@@ -2641,11 +2829,10 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end,
}
static int
-set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc,
- int ignore_case, int** skip)
+set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip)
+
{
int i, len;
- UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
if (IS_NULL(*skip)) {
*skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
@@ -2656,24 +2843,18 @@ set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc,
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
(*skip)[i] = len;
- if (ignore_case) {
- for (i = len - 1; i > 0; i--) {
- ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf);
- (*skip)[*lowbuf] = i;
- }
- }
- else {
- for (i = len - 1; i > 0; i--)
- (*skip)[s[i]] = i;
- }
+ for (i = len - 1; i > 0; i--)
+ (*skip)[s[i]] = i;
+
return 0;
}
static UChar*
-bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text,
- UChar* adjust_text, UChar* text_end, UChar* text_start)
+bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
+ const UChar* text, const UChar* adjust_text,
+ const UChar* text_end, const UChar* text_start)
{
- UChar *s, *t, *p;
+ const UChar *s, *t, *p;
s = text_end - (target_end - target);
if (text_start < s)
@@ -2688,7 +2869,7 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text,
p++; t++;
}
if (t == target_end)
- return s;
+ return (UChar* )s;
s -= reg->int_map_backward[*s];
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
@@ -2698,26 +2879,28 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text,
}
static UChar*
-map_search(OnigEncoding enc, UChar map[], UChar* text, UChar* text_range)
+map_search(OnigEncoding enc, UChar map[],
+ const UChar* text, const UChar* text_range)
{
- UChar *s = text;
+ const UChar *s = text;
while (s < text_range) {
- if (map[*s]) return s;
+ if (map[*s]) return (UChar* )s;
- s += enc_len(enc, *s);
+ s += enc_len(enc, s);
}
return (UChar* )NULL;
}
static UChar*
map_search_backward(OnigEncoding enc, UChar map[],
- UChar* text, UChar* adjust_text, UChar* text_start)
+ const UChar* text, const UChar* adjust_text,
+ const UChar* text_start)
{
- UChar *s = text_start;
+ const UChar *s = text_start;
while (s >= text) {
- if (map[*s]) return s;
+ if (map[*s]) return (UChar* )s;
s = onigenc_get_prev_char_head(enc, adjust_text, s);
}
@@ -2725,13 +2908,32 @@ map_search_backward(OnigEncoding enc, UChar map[],
}
extern int
-onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
+onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
OnigOptionType option)
{
int r;
UChar *prev;
MatchArg msa;
+#ifdef USE_MULTI_THREAD_SYSTEM
+ if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(reg);
+ if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
+ onig_chain_reduce(reg);
+ ONIG_STATE_INC(reg);
+ }
+ }
+ else {
+ int n = 0;
+ while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
+ if (++n > THREAD_PASS_LIMIT_COUNT)
+ return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+ THREAD_PASS;
+ }
+ ONIG_STATE_INC(reg);
+ }
+#endif /* USE_MULTI_THREAD_SYSTEM */
+
MATCH_ARG_INIT(msa, option, region, at);
if (region
@@ -2739,21 +2941,23 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
&& !IS_POSIX_REGION(option)
#endif
) {
- r = onig_region_resize(region, reg->num_mem + 1);
+ r = onig_region_resize_clear(region, reg->num_mem + 1);
}
else
r = 0;
if (r == 0) {
- prev = onigenc_get_prev_char_head(reg->enc, str, at);
+ prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
r = match_at(reg, str, end, at, prev, &msa);
}
+
MATCH_ARG_FREE(msa);
+ ONIG_STATE_DEC(reg);
return r;
}
static int
-forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
+forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
UChar* range, UChar** low, UChar** high, UChar** low_prev)
{
UChar *p, *pprev = (UChar* )NULL;
@@ -2770,7 +2974,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
}
else {
UChar *q = p + reg->dmin;
- while (p < q) p += enc_len(reg->enc, *p);
+ while (p < q) p += enc_len(reg->enc, p);
}
}
@@ -2780,7 +2984,8 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
break;
case ONIG_OPTIMIZE_EXACT_IC:
- p = slow_search_ic(reg->enc, reg->exact, reg->exact_end, p, end, range);
+ p = slow_search_ic(reg->enc, reg->ambig_flag,
+ reg->exact, reg->exact_end, p, end, range);
break;
case ONIG_OPTIMIZE_EXACT_BM:
@@ -2800,7 +3005,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
if (p - reg->dmin < s) {
retry_gate:
pprev = p;
- p += enc_len(reg->enc, *p);
+ p += enc_len(reg->enc, p);
goto retry;
}
@@ -2812,19 +3017,19 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
if (!ON_STR_BEGIN(p)) {
prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : str), p);
- if (!ONIG_IS_NEWLINE(*prev))
+ if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
goto retry_gate;
}
break;
case ANCHOR_END_LINE:
if (ON_STR_END(p)) {
- prev = onigenc_get_prev_char_head(reg->enc,
+ prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : str), p);
- if (prev && ONIG_IS_NEWLINE(*prev))
+ if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
goto retry_gate;
}
- else if (!ONIG_IS_NEWLINE(*p))
+ else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end))
goto retry_gate;
break;
}
@@ -2845,7 +3050,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
*low = p - reg->dmax;
if (*low > s) {
*low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
- *low, low_prev);
+ *low, (const UChar** )low_prev);
if (low_prev && IS_NULL(*low_prev))
*low_prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : s), *low);
@@ -2872,13 +3077,14 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
}
static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
- int ignore_case, int** skip));
+ int** skip));
#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
static int
-backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
- UChar* range, UChar* adjrange, UChar** low, UChar** high)
+backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
+ UChar* s, const UChar* range, UChar* adjrange,
+ UChar** low, UChar** high)
{
int r;
UChar *p;
@@ -2895,8 +3101,9 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
break;
case ONIG_OPTIMIZE_EXACT_IC:
- p = slow_search_backward_ic(reg->enc, reg->exact,
- reg->exact_end, range, adjrange, end, p);
+ p = slow_search_backward_ic(reg->enc, reg->ambig_flag,
+ reg->exact, reg->exact_end,
+ range, adjrange, end, p);
break;
case ONIG_OPTIMIZE_EXACT_BM:
@@ -2905,7 +3112,7 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
goto exact_method;
- r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, 0,
+ r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
&(reg->int_map_backward));
if (r) return r;
}
@@ -2926,7 +3133,7 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
case ANCHOR_BEGIN_LINE:
if (!ON_STR_BEGIN(p)) {
prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
- if (!ONIG_IS_NEWLINE(*prev)) {
+ if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
p = prev;
goto retry;
}
@@ -2937,12 +3144,12 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
if (ON_STR_END(p)) {
prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
if (IS_NULL(prev)) goto fail;
- if (ONIG_IS_NEWLINE(*prev)) {
+ if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
p = prev;
goto retry;
}
}
- else if (!ONIG_IS_NEWLINE(*p)) {
+ else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) {
p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
if (IS_NULL(p)) goto fail;
goto retry;
@@ -2974,18 +3181,19 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
extern int
-onig_search(regex_t* reg, UChar* str, UChar* end,
- UChar* start, UChar* range, OnigRegion* region, OnigOptionType option)
+onig_search(regex_t* reg, const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
{
int r;
UChar *s, *prev;
MatchArg msa;
- if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- reg->state++; /* increment as search counter */
- if (IS_NOT_NULL(reg->chain)) {
+#ifdef USE_MULTI_THREAD_SYSTEM
+ if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(reg);
+ if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
onig_chain_reduce(reg);
- reg->state++;
+ ONIG_STATE_INC(reg);
}
}
else {
@@ -2995,12 +3203,14 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
- reg->state++; /* increment as search counter */
+ ONIG_STATE_INC(reg);
}
+#endif /* USE_MULTI_THREAD_SYSTEM */
#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
- (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
+ fprintf(stderr,
+ "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
+ (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
#endif
if (region
@@ -3008,7 +3218,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
&& !IS_POSIX_REGION(option)
#endif
) {
- r = onig_region_resize(region, reg->num_mem + 1);
+ r = onig_region_resize_clear(region, reg->num_mem + 1);
if (r) goto finish_no_msa;
}
@@ -3049,7 +3259,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
}
}
else if (reg->anchor & ANCHOR_END_BUF) {
- semi_end = end;
+ semi_end = (UChar* )end;
end_buf:
if ((OnigDistance )(semi_end - str) < reg->anchor_dmin)
@@ -3082,14 +3292,16 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
}
}
else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
- if (ONIG_IS_NEWLINE(end[-1])) {
- semi_end = end - 1;
+ UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
+
+ if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
+ semi_end = pre_end;
if (semi_end > str && start <= semi_end) {
goto end_buf;
}
}
else {
- semi_end = end;
+ semi_end = (UChar* )end;
goto end_buf;
}
}
@@ -3098,14 +3310,15 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
}
}
else if (str == end) { /* empty string */
- static UChar* address_for_empty_string = "";
+ static const UChar* address_for_empty_string = "";
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "onig_search: empty string.\n");
#endif
if (reg->threshold_len == 0) {
- s = start = end = str = address_for_empty_string;
+ start = end = str = address_for_empty_string;
+ s = (UChar* )start;
prev = (UChar* )NULL;
MATCH_ARG_INIT(msa, option, region, start);
@@ -3122,7 +3335,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
MATCH_ARG_INIT(msa, option, region, start);
- s = start;
+ s = (UChar* )start;
if (range > start) { /* forward search */
if (s > str)
prev = onigenc_get_prev_char_head(reg->enc, str, s);
@@ -3132,13 +3345,13 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *sch_range, *low, *high, *low_prev;
- sch_range = range;
+ sch_range = (UChar* )range;
if (reg->dmax != 0) {
if (reg->dmax == ONIG_INFINITE_DISTANCE)
- sch_range = end;
+ sch_range = (UChar* )end;
else {
sch_range += reg->dmax;
- if (sch_range > end) sch_range = end;
+ if (sch_range > end) sch_range = (UChar* )end;
}
}
if (reg->dmax != ONIG_INFINITE_DISTANCE &&
@@ -3153,13 +3366,14 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
while (s <= high) {
MATCH_AND_RETURN_CHECK;
prev = s;
- s += enc_len(reg->enc, *s);
+ s += enc_len(reg->enc, s);
}
if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
if (IS_NOT_NULL(prev)) {
- while (!ONIG_IS_NEWLINE(*prev) && s < range) {
+ while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) &&
+ s < range) {
prev = s;
- s += enc_len(reg->enc, *s);
+ s += enc_len(reg->enc, s);
}
}
}
@@ -3176,19 +3390,23 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
do {
MATCH_AND_RETURN_CHECK;
prev = s;
- s += enc_len(reg->enc, *s);
+ s += enc_len(reg->enc, s);
} while (s <= range); /* exec s == range, because empty match with /$/. */
}
else { /* backward search */
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start;
- adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
+ if (range < end)
+ adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
+ else
+ adjrange = (UChar* )end;
+
if (reg->dmax != ONIG_INFINITE_DISTANCE &&
(end - range) >= reg->threshold_len) {
do {
sch_start = s + reg->dmax;
- if (sch_start > end) sch_start = end;
+ if (sch_start > end) sch_start = (UChar* )end;
if (backward_search_range(reg, str, end, sch_start, range, adjrange,
&low, &high) <= 0)
goto mismatch;
@@ -3210,10 +3428,10 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
sch_start = s;
if (reg->dmax != 0) {
if (reg->dmax == ONIG_INFINITE_DISTANCE)
- sch_start = end;
+ sch_start = (UChar* )end;
else {
sch_start += reg->dmax;
- if (sch_start > end) sch_start = end;
+ if (sch_start > end) sch_start = (UChar* )end;
else
sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
start, sch_start);
@@ -3236,7 +3454,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
finish:
MATCH_ARG_FREE(msa);
- reg->state--; /* decrement as search counter */
+ ONIG_STATE_DEC(reg);
/* If result is mismatch and no FIND_NOT_EMPTY option,
then the region is not setted in match_at(). */
@@ -3257,7 +3475,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
mismatch_no_msa:
r = ONIG_MISMATCH;
finish_no_msa:
- reg->state--; /* decrement as search counter */
+ ONIG_STATE_DEC(reg);
#ifdef ONIG_DEBUG
if (r != ONIG_MISMATCH)
fprintf(stderr, "onig_search: error %d\n", r);
@@ -3265,7 +3483,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
return r;
match:
- reg->state--; /* decrement as search counter */
+ ONIG_STATE_DEC(reg);
MATCH_ARG_FREE(msa);
return s - str;
}
@@ -3282,18 +3500,44 @@ onig_get_options(regex_t* reg)
return reg->options;
}
+extern OnigAmbigType
+onig_get_ambig_flag(regex_t* reg)
+{
+ return reg->ambig_flag;
+}
+
extern OnigSyntaxType*
onig_get_syntax(regex_t* reg)
{
return reg->syntax;
}
-extern const char*
-onig_version(void)
+extern int
+onig_number_of_captures(regex_t* reg)
+{
+ return reg->num_mem;
+}
+
+extern int
+onig_number_of_capture_histories(regex_t* reg)
{
-#define MSTR(a) # a
+#ifdef USE_CAPTURE_HISTORY
+ int i, n;
- return (MSTR(ONIGURUMA_VERSION_MAJOR) "."
- MSTR(ONIGURUMA_VERSION_MINOR) "."
- MSTR(ONIGURUMA_VERSION_TEENY));
+ n = 0;
+ for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+ if (BIT_STATUS_AT(reg->capture_history, i) != 0)
+ n++;
+ }
+ return n;
+#else
+ return 0;
+#endif
}
+
+extern void
+onig_copy_encoding(OnigEncoding to, OnigEncoding from)
+{
+ *to = *from;
+}
+