summary | refs | log | tree | commit | diff
path: root/ext/pcre/pcrelib/pcre_exec.c
diff options
context:
space:
mode:
author: Ilia Alshanetsky <iliaa@php.net> 2010-02-03 12:59:00 +0000
committer: Ilia Alshanetsky <iliaa@php.net> 2010-02-03 12:59:00 +0000
commit: 91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8 (patch)
tree: 1817d9652f110cf0cd4644ed8586ca56d0c05d9e /ext/pcre/pcrelib/pcre_exec.c
parent: 43d5429381237518ced74149f29a851c30307bea (diff)
download: php-git-91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8.tar.gz
Downgrade bundled PCRE to version 7.9 due to 8.0+ version use of C99
Diffstat (limited to 'ext/pcre/pcrelib/pcre_exec.c')
-rw-r--r-- ext/pcre/pcrelib/pcre_exec.c 1323
1 files changed, 280 insertions, 1043 deletions
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c
index 46f667e21b..073cf2410a 100644
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2010 University of Cambridge
+ Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -87,7 +87,7 @@ static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
/*************************************************
* Debugging function to print chars *
*************************************************/
@@ -139,7 +139,7 @@ match_ref(int offset, register USPTR eptr, int length, match_data *md,
{
USPTR p = md->start_subject + md->offset_vector[offset];
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
if (eptr >= md->end_subject)
printf("matching subject <null>");
else
@@ -252,7 +252,7 @@ actuall used in this definition. */
#ifndef NO_RECURSE
#define REGISTER register
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
{ \
printf("match() called in line %d\n", __LINE__); \
@@ -396,32 +396,10 @@ typedef struct heapframe {
/* This function is called recursively in many circumstances. Whenever it
returns a negative (error) response, the outer incarnation must also return the
-same response. */
+same response.
-/* These macros pack up tests that are used for partial matching, and which
-appears several times in the code. We set the "hit end" flag if the pointer is
-at the end of the subject and also past the start of the subject (i.e.
-something has been matched). For hard partial matching, we then return
-immediately. The second one is used when we already know we are past the end of
-the subject. */
-
-#define CHECK_PARTIAL()\
- if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
- {\
- md->hitend = TRUE;\
- if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
- }
-
-#define SCHECK_PARTIAL()\
- if (md->partial != 0 && eptr > mstart)\
- {\
- md->hitend = TRUE;\
- if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
- }
-
-
-/* Performance note: It might be tempting to extract commonly used fields from
-the md structure (e.g. utf8, end_subject) into individual variables to improve
+Performance note: It might be tempting to extract commonly used fields from the
+md structure (e.g. utf8, end_subject) into individual variables to improve
performance. Tests using gcc on a SPARC disproved this; in the first case, it
made performance worse.
@@ -620,7 +598,7 @@ TAIL_RECURSE:
/* OK, now we can get on with the real code of the function. Recursive calls
are specified by the macro RMATCH and RRETURN is used to return. When
NO_RECURSE is *not* defined, these just turn into a recursive call to match()
-and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
+and a "return", respectively (possibly with some debugging if DEBUG is
defined). However, RMATCH isn't like a function call because it's quite a
complicated macro. It has to be used in one particular way. This shouldn't,
however, impact performance when true recursion is being used. */
@@ -662,6 +640,14 @@ for (;;)
minimize = possessive = FALSE;
op = *ecode;
+ /* For partial matching, remember if we ever hit the end of the subject after
+ matching at least one subject character. */
+
+ if (md->partial &&
+ eptr >= md->end_subject &&
+ eptr > mstart)
+ md->hitend = TRUE;
+
switch(op)
{
case OP_FAIL:
@@ -711,7 +697,7 @@ for (;;)
number = GET2(ecode, 1+LINK_SIZE);
offset = number << 1;
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
printf("start bracket %d\n", number);
printf("subject=");
pchars(eptr, 16, TRUE, md);
@@ -837,139 +823,18 @@ for (;;)
/* Now see what the actual condition is */
- if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
+ if (condcode == OP_RREF) /* Recursion test */
{
- if (md->recursive == NULL) /* Not recursing => FALSE */
- {
- condition = FALSE;
- ecode += GET(ecode, 1);
- }
- else
- {
- int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
- condition = (recno == RREF_ANY || recno == md->recursive->group_num);
-
- /* If the test is for recursion into a specific subpattern, and it is
- false, but the test was set up by name, scan the table to see if the
- name refers to any other numbers, and test them. The condition is true
- if any one is set. */
-
- if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
- {
- uschar *slotA = md->name_table;
- for (i = 0; i < md->name_count; i++)
- {
- if (GET2(slotA, 0) == recno) break;
- slotA += md->name_entry_size;
- }
-
- /* Found a name for the number - there can be only one; duplicate
- names for different numbers are allowed, but not vice versa. First
- scan down for duplicates. */
-
- if (i < md->name_count)
- {
- uschar *slotB = slotA;
- while (slotB > md->name_table)
- {
- slotB -= md->name_entry_size;
- if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
- {
- condition = GET2(slotB, 0) == md->recursive->group_num;
- if (condition) break;
- }
- else break;
- }
-
- /* Scan up for duplicates */
-
- if (!condition)
- {
- slotB = slotA;
- for (i++; i < md->name_count; i++)
- {
- slotB += md->name_entry_size;
- if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
- {
- condition = GET2(slotB, 0) == md->recursive->group_num;
- if (condition) break;
- }
- else break;
- }
- }
- }
- }
-
- /* Chose branch according to the condition */
-
- ecode += condition? 3 : GET(ecode, 1);
- }
+ offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
+ condition = md->recursive != NULL &&
+ (offset == RREF_ANY || offset == md->recursive->group_num);
+ ecode += condition? 3 : GET(ecode, 1);
}
- else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
+ else if (condcode == OP_CREF) /* Group used test */
{
offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
condition = offset < offset_top && md->offset_vector[offset] >= 0;
-
- /* If the numbered capture is unset, but the reference was by name,
- scan the table to see if the name refers to any other numbers, and test
- them. The condition is true if any one is set. This is tediously similar
- to the code above, but not close enough to try to amalgamate. */
-
- if (!condition && condcode == OP_NCREF)
- {
- int refno = offset >> 1;
- uschar *slotA = md->name_table;
-
- for (i = 0; i < md->name_count; i++)
- {
- if (GET2(slotA, 0) == refno) break;
- slotA += md->name_entry_size;
- }
-
- /* Found a name for the number - there can be only one; duplicate names
- for different numbers are allowed, but not vice versa. First scan down
- for duplicates. */
-
- if (i < md->name_count)
- {
- uschar *slotB = slotA;
- while (slotB > md->name_table)
- {
- slotB -= md->name_entry_size;
- if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
- {
- offset = GET2(slotB, 0) << 1;
- condition = offset < offset_top &&
- md->offset_vector[offset] >= 0;
- if (condition) break;
- }
- else break;
- }
-
- /* Scan up for duplicates */
-
- if (!condition)
- {
- slotB = slotA;
- for (i++; i < md->name_count; i++)
- {
- slotB += md->name_entry_size;
- if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
- {
- offset = GET2(slotB, 0) << 1;
- condition = offset < offset_top &&
- md->offset_vector[offset] >= 0;
- if (condition) break;
- }
- else break;
- }
- }
- }
- }
-
- /* Chose branch according to the condition */
-
ecode += condition? 3 : GET(ecode, 1);
}
@@ -1030,30 +895,6 @@ for (;;)
break;
- /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
- to close any currently open capturing brackets. */
-
- case OP_CLOSE:
- number = GET2(ecode, 1);
- offset = number << 1;
-
-#ifdef PCRE_DEBUG
- printf("end bracket %d at *ACCEPT", number);
- printf("\n");
-#endif
-
- md->capture_last = number;
- if (offset >= md->offset_max) md->offset_overflow = TRUE; else
- {
- md->offset_vector[offset] =
- md->offset_vector[md->offset_end - number];
- md->offset_vector[offset+1] = eptr - md->start_subject;
- if (offset_top <= offset) offset_top = offset + 2;
- }
- ecode += 3;
- break;
-
-
/* End of the pattern, either real or forced. If we are in a top-level
recursion, we should restore the offsets appropriately and continue from
after the call. */
@@ -1067,26 +908,16 @@ for (;;)
md->recursive = rec->prevrec;
memmove(md->offset_vector, rec->offset_save,
rec->saved_max * sizeof(int));
- offset_top = rec->save_offset_top;
mstart = rec->save_start;
ims = original_ims;
ecode = rec->after_call;
break;
}
- /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
- set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
- the subject. In both cases, backtracking will then try other alternatives,
- if any. */
-
- if (eptr == mstart &&
- (md->notempty ||
- (md->notempty_atstart &&
- mstart == md->start_subject + md->start_offset)))
- RRETURN(MATCH_NOMATCH);
-
- /* Otherwise, we have a match. */
+ /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
+ string - backtracking will then try other alternatives, if any. */
+ if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
md->end_match_ptr = eptr; /* Record where we ended */
md->end_offset_top = offset_top; /* and how many extracts were taken */
md->start_match_ptr = mstart; /* and the start (\K can modify) */
@@ -1131,9 +962,7 @@ for (;;)
offset_top = md->end_offset_top;
continue;
- /* Negative assertion: all branches must fail to match. Encountering SKIP,
- PRUNE, or COMMIT means we must assume failure without checking subsequent
- branches. */
+ /* Negative assertion: all branches must fail to match */
case OP_ASSERT_NOT:
case OP_ASSERTBACK_NOT:
@@ -1142,11 +971,6 @@ for (;;)
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
RM5);
if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
- if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
- {
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
- break;
- }
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
ecode += GET(ecode,1);
}
@@ -1184,9 +1008,8 @@ for (;;)
if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
}
- /* Save the earliest consulted character, then skip to next op code */
+ /* Skip to next op code */
- if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
ecode += 1 + LINK_SIZE;
break;
@@ -1266,7 +1089,6 @@ for (;;)
memcpy(new_recursive.offset_save, md->offset_vector,
new_recursive.saved_max * sizeof(int));
new_recursive.save_start = mstart;
- new_recursive.save_offset_top = offset_top;
mstart = eptr;
/* OK, now we can do the recursion. For each top-level alternative we
@@ -1466,7 +1288,7 @@ for (;;)
number = GET2(prev, 1+LINK_SIZE);
offset = number << 1;
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
printf("end bracket %d", number);
printf("\n");
#endif
@@ -1491,7 +1313,6 @@ for (;;)
mstart = rec->save_start;
memcpy(md->offset_vector, rec->offset_save,
rec->saved_max * sizeof(int));
- offset_top = rec->save_offset_top;
ecode = rec->after_call;
ims = original_ims;
break;
@@ -1631,8 +1452,7 @@ for (;;)
/* Find out if the previous and current characters are "word" characters.
It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
- be "non-word" characters. Remember the earliest consulted character for
- partial matching. */
+ be "non-word" characters. */
#ifdef SUPPORT_UTF8
if (utf8)
@@ -1641,16 +1461,10 @@ for (;;)
{
USPTR lastptr = eptr - 1;
while((*lastptr & 0xc0) == 0x80) lastptr--;
- if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
GETCHAR(c, lastptr);
prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
}
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- cur_is_word = FALSE;
- }
- else
+ if (eptr >= md->end_subject) cur_is_word = FALSE; else
{
GETCHAR(c, eptr);
cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
@@ -1659,20 +1473,13 @@ for (;;)
else
#endif
- /* Not in UTF-8 mode */
+ /* More streamlined when not in UTF-8 mode */
{
- if (eptr == md->start_subject) prev_is_word = FALSE; else
- {
- if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
- prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
- }
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- cur_is_word = FALSE;
- }
- else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
+ prev_is_word = (eptr != md->start_subject) &&
+ ((md->ctypes[eptr[-1]] & ctype_word) != 0);
+ cur_is_word = (eptr < md->end_subject) &&
+ ((md->ctypes[*eptr] & ctype_word) != 0);
}
/* Now see if the situation is what we want */
@@ -1690,11 +1497,7 @@ for (;;)
/* Fall through */
case OP_ALLANY:
- if (eptr++ >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
ecode++;
break;
@@ -1703,20 +1506,12 @@ for (;;)
any byte, even newline, independent of the setting of PCRE_DOTALL. */
case OP_ANYBYTE:
- if (eptr++ >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
ecode++;
break;
case OP_NOT_DIGIT:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1729,11 +1524,7 @@ for (;;)
break;
case OP_DIGIT:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1746,11 +1537,7 @@ for (;;)
break;
case OP_NOT_WHITESPACE:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1763,11 +1550,7 @@ for (;;)
break;
case OP_WHITESPACE:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1780,11 +1563,7 @@ for (;;)
break;
case OP_NOT_WORDCHAR:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1797,11 +1576,7 @@ for (;;)
break;
case OP_WORDCHAR:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
if (
#ifdef SUPPORT_UTF8
@@ -1814,11 +1589,7 @@ for (;;)
break;
case OP_ANYNL:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
switch(c)
{
@@ -1842,11 +1613,7 @@ for (;;)
break;
case OP_NOT_HSPACE:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
switch(c)
{
@@ -1876,11 +1643,7 @@ for (;;)
break;
case OP_HSPACE:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
switch(c)
{
@@ -1910,11 +1673,7 @@ for (;;)
break;
case OP_NOT_VSPACE:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
switch(c)
{
@@ -1932,11 +1691,7 @@ for (;;)
break;
case OP_VSPACE:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
switch(c)
{
@@ -1959,11 +1714,7 @@ for (;;)
case OP_PROP:
case OP_NOTPROP:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
{
const ucd_record *prop = GET_UCD(c);
@@ -2008,11 +1759,7 @@ for (;;)
is in the binary; otherwise a compile-time error occurs. */
case OP_EXTUNI:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
{
int category = UCD_CATEGORY(c);
@@ -2092,11 +1839,7 @@ for (;;)
break;
default: /* No repeat follows */
- if (!match_ref(offset, eptr, length, md, ims))
- {
- CHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
eptr += length;
continue; /* With the main loop */
}
@@ -2112,11 +1855,7 @@ for (;;)
for (i = 1; i <= min; i++)
{
- if (!match_ref(offset, eptr, length, md, ims))
- {
- CHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
eptr += length;
}
@@ -2133,12 +1872,8 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (!match_ref(offset, eptr, length, md, ims))
- {
- CHECK_PARTIAL();
+ if (fi >= max || !match_ref(offset, eptr, length, md, ims))
RRETURN(MATCH_NOMATCH);
- }
eptr += length;
}
/* Control never gets here */
@@ -2151,11 +1886,7 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- if (!match_ref(offset, eptr, length, md, ims))
- {
- CHECK_PARTIAL();
- break;
- }
+ if (!match_ref(offset, eptr, length, md, ims)) break;
eptr += length;
}
while (eptr >= pp)
@@ -2169,6 +1900,8 @@ for (;;)
}
/* Control never gets here */
+
+
/* Match a bit-mapped character class, possibly repeatedly. This op code is
used when all the characters in the class have values in the range 0-255,
and either the matching is caseful, or the characters are in the range
@@ -2223,11 +1956,7 @@ for (;;)
{
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
if (c > 255)
{
@@ -2245,11 +1974,7 @@ for (;;)
{
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
c = *eptr++;
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
}
@@ -2273,12 +1998,7 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
if (c > 255)
{
@@ -2298,12 +2018,7 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
c = *eptr++;
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
}
@@ -2324,11 +2039,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
if (c > 255)
{
@@ -2354,11 +2065,7 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
c = *eptr;
if ((data[c/8] & (1 << (c&7))) == 0) break;
eptr++;
@@ -2420,11 +2127,7 @@ for (;;)
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
}
@@ -2443,12 +2146,7 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
}
@@ -2463,11 +2161,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLENTEST(c, eptr, len);
if (!_pcre_xclass(c, data)) break;
eptr += len;
@@ -2495,11 +2189,7 @@ for (;;)
length = 1;
ecode++;
GETCHARLEN(fc, ecode, length);
- if (length > md->end_subject - eptr)
- {
- CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
- RRETURN(MATCH_NOMATCH);
- }
+ if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
}
else
@@ -2507,11 +2197,7 @@ for (;;)
/* Non-UTF-8 mode */
{
- if (md->end_subject - eptr < 1)
- {
- SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
- RRETURN(MATCH_NOMATCH);
- }
+ if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
ecode += 2;
}
@@ -2527,11 +2213,7 @@ for (;;)
ecode++;
GETCHARLEN(fc, ecode, length);
- if (length > md->end_subject - eptr)
- {
- CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
- RRETURN(MATCH_NOMATCH);
- }
+ if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
/* If the pattern character's value is < 128, we have only one byte, and
can use the fast lookup table. */
@@ -2566,11 +2248,7 @@ for (;;)
/* Non-UTF-8 mode */
{
- if (md->end_subject - eptr < 1)
- {
- SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
- RRETURN(MATCH_NOMATCH);
- }
+ if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
ecode += 2;
}
@@ -2624,12 +2302,13 @@ for (;;)
case OP_MINQUERY:
c = *ecode++ - OP_STAR;
minimize = (c & 1) != 0;
-
min = rep_min[c]; /* Pick up values from tables; */
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
- /* Common code for all repeated single-character matches. */
+ /* Common code for all repeated single-character matches. We can give
+ up quickly if there are fewer than the minimum number of characters left in
+ the subject. */
REPEATCHAR:
#ifdef SUPPORT_UTF8
@@ -2638,6 +2317,7 @@ for (;;)
length = 1;
charptr = ecode;
GETCHARLEN(fc, ecode, length);
+ if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
ecode += length;
/* Handle multibyte character matching specially here. There is
@@ -2655,18 +2335,18 @@ for (;;)
for (i = 1; i <= min; i++)
{
- if (eptr <= md->end_subject - length &&
- memcmp(eptr, charptr, length) == 0) eptr += length;
+ if (memcmp(eptr, charptr, length) == 0) eptr += length;
#ifdef SUPPORT_UCP
- else if (oclength > 0 &&
- eptr <= md->end_subject - oclength &&
- memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
-#endif /* SUPPORT_UCP */
+ /* Need braces because of following else */
+ else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
else
{
- CHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
+ eptr += oclength;
}
+#else /* without SUPPORT_UCP */
+ else { RRETURN(MATCH_NOMATCH); }
+#endif /* SUPPORT_UCP */
}
if (min == max) continue;
@@ -2677,19 +2357,19 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr <= md->end_subject - length &&
- memcmp(eptr, charptr, length) == 0) eptr += length;
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+ if (memcmp(eptr, charptr, length) == 0) eptr += length;
#ifdef SUPPORT_UCP
- else if (oclength > 0 &&
- eptr <= md->end_subject - oclength &&
- memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
-#endif /* SUPPORT_UCP */
+ /* Need braces because of following else */
+ else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
else
{
- CHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
+ eptr += oclength;
}
+#else /* without SUPPORT_UCP */
+ else { RRETURN (MATCH_NOMATCH); }
+#endif /* SUPPORT_UCP */
}
/* Control never gets here */
}
@@ -2699,34 +2379,33 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- if (eptr <= md->end_subject - length &&
- memcmp(eptr, charptr, length) == 0) eptr += length;
+ if (eptr > md->end_subject - length) break;
+ if (memcmp(eptr, charptr, length) == 0) eptr += length;
#ifdef SUPPORT_UCP
- else if (oclength > 0 &&
- eptr <= md->end_subject - oclength &&
- memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
-#endif /* SUPPORT_UCP */
+ else if (oclength == 0) break;
else
{
- CHECK_PARTIAL();
- break;
+ if (memcmp(eptr, occhars, oclength) != 0) break;
+ eptr += oclength;
}
+#else /* without SUPPORT_UCP */
+ else break;
+#endif /* SUPPORT_UCP */
}
if (possessive) continue;
-
for(;;)
- {
- RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
+ {
+ RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (eptr == pp) RRETURN(MATCH_NOMATCH);
#ifdef SUPPORT_UCP
- eptr--;
- BACKCHAR(eptr);
+ eptr--;
+ BACKCHAR(eptr);
#else /* without SUPPORT_UCP */
- eptr -= length;
+ eptr -= length;
#endif /* SUPPORT_UCP */
- }
+ }
}
/* Control never gets here */
}
@@ -2739,8 +2418,10 @@ for (;;)
#endif /* SUPPORT_UTF8 */
/* When not in UTF-8 mode, load a single-byte character. */
-
- fc = *ecode++;
+ {
+ if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
+ fc = *ecode++;
+ }
/* The value of fc at this point is always less than 256, though we may or
may not be in UTF-8 mode. The code is duplicated for the caseless and
@@ -2758,14 +2439,7 @@ for (;;)
{
fc = md->lcc[fc];
for (i = 1; i <= min; i++)
- {
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
- }
if (min == max) continue;
if (minimize)
{
@@ -2773,13 +2447,9 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ if (fi >= max || eptr >= md->end_subject ||
+ fc != md->lcc[*eptr++])
RRETURN(MATCH_NOMATCH);
- }
- if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
@@ -2788,17 +2458,10 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (fc != md->lcc[*eptr]) break;
+ if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
eptr++;
}
-
if (possessive) continue;
-
while (eptr >= pp)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
@@ -2814,31 +2477,16 @@ for (;;)
else
{
- for (i = 1; i <= min; i++)
- {
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
- }
-
+ for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
if (min == max) continue;
-
if (minimize)
{
for (fi = min;; fi++)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
RRETURN(MATCH_NOMATCH);
- }
- if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
@@ -2847,16 +2495,10 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (fc != *eptr) break;
+ if (eptr >= md->end_subject || fc != *eptr) break;
eptr++;
}
if (possessive) continue;
-
while (eptr >= pp)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
@@ -2872,11 +2514,7 @@ for (;;)
checking can be multibyte. */
case OP_NOT:
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
ecode++;
GETCHARINCTEST(c, eptr);
if ((ims & PCRE_CASELESS) != 0)
@@ -2953,9 +2591,12 @@ for (;;)
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
- /* Common code for all repeated single-byte matches. */
+ /* Common code for all repeated single-byte matches. We can give up quickly
+ if there are fewer than the minimum number of bytes left in the
+ subject. */
REPEATNOTCHAR:
+ if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
fc = *ecode++;
/* The code is duplicated for the caseless and caseful cases, for speed,
@@ -2980,11 +2621,6 @@ for (;;)
register unsigned int d;
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
GETCHARINC(d, eptr);
if (d < 256) d = md->lcc[d];
if (fc == d) RRETURN(MATCH_NOMATCH);
@@ -2996,14 +2632,7 @@ for (;;)
/* Not UTF-8 mode */
{
for (i = 1; i <= min; i++)
- {
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
- }
}
if (min == max) continue;
@@ -3019,15 +2648,11 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(d, eptr);
if (d < 256) d = md->lcc[d];
if (fc == d) RRETURN(MATCH_NOMATCH);
+
}
}
else
@@ -3038,13 +2663,8 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
RRETURN(MATCH_NOMATCH);
- }
- if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
}
}
/* Control never gets here */
@@ -3064,11 +2684,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(d, eptr, len);
if (d < 256) d = md->lcc[d];
if (fc == d) break;
@@ -3089,12 +2705,7 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (fc == md->lcc[*eptr]) break;
+ if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
eptr++;
}
if (possessive) continue;
@@ -3122,11 +2733,6 @@ for (;;)
register unsigned int d;
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
GETCHARINC(d, eptr);
if (fc == d) RRETURN(MATCH_NOMATCH);
}
@@ -3136,14 +2742,7 @@ for (;;)
/* Not UTF-8 mode */
{
for (i = 1; i <= min; i++)
- {
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
- }
}
if (min == max) continue;
@@ -3159,12 +2758,7 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(d, eptr);
if (fc == d) RRETURN(MATCH_NOMATCH);
}
@@ -3177,13 +2771,8 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
RRETURN(MATCH_NOMATCH);
- }
- if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
}
}
/* Control never gets here */
@@ -3203,11 +2792,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(d, eptr, len);
if (fc == d) break;
eptr += len;
@@ -3227,12 +2812,7 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (fc == *eptr) break;
+ if (eptr >= md->end_subject || fc == *eptr) break;
eptr++;
}
if (possessive) continue;
@@ -3326,10 +2906,13 @@ for (;;)
/* First, ensure the minimum number of matches are present. Use inline
code for maximizing the speed, and do the type test once at the start
- (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
+ (i.e. keep it out of the loop). Also we can test that there are at least
+ the minimum number of bytes before we start. This isn't as effective in
+ UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
is tidier. Also separate the UCP code, which can be the same for both UTF-8
and single-bytes. */
+ if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
if (min > 0)
{
#ifdef SUPPORT_UCP
@@ -3341,11 +2924,7 @@ for (;;)
if (prop_fail_result) RRETURN(MATCH_NOMATCH);
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
}
break;
@@ -3353,11 +2932,7 @@ for (;;)
case PT_LAMP:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
@@ -3370,11 +2945,7 @@ for (;;)
case PT_GC:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
@@ -3385,11 +2956,7 @@ for (;;)
case PT_PC:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
@@ -3400,11 +2967,7 @@ for (;;)
case PT_SC:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
@@ -3424,19 +2987,16 @@ for (;;)
{
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
int len = 1;
- if (!utf8) c = *eptr;
- else { GETCHARLEN(c, eptr, len); }
+ if (!utf8) c = *eptr; else
+ {
+ GETCHARLEN(c, eptr, len);
+ }
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
@@ -3455,12 +3015,8 @@ for (;;)
case OP_ANY:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ if (eptr >= md->end_subject || IS_NEWLINE(eptr))
RRETURN(MATCH_NOMATCH);
- }
- if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -3469,29 +3025,20 @@ for (;;)
case OP_ALLANY:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
break;
case OP_ANYBYTE:
- if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
eptr += min;
break;
case OP_ANYNL:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
switch(c)
{
@@ -3517,11 +3064,7 @@ for (;;)
case OP_NOT_HSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
switch(c)
{
@@ -3553,11 +3096,7 @@ for (;;)
case OP_HSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
switch(c)
{
@@ -3589,11 +3128,7 @@ for (;;)
case OP_NOT_VSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
switch(c)
{
@@ -3613,11 +3148,7 @@ for (;;)
case OP_VSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
switch(c)
{
@@ -3637,11 +3168,7 @@ for (;;)
case OP_NOT_DIGIT:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
RRETURN(MATCH_NOMATCH);
@@ -3651,12 +3178,8 @@ for (;;)
case OP_DIGIT:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
+ if (eptr >= md->end_subject ||
+ *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
RRETURN(MATCH_NOMATCH);
/* No need to skip more bytes - we know it's a 1-byte character */
}
@@ -3665,12 +3188,8 @@ for (;;)
case OP_NOT_WHITESPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
+ if (eptr >= md->end_subject ||
+ (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
RRETURN(MATCH_NOMATCH);
while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
}
@@ -3679,12 +3198,8 @@ for (;;)
case OP_WHITESPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
+ if (eptr >= md->end_subject ||
+ *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
RRETURN(MATCH_NOMATCH);
/* No need to skip more bytes - we know it's a 1-byte character */
}
@@ -3693,12 +3208,8 @@ for (;;)
case OP_NOT_WORDCHAR:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
+ if (eptr >= md->end_subject ||
+ (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
RRETURN(MATCH_NOMATCH);
while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
}
@@ -3707,12 +3218,8 @@ for (;;)
case OP_WORDCHAR:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
+ if (eptr >= md->end_subject ||
+ *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
/* No need to skip more bytes - we know it's a 1-byte character */
}
@@ -3726,49 +3233,34 @@ for (;;)
#endif /* SUPPORT_UTF8 */
/* Code for the non-UTF-8 case for minimum matching of operators other
- than OP_PROP and OP_NOTPROP. */
+ than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
+ number of bytes present, as this was tested above. */
switch(ctype)
{
case OP_ANY:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
eptr++;
}
break;
case OP_ALLANY:
- if (eptr > md->end_subject - min)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
eptr += min;
break;
case OP_ANYBYTE:
- if (eptr > md->end_subject - min)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
eptr += min;
break;
+ /* Because of the CRLF case, we can't assume the minimum number of
+ bytes are present in this case. */
+
case OP_ANYNL:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
switch(*eptr++)
{
default: RRETURN(MATCH_NOMATCH);
@@ -3790,11 +3282,7 @@ for (;;)
case OP_NOT_HSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
switch(*eptr++)
{
default: break;
@@ -3809,11 +3297,7 @@ for (;;)
case OP_HSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
switch(*eptr++)
{
default: RRETURN(MATCH_NOMATCH);
@@ -3828,11 +3312,7 @@ for (;;)
case OP_NOT_VSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
switch(*eptr++)
{
default: break;
@@ -3849,11 +3329,7 @@ for (;;)
case OP_VSPACE:
for (i = 1; i <= min; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
switch(*eptr++)
{
default: RRETURN(MATCH_NOMATCH);
@@ -3869,76 +3345,34 @@ for (;;)
case OP_NOT_DIGIT:
for (i = 1; i <= min; i++)
- {
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
- }
break;
case OP_DIGIT:
for (i = 1; i <= min; i++)
- {
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
- }
break;
case OP_NOT_WHITESPACE:
for (i = 1; i <= min; i++)
- {
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
- }
break;
case OP_WHITESPACE:
for (i = 1; i <= min; i++)
- {
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
- }
break;
case OP_NOT_WORDCHAR:
for (i = 1; i <= min; i++)
- {
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
if ((md->ctypes[*eptr++] & ctype_word) != 0)
RRETURN(MATCH_NOMATCH);
- }
break;
case OP_WORDCHAR:
for (i = 1; i <= min; i++)
- {
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
if ((md->ctypes[*eptr++] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
- }
break;
default:
@@ -3966,12 +3400,7 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
if (prop_fail_result) RRETURN(MATCH_NOMATCH);
}
@@ -3982,12 +3411,7 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
@@ -4002,12 +3426,7 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
@@ -4020,12 +3439,7 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
@@ -4038,12 +3452,7 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
@@ -4065,20 +3474,17 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
int len = 1;
- if (!utf8) c = *eptr;
- else { GETCHARLEN(c, eptr, len); }
+ if (!utf8) c = *eptr; else
+ {
+ GETCHARLEN(c, eptr, len);
+ }
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
@@ -4097,14 +3503,10 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (ctype == OP_ANY && IS_NEWLINE(eptr))
+ if (fi >= max || eptr >= md->end_subject ||
+ (ctype == OP_ANY && IS_NEWLINE(eptr)))
RRETURN(MATCH_NOMATCH);
+
GETCHARINC(c, eptr);
switch(ctype)
{
@@ -4260,14 +3662,10 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (ctype == OP_ANY && IS_NEWLINE(eptr))
+ if (fi >= max || eptr >= md->end_subject ||
+ (ctype == OP_ANY && IS_NEWLINE(eptr)))
RRETURN(MATCH_NOMATCH);
+
c = *eptr++;
switch(ctype)
{
@@ -4392,11 +3790,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
if (prop_fail_result) break;
eptr+= len;
@@ -4407,11 +3801,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
@@ -4426,11 +3816,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
@@ -4443,11 +3829,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
@@ -4460,11 +3842,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
@@ -4493,11 +3871,7 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) break;
@@ -4517,7 +3891,6 @@ for (;;)
/* eptr is now past the end of the maximum run */
if (possessive) continue;
-
for(;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
@@ -4553,12 +3926,7 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -4570,12 +3938,7 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -4587,11 +3950,7 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -4604,22 +3963,15 @@ for (;;)
case OP_ANYBYTE:
c = max - min;
if (c > (unsigned int)(md->end_subject - eptr))
- {
- eptr = md->end_subject;
- SCHECK_PARTIAL();
- }
- else eptr += c;
+ c = md->end_subject - eptr;
+ eptr += c;
break;
case OP_ANYNL:
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
if (c == 0x000d)
{
@@ -4644,11 +3996,7 @@ for (;;)
{
BOOL gotspace;
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
switch(c)
{
@@ -4686,11 +4034,7 @@ for (;;)
{
BOOL gotspace;
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
switch(c)
{
@@ -4714,11 +4058,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
eptr+= len;
@@ -4729,11 +4069,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
eptr+= len;
@@ -4744,11 +4080,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
eptr+= len;
@@ -4759,11 +4091,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
eptr+= len;
@@ -4774,11 +4102,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
eptr+= len;
@@ -4789,11 +4113,7 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
eptr+= len;
@@ -4825,12 +4145,7 @@ for (;;)
case OP_ANY:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
eptr++;
}
break;
@@ -4839,21 +4154,14 @@ for (;;)
case OP_ANYBYTE:
c = max - min;
if (c > (unsigned int)(md->end_subject - eptr))
- {
- eptr = md->end_subject;
- SCHECK_PARTIAL();
- }
- else eptr += c;
+ c = md->end_subject - eptr;
+ eptr += c;
break;
case OP_ANYNL:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
c = *eptr;
if (c == 0x000d)
{
@@ -4874,11 +4182,7 @@ for (;;)
case OP_NOT_HSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
c = *eptr;
if (c == 0x09 || c == 0x20 || c == 0xa0) break;
eptr++;
@@ -4888,11 +4192,7 @@ for (;;)
case OP_HSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
c = *eptr;
if (c != 0x09 && c != 0x20 && c != 0xa0) break;
eptr++;
@@ -4902,11 +4202,7 @@ for (;;)
case OP_NOT_VSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
c = *eptr;
if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
break;
@@ -4917,11 +4213,7 @@ for (;;)
case OP_VSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
+ if (eptr >= md->end_subject) break;
c = *eptr;
if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
break;
@@ -4932,12 +4224,8 @@ for (;;)
case OP_NOT_DIGIT:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
break;
- }
- if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
eptr++;
}
break;
@@ -4945,12 +4233,8 @@ for (;;)
case OP_DIGIT:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
break;
- }
- if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
eptr++;
}
break;
@@ -4958,12 +4242,8 @@ for (;;)
case OP_NOT_WHITESPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
break;
- }
- if ((md->ctypes[*eptr] & ctype_space) != 0) break;
eptr++;
}
break;
@@ -4971,12 +4251,8 @@ for (;;)
case OP_WHITESPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
break;
- }
- if ((md->ctypes[*eptr] & ctype_space) == 0) break;
eptr++;
}
break;
@@ -4984,12 +4260,8 @@ for (;;)
case OP_NOT_WORDCHAR:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
break;
- }
- if ((md->ctypes[*eptr] & ctype_word) != 0) break;
eptr++;
}
break;
@@ -4997,12 +4269,8 @@ for (;;)
case OP_WORDCHAR:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
+ if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
break;
- }
- if ((md->ctypes[*eptr] & ctype_word) == 0) break;
eptr++;
}
break;
@@ -5180,7 +4448,6 @@ const uschar *tables;
const uschar *start_bits = NULL;
USPTR start_match = (USPTR)subject + start_offset;
USPTR end_subject;
-USPTR start_partial = NULL;
USPTR req_byte_ptr = start_match - 1;
pcre_study_data internal_study;
@@ -5197,13 +4464,6 @@ if (re == NULL || subject == NULL ||
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
-/* This information is for finding all the numbers associated with a given
-name, for condition testing. */
-
-md->name_table = (uschar *)re + re->name_table_offset;
-md->name_count = re->name_count;
-md->name_entry_size = re->name_entry_size;
-
/* Fish out the optional data from the extra_data structure, first setting
the default values. */
@@ -5271,9 +4531,7 @@ md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
md->notbol = (options & PCRE_NOTBOL) != 0;
md->noteol = (options & PCRE_NOTEOL) != 0;
md->notempty = (options & PCRE_NOTEMPTY) != 0;
-md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
-md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
- ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
+md->partial = (options & PCRE_PARTIAL) != 0;
md->hitend = FALSE;
md->recursive = NULL; /* No recursion at top level */
@@ -5347,9 +4605,8 @@ else
}
}
-/* Partial matching was originally supported only for a restricted set of
-regexes; from release 8.00 there are no restrictions, but the bits are still
-defined (though never set). So there's no harm in leaving this code. */
+/* Partial matching is supported only for a restricted set of regexes at the
+moment. */
if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
return PCRE_ERROR_BADPARTIAL;
@@ -5436,7 +4693,7 @@ if (!anchored)
}
else
if (!startline && study != NULL &&
- (study->flags & PCRE_STUDY_MAPPED) != 0)
+ (study->options & PCRE_STUDY_MAPPED) != 0)
start_bits = study->start_bits;
}
@@ -5563,94 +4820,79 @@ for(;;)
end_subject = save_end_subject;
- /* The following two optimizations are disabled for partial matching or if
+#ifdef DEBUG /* Sigh. Some compilers never learn. */
+ printf(">>>> Match against: ");
+ pchars(start_match, end_subject - start_match, TRUE, md);
+ printf("\n");
+#endif
+
+ /* If req_byte is set, we know that that character must appear in the
+ subject for the match to succeed. If the first character is set, req_byte
+ must be later in the subject; otherwise the test starts at the match point.
+ This optimization can save a huge amount of backtracking in patterns with
+ nested unlimited repeats that aren't going to match. Writing separate code
+ for cased/caseless versions makes it go faster, as does using an
+ autoincrement and backing off on a match.
+
+ HOWEVER: when the subject string is very, very long, searching to its end
+ can take a long time, and give bad performance on quite ordinary patterns.
+ This showed up when somebody was matching something like /^\d+C/ on a
+ 32-megabyte string... so we don't do this when the string is sufficiently
+ long.
+
+ ALSO: this processing is disabled when partial matching is requested, or if
disabling is explicitly requested. */
- if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
+ if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
+ req_byte >= 0 &&
+ end_subject - start_match < REQ_BYTE_MAX &&
+ !md->partial)
{
- /* If the pattern was studied, a minimum subject length may be set. This is
- a lower bound; no actual string of that length may actually match the
- pattern. Although the value is, strictly, in characters, we treat it as
- bytes to avoid spending too much time in this optimization. */
+ register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
- if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
- (pcre_uint32)(end_subject - start_match) < study->minlength)
- {
- rc = MATCH_NOMATCH;
- break;
- }
+ /* We don't need to repeat the search if we haven't yet reached the
+ place we found it at last time. */
- /* If req_byte is set, we know that that character must appear in the
- subject for the match to succeed. If the first character is set, req_byte
- must be later in the subject; otherwise the test starts at the match point.
- This optimization can save a huge amount of backtracking in patterns with
- nested unlimited repeats that aren't going to match. Writing separate code
- for cased/caseless versions makes it go faster, as does using an
- autoincrement and backing off on a match.
-
- HOWEVER: when the subject string is very, very long, searching to its end
- can take a long time, and give bad performance on quite ordinary patterns.
- This showed up when somebody was matching something like /^\d+C/ on a
- 32-megabyte string... so we don't do this when the string is sufficiently
- long. */
-
- if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
+ if (p > req_byte_ptr)
{
- register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
-
- /* We don't need to repeat the search if we haven't yet reached the
- place we found it at last time. */
-
- if (p > req_byte_ptr)
+ if (req_byte_caseless)
{
- if (req_byte_caseless)
+ while (p < end_subject)
{
- while (p < end_subject)
- {
- register int pp = *p++;
- if (pp == req_byte || pp == req_byte2) { p--; break; }
- }
+ register int pp = *p++;
+ if (pp == req_byte || pp == req_byte2) { p--; break; }
}
- else
+ }
+ else
+ {
+ while (p < end_subject)
{
- while (p < end_subject)
- {
- if (*p++ == req_byte) { p--; break; }
- }
+ if (*p++ == req_byte) { p--; break; }
}
+ }
- /* If we can't find the required character, break the matching loop,
- forcing a match failure. */
+ /* If we can't find the required character, break the matching loop,
+ forcing a match failure. */
- if (p >= end_subject)
- {
- rc = MATCH_NOMATCH;
- break;
- }
+ if (p >= end_subject)
+ {
+ rc = MATCH_NOMATCH;
+ break;
+ }
- /* If we have found the required character, save the point where we
- found it, so that we don't search again next time round the loop if
- the start hasn't passed this character yet. */
+ /* If we have found the required character, save the point where we
+ found it, so that we don't search again next time round the loop if
+ the start hasn't passed this character yet. */
- req_byte_ptr = p;
- }
+ req_byte_ptr = p;
}
}
-#ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
- printf(">>>> Match against: ");
- pchars(start_match, end_subject - start_match, TRUE, md);
- printf("\n");
-#endif
-
- /* OK, we can now run the match. If "hitend" is set afterwards, remember the
- first starting point for which a partial match was found. */
+ /* OK, we can now run the match. */
md->start_match_ptr = start_match;
- md->start_used_ptr = start_match;
md->match_call_count = 0;
rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
- if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
switch(rc)
{
@@ -5680,7 +4922,7 @@ for(;;)
rc = MATCH_NOMATCH;
goto ENDLOOP;
- /* Any other return is either a match, or some kind of error. */
+ /* Any other return is some kind of error. */
default:
goto ENDLOOP;
@@ -5786,19 +5028,14 @@ if (using_temporary_offsets)
(pcre_free)(md->offset_vector);
}
-if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
+if (rc != MATCH_NOMATCH)
{
DPRINTF((">>>> error: returning %d\n", rc));
return rc;
}
-else if (start_partial != NULL)
+else if (md->partial && md->hitend)
{
DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
- if (offsetcount > 1)
- {
- offsets[0] = start_partial - (USPTR)subject;
- offsets[1] = end_subject - (USPTR)subject;
- }
return PCRE_ERROR_PARTIAL;
}
else