diff options
| author | Ilia Alshanetsky <iliaa@php.net> | 2010-02-03 12:59:00 +0000 |
|---|---|---|
| committer | Ilia Alshanetsky <iliaa@php.net> | 2010-02-03 12:59:00 +0000 |
| commit | 91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8 (patch) | |
| tree | 1817d9652f110cf0cd4644ed8586ca56d0c05d9e /ext/pcre/pcrelib/pcre_exec.c | |
| parent | 43d5429381237518ced74149f29a851c30307bea (diff) | |
| download | php-git-91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8.tar.gz | |
Downgrade bundled PCRE to version 7.9 due to 8.0+ version use of C99
Diffstat (limited to 'ext/pcre/pcrelib/pcre_exec.c')
| -rw-r--r-- | ext/pcre/pcrelib/pcre_exec.c | 1323 |
1 files changed, 280 insertions, 1043 deletions
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c index 46f667e21b..073cf2410a 100644 --- a/ext/pcre/pcrelib/pcre_exec.c +++ b/ext/pcre/pcrelib/pcre_exec.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2010 University of Cambridge + Copyright (c) 1997-2009 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -87,7 +87,7 @@ static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; -#ifdef PCRE_DEBUG +#ifdef DEBUG /************************************************* * Debugging function to print chars * *************************************************/ @@ -139,7 +139,7 @@ match_ref(int offset, register USPTR eptr, int length, match_data *md, { USPTR p = md->start_subject + md->offset_vector[offset]; -#ifdef PCRE_DEBUG +#ifdef DEBUG if (eptr >= md->end_subject) printf("matching subject <null>"); else @@ -252,7 +252,7 @@ actuall used in this definition. */ #ifndef NO_RECURSE #define REGISTER register -#ifdef PCRE_DEBUG +#ifdef DEBUG #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \ { \ printf("match() called in line %d\n", __LINE__); \ @@ -396,32 +396,10 @@ typedef struct heapframe { /* This function is called recursively in many circumstances. Whenever it returns a negative (error) response, the outer incarnation must also return the -same response. */ +same response. -/* These macros pack up tests that are used for partial matching, and which -appears several times in the code. We set the "hit end" flag if the pointer is -at the end of the subject and also past the start of the subject (i.e. -something has been matched). For hard partial matching, we then return -immediately. The second one is used when we already know we are past the end of -the subject. */ - -#define CHECK_PARTIAL()\ - if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\ - {\ - md->hitend = TRUE;\ - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\ - } - -#define SCHECK_PARTIAL()\ - if (md->partial != 0 && eptr > mstart)\ - {\ - md->hitend = TRUE;\ - if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\ - } - - -/* Performance note: It might be tempting to extract commonly used fields from -the md structure (e.g. utf8, end_subject) into individual variables to improve +Performance note: It might be tempting to extract commonly used fields from the +md structure (e.g. utf8, end_subject) into individual variables to improve performance. Tests using gcc on a SPARC disproved this; in the first case, it made performance worse. @@ -620,7 +598,7 @@ TAIL_RECURSE: /* OK, now we can get on with the real code of the function. Recursive calls are specified by the macro RMATCH and RRETURN is used to return. When NO_RECURSE is *not* defined, these just turn into a recursive call to match() -and a "return", respectively (possibly with some debugging if PCRE_DEBUG is +and a "return", respectively (possibly with some debugging if DEBUG is defined). However, RMATCH isn't like a function call because it's quite a complicated macro. It has to be used in one particular way. This shouldn't, however, impact performance when true recursion is being used. */ @@ -662,6 +640,14 @@ for (;;) minimize = possessive = FALSE; op = *ecode; + /* For partial matching, remember if we ever hit the end of the subject after + matching at least one subject character. */ + + if (md->partial && + eptr >= md->end_subject && + eptr > mstart) + md->hitend = TRUE; + switch(op) { case OP_FAIL: @@ -711,7 +697,7 @@ for (;;) number = GET2(ecode, 1+LINK_SIZE); offset = number << 1; -#ifdef PCRE_DEBUG +#ifdef DEBUG printf("start bracket %d\n", number); printf("subject="); pchars(eptr, 16, TRUE, md); @@ -837,139 +823,18 @@ for (;;) /* Now see what the actual condition is */ - if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */ + if (condcode == OP_RREF) /* Recursion test */ { - if (md->recursive == NULL) /* Not recursing => FALSE */ - { - condition = FALSE; - ecode += GET(ecode, 1); - } - else - { - int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ - condition = (recno == RREF_ANY || recno == md->recursive->group_num); - - /* If the test is for recursion into a specific subpattern, and it is - false, but the test was set up by name, scan the table to see if the - name refers to any other numbers, and test them. The condition is true - if any one is set. */ - - if (!condition && condcode == OP_NRREF && recno != RREF_ANY) - { - uschar *slotA = md->name_table; - for (i = 0; i < md->name_count; i++) - { - if (GET2(slotA, 0) == recno) break; - slotA += md->name_entry_size; - } - - /* Found a name for the number - there can be only one; duplicate - names for different numbers are allowed, but not vice versa. First - scan down for duplicates. */ - - if (i < md->name_count) - { - uschar *slotB = slotA; - while (slotB > md->name_table) - { - slotB -= md->name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) - { - condition = GET2(slotB, 0) == md->recursive->group_num; - if (condition) break; - } - else break; - } - - /* Scan up for duplicates */ - - if (!condition) - { - slotB = slotA; - for (i++; i < md->name_count; i++) - { - slotB += md->name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) - { - condition = GET2(slotB, 0) == md->recursive->group_num; - if (condition) break; - } - else break; - } - } - } - } - - /* Chose branch according to the condition */ - - ecode += condition? 3 : GET(ecode, 1); - } + offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ + condition = md->recursive != NULL && + (offset == RREF_ANY || offset == md->recursive->group_num); + ecode += condition? 3 : GET(ecode, 1); } - else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */ + else if (condcode == OP_CREF) /* Group used test */ { offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */ condition = offset < offset_top && md->offset_vector[offset] >= 0; - - /* If the numbered capture is unset, but the reference was by name, - scan the table to see if the name refers to any other numbers, and test - them. The condition is true if any one is set. This is tediously similar - to the code above, but not close enough to try to amalgamate. */ - - if (!condition && condcode == OP_NCREF) - { - int refno = offset >> 1; - uschar *slotA = md->name_table; - - for (i = 0; i < md->name_count; i++) - { - if (GET2(slotA, 0) == refno) break; - slotA += md->name_entry_size; - } - - /* Found a name for the number - there can be only one; duplicate names - for different numbers are allowed, but not vice versa. First scan down - for duplicates. */ - - if (i < md->name_count) - { - uschar *slotB = slotA; - while (slotB > md->name_table) - { - slotB -= md->name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) - { - offset = GET2(slotB, 0) << 1; - condition = offset < offset_top && - md->offset_vector[offset] >= 0; - if (condition) break; - } - else break; - } - - /* Scan up for duplicates */ - - if (!condition) - { - slotB = slotA; - for (i++; i < md->name_count; i++) - { - slotB += md->name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) - { - offset = GET2(slotB, 0) << 1; - condition = offset < offset_top && - md->offset_vector[offset] >= 0; - if (condition) break; - } - else break; - } - } - } - } - - /* Chose branch according to the condition */ - ecode += condition? 3 : GET(ecode, 1); } @@ -1030,30 +895,6 @@ for (;;) break; - /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, - to close any currently open capturing brackets. */ - - case OP_CLOSE: - number = GET2(ecode, 1); - offset = number << 1; - -#ifdef PCRE_DEBUG - printf("end bracket %d at *ACCEPT", number); - printf("\n"); -#endif - - md->capture_last = number; - if (offset >= md->offset_max) md->offset_overflow = TRUE; else - { - md->offset_vector[offset] = - md->offset_vector[md->offset_end - number]; - md->offset_vector[offset+1] = eptr - md->start_subject; - if (offset_top <= offset) offset_top = offset + 2; - } - ecode += 3; - break; - - /* End of the pattern, either real or forced. If we are in a top-level recursion, we should restore the offsets appropriately and continue from after the call. */ @@ -1067,26 +908,16 @@ for (;;) md->recursive = rec->prevrec; memmove(md->offset_vector, rec->offset_save, rec->saved_max * sizeof(int)); - offset_top = rec->save_offset_top; mstart = rec->save_start; ims = original_ims; ecode = rec->after_call; break; } - /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is - set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of - the subject. In both cases, backtracking will then try other alternatives, - if any. */ - - if (eptr == mstart && - (md->notempty || - (md->notempty_atstart && - mstart == md->start_subject + md->start_offset))) - RRETURN(MATCH_NOMATCH); - - /* Otherwise, we have a match. */ + /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty + string - backtracking will then try other alternatives, if any. */ + if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH); md->end_match_ptr = eptr; /* Record where we ended */ md->end_offset_top = offset_top; /* and how many extracts were taken */ md->start_match_ptr = mstart; /* and the start (\K can modify) */ @@ -1131,9 +962,7 @@ for (;;) offset_top = md->end_offset_top; continue; - /* Negative assertion: all branches must fail to match. Encountering SKIP, - PRUNE, or COMMIT means we must assume failure without checking subsequent - branches. */ + /* Negative assertion: all branches must fail to match */ case OP_ASSERT_NOT: case OP_ASSERTBACK_NOT: @@ -1142,11 +971,6 @@ for (;;) RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0, RM5); if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH); - if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) - { - do ecode += GET(ecode,1); while (*ecode == OP_ALT); - break; - } if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); ecode += GET(ecode,1); } @@ -1184,9 +1008,8 @@ for (;;) if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); } - /* Save the earliest consulted character, then skip to next op code */ + /* Skip to next op code */ - if (eptr < md->start_used_ptr) md->start_used_ptr = eptr; ecode += 1 + LINK_SIZE; break; @@ -1266,7 +1089,6 @@ for (;;) memcpy(new_recursive.offset_save, md->offset_vector, new_recursive.saved_max * sizeof(int)); new_recursive.save_start = mstart; - new_recursive.save_offset_top = offset_top; mstart = eptr; /* OK, now we can do the recursion. For each top-level alternative we @@ -1466,7 +1288,7 @@ for (;;) number = GET2(prev, 1+LINK_SIZE); offset = number << 1; -#ifdef PCRE_DEBUG +#ifdef DEBUG printf("end bracket %d", number); printf("\n"); #endif @@ -1491,7 +1313,6 @@ for (;;) mstart = rec->save_start; memcpy(md->offset_vector, rec->offset_save, rec->saved_max * sizeof(int)); - offset_top = rec->save_offset_top; ecode = rec->after_call; ims = original_ims; break; @@ -1631,8 +1452,7 @@ for (;;) /* Find out if the previous and current characters are "word" characters. It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to - be "non-word" characters. Remember the earliest consulted character for - partial matching. */ + be "non-word" characters. */ #ifdef SUPPORT_UTF8 if (utf8) @@ -1641,16 +1461,10 @@ for (;;) { USPTR lastptr = eptr - 1; while((*lastptr & 0xc0) == 0x80) lastptr--; - if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; GETCHAR(c, lastptr); prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; } - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - cur_is_word = FALSE; - } - else + if (eptr >= md->end_subject) cur_is_word = FALSE; else { GETCHAR(c, eptr); cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; @@ -1659,20 +1473,13 @@ for (;;) else #endif - /* Not in UTF-8 mode */ + /* More streamlined when not in UTF-8 mode */ { - if (eptr == md->start_subject) prev_is_word = FALSE; else - { - if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1; - prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0); - } - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - cur_is_word = FALSE; - } - else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0); + prev_is_word = (eptr != md->start_subject) && + ((md->ctypes[eptr[-1]] & ctype_word) != 0); + cur_is_word = (eptr < md->end_subject) && + ((md->ctypes[*eptr] & ctype_word) != 0); } /* Now see if the situation is what we want */ @@ -1690,11 +1497,7 @@ for (;;) /* Fall through */ case OP_ALLANY: - if (eptr++ >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH); if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; ecode++; break; @@ -1703,20 +1506,12 @@ for (;;) any byte, even newline, independent of the setting of PCRE_DOTALL. */ case OP_ANYBYTE: - if (eptr++ >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH); ecode++; break; case OP_NOT_DIGIT: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1729,11 +1524,7 @@ for (;;) break; case OP_DIGIT: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1746,11 +1537,7 @@ for (;;) break; case OP_NOT_WHITESPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1763,11 +1550,7 @@ for (;;) break; case OP_WHITESPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1780,11 +1563,7 @@ for (;;) break; case OP_NOT_WORDCHAR: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1797,11 +1576,7 @@ for (;;) break; case OP_WORDCHAR: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); if ( #ifdef SUPPORT_UTF8 @@ -1814,11 +1589,7 @@ for (;;) break; case OP_ANYNL: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); switch(c) { @@ -1842,11 +1613,7 @@ for (;;) break; case OP_NOT_HSPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); switch(c) { @@ -1876,11 +1643,7 @@ for (;;) break; case OP_HSPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); switch(c) { @@ -1910,11 +1673,7 @@ for (;;) break; case OP_NOT_VSPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); switch(c) { @@ -1932,11 +1691,7 @@ for (;;) break; case OP_VSPACE: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); switch(c) { @@ -1959,11 +1714,7 @@ for (;;) case OP_PROP: case OP_NOTPROP: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); { const ucd_record *prop = GET_UCD(c); @@ -2008,11 +1759,7 @@ for (;;) is in the binary; otherwise a compile-time error occurs. */ case OP_EXTUNI: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); { int category = UCD_CATEGORY(c); @@ -2092,11 +1839,7 @@ for (;;) break; default: /* No repeat follows */ - if (!match_ref(offset, eptr, length, md, ims)) - { - CHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH); eptr += length; continue; /* With the main loop */ } @@ -2112,11 +1855,7 @@ for (;;) for (i = 1; i <= min; i++) { - if (!match_ref(offset, eptr, length, md, ims)) - { - CHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH); eptr += length; } @@ -2133,12 +1872,8 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (!match_ref(offset, eptr, length, md, ims)) - { - CHECK_PARTIAL(); + if (fi >= max || !match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH); - } eptr += length; } /* Control never gets here */ @@ -2151,11 +1886,7 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - if (!match_ref(offset, eptr, length, md, ims)) - { - CHECK_PARTIAL(); - break; - } + if (!match_ref(offset, eptr, length, md, ims)) break; eptr += length; } while (eptr >= pp) @@ -2169,6 +1900,8 @@ for (;;) } /* Control never gets here */ + + /* Match a bit-mapped character class, possibly repeatedly. This op code is used when all the characters in the class have values in the range 0-255, and either the matching is caseful, or the characters are in the range @@ -2223,11 +1956,7 @@ for (;;) { for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); if (c > 255) { @@ -2245,11 +1974,7 @@ for (;;) { for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); c = *eptr++; if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } @@ -2273,12 +1998,7 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); if (c > 255) { @@ -2298,12 +2018,7 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); c = *eptr++; if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } @@ -2324,11 +2039,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (c > 255) { @@ -2354,11 +2065,7 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; c = *eptr; if ((data[c/8] & (1 << (c&7))) == 0) break; eptr++; @@ -2420,11 +2127,7 @@ for (;;) for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); } @@ -2443,12 +2146,7 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); } @@ -2463,11 +2161,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLENTEST(c, eptr, len); if (!_pcre_xclass(c, data)) break; eptr += len; @@ -2495,11 +2189,7 @@ for (;;) length = 1; ecode++; GETCHARLEN(fc, ecode, length); - if (length > md->end_subject - eptr) - { - CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ - RRETURN(MATCH_NOMATCH); - } + if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); } else @@ -2507,11 +2197,7 @@ for (;;) /* Non-UTF-8 mode */ { - if (md->end_subject - eptr < 1) - { - SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ - RRETURN(MATCH_NOMATCH); - } + if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH); if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); ecode += 2; } @@ -2527,11 +2213,7 @@ for (;;) ecode++; GETCHARLEN(fc, ecode, length); - if (length > md->end_subject - eptr) - { - CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ - RRETURN(MATCH_NOMATCH); - } + if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); /* If the pattern character's value is < 128, we have only one byte, and can use the fast lookup table. */ @@ -2566,11 +2248,7 @@ for (;;) /* Non-UTF-8 mode */ { - if (md->end_subject - eptr < 1) - { - SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ - RRETURN(MATCH_NOMATCH); - } + if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH); if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); ecode += 2; } @@ -2624,12 +2302,13 @@ for (;;) case OP_MINQUERY: c = *ecode++ - OP_STAR; minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ max = rep_max[c]; /* zero for max => infinity */ if (max == 0) max = INT_MAX; - /* Common code for all repeated single-character matches. */ + /* Common code for all repeated single-character matches. We can give + up quickly if there are fewer than the minimum number of characters left in + the subject. */ REPEATCHAR: #ifdef SUPPORT_UTF8 @@ -2638,6 +2317,7 @@ for (;;) length = 1; charptr = ecode; GETCHARLEN(fc, ecode, length); + if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); ecode += length; /* Handle multibyte character matching specially here. There is @@ -2655,18 +2335,18 @@ for (;;) for (i = 1; i <= min; i++) { - if (eptr <= md->end_subject - length && - memcmp(eptr, charptr, length) == 0) eptr += length; + if (memcmp(eptr, charptr, length) == 0) eptr += length; #ifdef SUPPORT_UCP - else if (oclength > 0 && - eptr <= md->end_subject - oclength && - memcmp(eptr, occhars, oclength) == 0) eptr += oclength; -#endif /* SUPPORT_UCP */ + /* Need braces because of following else */ + else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } else { - CHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); + eptr += oclength; } +#else /* without SUPPORT_UCP */ + else { RRETURN(MATCH_NOMATCH); } +#endif /* SUPPORT_UCP */ } if (min == max) continue; @@ -2677,19 +2357,19 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr <= md->end_subject - length && - memcmp(eptr, charptr, length) == 0) eptr += length; + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (memcmp(eptr, charptr, length) == 0) eptr += length; #ifdef SUPPORT_UCP - else if (oclength > 0 && - eptr <= md->end_subject - oclength && - memcmp(eptr, occhars, oclength) == 0) eptr += oclength; -#endif /* SUPPORT_UCP */ + /* Need braces because of following else */ + else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } else { - CHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); + if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); + eptr += oclength; } +#else /* without SUPPORT_UCP */ + else { RRETURN (MATCH_NOMATCH); } +#endif /* SUPPORT_UCP */ } /* Control never gets here */ } @@ -2699,34 +2379,33 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - if (eptr <= md->end_subject - length && - memcmp(eptr, charptr, length) == 0) eptr += length; + if (eptr > md->end_subject - length) break; + if (memcmp(eptr, charptr, length) == 0) eptr += length; #ifdef SUPPORT_UCP - else if (oclength > 0 && - eptr <= md->end_subject - oclength && - memcmp(eptr, occhars, oclength) == 0) eptr += oclength; -#endif /* SUPPORT_UCP */ + else if (oclength == 0) break; else { - CHECK_PARTIAL(); - break; + if (memcmp(eptr, occhars, oclength) != 0) break; + eptr += oclength; } +#else /* without SUPPORT_UCP */ + else break; +#endif /* SUPPORT_UCP */ } if (possessive) continue; - for(;;) - { - RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr == pp) { RRETURN(MATCH_NOMATCH); } + { + RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (eptr == pp) RRETURN(MATCH_NOMATCH); #ifdef SUPPORT_UCP - eptr--; - BACKCHAR(eptr); + eptr--; + BACKCHAR(eptr); #else /* without SUPPORT_UCP */ - eptr -= length; + eptr -= length; #endif /* SUPPORT_UCP */ - } + } } /* Control never gets here */ } @@ -2739,8 +2418,10 @@ for (;;) #endif /* SUPPORT_UTF8 */ /* When not in UTF-8 mode, load a single-byte character. */ - - fc = *ecode++; + { + if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); + fc = *ecode++; + } /* The value of fc at this point is always less than 256, though we may or may not be in UTF-8 mode. The code is duplicated for the caseless and @@ -2758,14 +2439,7 @@ for (;;) { fc = md->lcc[fc]; for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); - } if (min == max) continue; if (minimize) { @@ -2773,13 +2447,9 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); + if (fi >= max || eptr >= md->end_subject || + fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); - } - if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ } @@ -2788,17 +2458,10 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (fc != md->lcc[*eptr]) break; + if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break; eptr++; } - if (possessive) continue; - while (eptr >= pp) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25); @@ -2814,31 +2477,16 @@ for (;;) else { - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (fc != *eptr++) RRETURN(MATCH_NOMATCH); - } - + for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH); if (min == max) continue; - if (minimize) { for (fi = min;; fi++) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); + if (fi >= max || eptr >= md->end_subject || fc != *eptr++) RRETURN(MATCH_NOMATCH); - } - if (fc != *eptr++) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ } @@ -2847,16 +2495,10 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (fc != *eptr) break; + if (eptr >= md->end_subject || fc != *eptr) break; eptr++; } if (possessive) continue; - while (eptr >= pp) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27); @@ -2872,11 +2514,7 @@ for (;;) checking can be multibyte. */ case OP_NOT: - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); ecode++; GETCHARINCTEST(c, eptr); if ((ims & PCRE_CASELESS) != 0) @@ -2953,9 +2591,12 @@ for (;;) max = rep_max[c]; /* zero for max => infinity */ if (max == 0) max = INT_MAX; - /* Common code for all repeated single-byte matches. */ + /* Common code for all repeated single-byte matches. We can give up quickly + if there are fewer than the minimum number of bytes left in the + subject. */ REPEATNOTCHAR: + if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); fc = *ecode++; /* The code is duplicated for the caseless and caseful cases, for speed, @@ -2980,11 +2621,6 @@ for (;;) register unsigned int d; for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } GETCHARINC(d, eptr); if (d < 256) d = md->lcc[d]; if (fc == d) RRETURN(MATCH_NOMATCH); @@ -2996,14 +2632,7 @@ for (;;) /* Not UTF-8 mode */ { for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); - } } if (min == max) continue; @@ -3019,15 +2648,11 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(d, eptr); if (d < 256) d = md->lcc[d]; if (fc == d) RRETURN(MATCH_NOMATCH); + } } else @@ -3038,13 +2663,8 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); + if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); - } - if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); } } /* Control never gets here */ @@ -3064,11 +2684,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(d, eptr, len); if (d < 256) d = md->lcc[d]; if (fc == d) break; @@ -3089,12 +2705,7 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (fc == md->lcc[*eptr]) break; + if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break; eptr++; } if (possessive) continue; @@ -3122,11 +2733,6 @@ for (;;) register unsigned int d; for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } GETCHARINC(d, eptr); if (fc == d) RRETURN(MATCH_NOMATCH); } @@ -3136,14 +2742,7 @@ for (;;) /* Not UTF-8 mode */ { for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } if (fc == *eptr++) RRETURN(MATCH_NOMATCH); - } } if (min == max) continue; @@ -3159,12 +2758,7 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(d, eptr); if (fc == d) RRETURN(MATCH_NOMATCH); } @@ -3177,13 +2771,8 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); + if (fi >= max || eptr >= md->end_subject || fc == *eptr++) RRETURN(MATCH_NOMATCH); - } - if (fc == *eptr++) RRETURN(MATCH_NOMATCH); } } /* Control never gets here */ @@ -3203,11 +2792,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(d, eptr, len); if (fc == d) break; eptr += len; @@ -3227,12 +2812,7 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (fc == *eptr) break; + if (eptr >= md->end_subject || fc == *eptr) break; eptr++; } if (possessive) continue; @@ -3326,10 +2906,13 @@ for (;;) /* First, ensure the minimum number of matches are present. Use inline code for maximizing the speed, and do the type test once at the start - (i.e. keep it out of the loop). Separate the UTF-8 code completely as that + (i.e. keep it out of the loop). Also we can test that there are at least + the minimum number of bytes before we start. This isn't as effective in + UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that is tidier. Also separate the UCP code, which can be the same for both UTF-8 and single-bytes. */ + if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); if (min > 0) { #ifdef SUPPORT_UCP @@ -3341,11 +2924,7 @@ for (;;) if (prop_fail_result) RRETURN(MATCH_NOMATCH); for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); } break; @@ -3353,11 +2932,7 @@ for (;;) case PT_LAMP: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == ucp_Lu || @@ -3370,11 +2945,7 @@ for (;;) case PT_GC: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); prop_category = UCD_CATEGORY(c); if ((prop_category == prop_value) == prop_fail_result) @@ -3385,11 +2956,7 @@ for (;;) case PT_PC: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == prop_value) == prop_fail_result) @@ -3400,11 +2967,7 @@ for (;;) case PT_SC: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); prop_script = UCD_SCRIPT(c); if ((prop_script == prop_value) == prop_fail_result) @@ -3424,19 +2987,16 @@ for (;;) { for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } GETCHARINCTEST(c, eptr); prop_category = UCD_CATEGORY(c); if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH); while (eptr < md->end_subject) { int len = 1; - if (!utf8) c = *eptr; - else { GETCHARLEN(c, eptr, len); } + if (!utf8) c = *eptr; else + { + GETCHARLEN(c, eptr, len); + } prop_category = UCD_CATEGORY(c); if (prop_category != ucp_M) break; eptr += len; @@ -3455,12 +3015,8 @@ for (;;) case OP_ANY: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); + if (eptr >= md->end_subject || IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); - } - if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } @@ -3469,29 +3025,20 @@ for (;;) case OP_ALLANY: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } break; case OP_ANYBYTE: - if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH); eptr += min; break; case OP_ANYNL: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); switch(c) { @@ -3517,11 +3064,7 @@ for (;;) case OP_NOT_HSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); switch(c) { @@ -3553,11 +3096,7 @@ for (;;) case OP_HSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); switch(c) { @@ -3589,11 +3128,7 @@ for (;;) case OP_NOT_VSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); switch(c) { @@ -3613,11 +3148,7 @@ for (;;) case OP_VSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); switch(c) { @@ -3637,11 +3168,7 @@ for (;;) case OP_NOT_DIGIT: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); @@ -3651,12 +3178,8 @@ for (;;) case OP_DIGIT: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) + if (eptr >= md->end_subject || + *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); /* No need to skip more bytes - we know it's a 1-byte character */ } @@ -3665,12 +3188,8 @@ for (;;) case OP_NOT_WHITESPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0) + if (eptr >= md->end_subject || + (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)) RRETURN(MATCH_NOMATCH); while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); } @@ -3679,12 +3198,8 @@ for (;;) case OP_WHITESPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) + if (eptr >= md->end_subject || + *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); /* No need to skip more bytes - we know it's a 1-byte character */ } @@ -3693,12 +3208,8 @@ for (;;) case OP_NOT_WORDCHAR: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0) + if (eptr >= md->end_subject || + (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)) RRETURN(MATCH_NOMATCH); while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); } @@ -3707,12 +3218,8 @@ for (;;) case OP_WORDCHAR: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) + if (eptr >= md->end_subject || + *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); /* No need to skip more bytes - we know it's a 1-byte character */ } @@ -3726,49 +3233,34 @@ for (;;) #endif /* SUPPORT_UTF8 */ /* Code for the non-UTF-8 case for minimum matching of operators other - than OP_PROP and OP_NOTPROP. */ + than OP_PROP and OP_NOTPROP. We can assume that there are the minimum + number of bytes present, as this was tested above. */ switch(ctype) { case OP_ANY: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); eptr++; } break; case OP_ALLANY: - if (eptr > md->end_subject - min) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } eptr += min; break; case OP_ANYBYTE: - if (eptr > md->end_subject - min) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } eptr += min; break; + /* Because of the CRLF case, we can't assume the minimum number of + bytes are present in this case. */ + case OP_ANYNL: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); switch(*eptr++) { default: RRETURN(MATCH_NOMATCH); @@ -3790,11 +3282,7 @@ for (;;) case OP_NOT_HSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); switch(*eptr++) { default: break; @@ -3809,11 +3297,7 @@ for (;;) case OP_HSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); switch(*eptr++) { default: RRETURN(MATCH_NOMATCH); @@ -3828,11 +3312,7 @@ for (;;) case OP_NOT_VSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); switch(*eptr++) { default: break; @@ -3849,11 +3329,7 @@ for (;;) case OP_VSPACE: for (i = 1; i <= min; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); switch(*eptr++) { default: RRETURN(MATCH_NOMATCH); @@ -3869,76 +3345,34 @@ for (;;) case OP_NOT_DIGIT: for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); - } break; case OP_DIGIT: for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); - } break; case OP_NOT_WHITESPACE: for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); - } break; case OP_WHITESPACE: for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); - } break; case OP_NOT_WORDCHAR: for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } if ((md->ctypes[*eptr++] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); - } break; case OP_WORDCHAR: for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } if ((md->ctypes[*eptr++] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); - } break; default: @@ -3966,12 +3400,7 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); if (prop_fail_result) RRETURN(MATCH_NOMATCH); } @@ -3982,12 +3411,7 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == ucp_Lu || @@ -4002,12 +3426,7 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); prop_category = UCD_CATEGORY(c); if ((prop_category == prop_value) == prop_fail_result) @@ -4020,12 +3439,7 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == prop_value) == prop_fail_result) @@ -4038,12 +3452,7 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); prop_script = UCD_SCRIPT(c); if ((prop_script == prop_value) == prop_fail_result) @@ -4065,20 +3474,17 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINCTEST(c, eptr); prop_category = UCD_CATEGORY(c); if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH); while (eptr < md->end_subject) { int len = 1; - if (!utf8) c = *eptr; - else { GETCHARLEN(c, eptr, len); } + if (!utf8) c = *eptr; else + { + GETCHARLEN(c, eptr, len); + } prop_category = UCD_CATEGORY(c); if (prop_category != ucp_M) break; eptr += len; @@ -4097,14 +3503,10 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (ctype == OP_ANY && IS_NEWLINE(eptr)) + if (fi >= max || eptr >= md->end_subject || + (ctype == OP_ANY && IS_NEWLINE(eptr))) RRETURN(MATCH_NOMATCH); + GETCHARINC(c, eptr); switch(ctype) { @@ -4260,14 +3662,10 @@ for (;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (fi >= max) RRETURN(MATCH_NOMATCH); - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - RRETURN(MATCH_NOMATCH); - } - if (ctype == OP_ANY && IS_NEWLINE(eptr)) + if (fi >= max || eptr >= md->end_subject || + (ctype == OP_ANY && IS_NEWLINE(eptr))) RRETURN(MATCH_NOMATCH); + c = *eptr++; switch(ctype) { @@ -4392,11 +3790,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (prop_fail_result) break; eptr+= len; @@ -4407,11 +3801,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == ucp_Lu || @@ -4426,11 +3816,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); prop_category = UCD_CATEGORY(c); if ((prop_category == prop_value) == prop_fail_result) @@ -4443,11 +3829,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == prop_value) == prop_fail_result) @@ -4460,11 +3842,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); prop_script = UCD_SCRIPT(c); if ((prop_script == prop_value) == prop_fail_result) @@ -4493,11 +3871,7 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARINCTEST(c, eptr); prop_category = UCD_CATEGORY(c); if (prop_category == ucp_M) break; @@ -4517,7 +3891,6 @@ for (;;) /* eptr is now past the end of the maximum run */ if (possessive) continue; - for(;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45); @@ -4553,12 +3926,7 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (IS_NEWLINE(eptr)) break; + if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } @@ -4570,12 +3938,7 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (IS_NEWLINE(eptr)) break; + if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } @@ -4587,11 +3950,7 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } @@ -4604,22 +3963,15 @@ for (;;) case OP_ANYBYTE: c = max - min; if (c > (unsigned int)(md->end_subject - eptr)) - { - eptr = md->end_subject; - SCHECK_PARTIAL(); - } - else eptr += c; + c = md->end_subject - eptr; + eptr += c; break; case OP_ANYNL: for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (c == 0x000d) { @@ -4644,11 +3996,7 @@ for (;;) { BOOL gotspace; int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); switch(c) { @@ -4686,11 +4034,7 @@ for (;;) { BOOL gotspace; int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); switch(c) { @@ -4714,11 +4058,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break; eptr+= len; @@ -4729,11 +4069,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break; eptr+= len; @@ -4744,11 +4080,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break; eptr+= len; @@ -4759,11 +4091,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break; eptr+= len; @@ -4774,11 +4102,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break; eptr+= len; @@ -4789,11 +4113,7 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break; eptr+= len; @@ -4825,12 +4145,7 @@ for (;;) case OP_ANY: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } - if (IS_NEWLINE(eptr)) break; + if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; eptr++; } break; @@ -4839,21 +4154,14 @@ for (;;) case OP_ANYBYTE: c = max - min; if (c > (unsigned int)(md->end_subject - eptr)) - { - eptr = md->end_subject; - SCHECK_PARTIAL(); - } - else eptr += c; + c = md->end_subject - eptr; + eptr += c; break; case OP_ANYNL: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; c = *eptr; if (c == 0x000d) { @@ -4874,11 +4182,7 @@ for (;;) case OP_NOT_HSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; c = *eptr; if (c == 0x09 || c == 0x20 || c == 0xa0) break; eptr++; @@ -4888,11 +4192,7 @@ for (;;) case OP_HSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; c = *eptr; if (c != 0x09 && c != 0x20 && c != 0xa0) break; eptr++; @@ -4902,11 +4202,7 @@ for (;;) case OP_NOT_VSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; c = *eptr; if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85) break; @@ -4917,11 +4213,7 @@ for (;;) case OP_VSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); - break; - } + if (eptr >= md->end_subject) break; c = *eptr; if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85) break; @@ -4932,12 +4224,8 @@ for (;;) case OP_NOT_DIGIT: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) break; - } - if ((md->ctypes[*eptr] & ctype_digit) != 0) break; eptr++; } break; @@ -4945,12 +4233,8 @@ for (;;) case OP_DIGIT: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) break; - } - if ((md->ctypes[*eptr] & ctype_digit) == 0) break; eptr++; } break; @@ -4958,12 +4242,8 @@ for (;;) case OP_NOT_WHITESPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) break; - } - if ((md->ctypes[*eptr] & ctype_space) != 0) break; eptr++; } break; @@ -4971,12 +4251,8 @@ for (;;) case OP_WHITESPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) break; - } - if ((md->ctypes[*eptr] & ctype_space) == 0) break; eptr++; } break; @@ -4984,12 +4260,8 @@ for (;;) case OP_NOT_WORDCHAR: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) break; - } - if ((md->ctypes[*eptr] & ctype_word) != 0) break; eptr++; } break; @@ -4997,12 +4269,8 @@ for (;;) case OP_WORDCHAR: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) - { - SCHECK_PARTIAL(); + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) break; - } - if ((md->ctypes[*eptr] & ctype_word) == 0) break; eptr++; } break; @@ -5180,7 +4448,6 @@ const uschar *tables; const uschar *start_bits = NULL; USPTR start_match = (USPTR)subject + start_offset; USPTR end_subject; -USPTR start_partial = NULL; USPTR req_byte_ptr = start_match - 1; pcre_study_data internal_study; @@ -5197,13 +4464,6 @@ if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; -/* This information is for finding all the numbers associated with a given -name, for condition testing. */ - -md->name_table = (uschar *)re + re->name_table_offset; -md->name_count = re->name_count; -md->name_entry_size = re->name_entry_size; - /* Fish out the optional data from the extra_data structure, first setting the default values. */ @@ -5271,9 +4531,7 @@ md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; md->notbol = (options & PCRE_NOTBOL) != 0; md->noteol = (options & PCRE_NOTEOL) != 0; md->notempty = (options & PCRE_NOTEMPTY) != 0; -md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; -md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 : - ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0; +md->partial = (options & PCRE_PARTIAL) != 0; md->hitend = FALSE; md->recursive = NULL; /* No recursion at top level */ @@ -5347,9 +4605,8 @@ else } } -/* Partial matching was originally supported only for a restricted set of -regexes; from release 8.00 there are no restrictions, but the bits are still -defined (though never set). So there's no harm in leaving this code. */ +/* Partial matching is supported only for a restricted set of regexes at the +moment. */ if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0) return PCRE_ERROR_BADPARTIAL; @@ -5436,7 +4693,7 @@ if (!anchored) } else if (!startline && study != NULL && - (study->flags & PCRE_STUDY_MAPPED) != 0) + (study->options & PCRE_STUDY_MAPPED) != 0) start_bits = study->start_bits; } @@ -5563,94 +4820,79 @@ for(;;) end_subject = save_end_subject; - /* The following two optimizations are disabled for partial matching or if +#ifdef DEBUG /* Sigh. Some compilers never learn. */ + printf(">>>> Match against: "); + pchars(start_match, end_subject - start_match, TRUE, md); + printf("\n"); +#endif + + /* If req_byte is set, we know that that character must appear in the + subject for the match to succeed. If the first character is set, req_byte + must be later in the subject; otherwise the test starts at the match point. + This optimization can save a huge amount of backtracking in patterns with + nested unlimited repeats that aren't going to match. Writing separate code + for cased/caseless versions makes it go faster, as does using an + autoincrement and backing off on a match. + + HOWEVER: when the subject string is very, very long, searching to its end + can take a long time, and give bad performance on quite ordinary patterns. + This showed up when somebody was matching something like /^\d+C/ on a + 32-megabyte string... so we don't do this when the string is sufficiently + long. + + ALSO: this processing is disabled when partial matching is requested, or if disabling is explicitly requested. */ - if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial) + if ((options & PCRE_NO_START_OPTIMIZE) == 0 && + req_byte >= 0 && + end_subject - start_match < REQ_BYTE_MAX && + !md->partial) { - /* If the pattern was studied, a minimum subject length may be set. This is - a lower bound; no actual string of that length may actually match the - pattern. Although the value is, strictly, in characters, we treat it as - bytes to avoid spending too much time in this optimization. */ + register USPTR p = start_match + ((first_byte >= 0)? 1 : 0); - if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 && - (pcre_uint32)(end_subject - start_match) < study->minlength) - { - rc = MATCH_NOMATCH; - break; - } + /* We don't need to repeat the search if we haven't yet reached the + place we found it at last time. */ - /* If req_byte is set, we know that that character must appear in the - subject for the match to succeed. If the first character is set, req_byte - must be later in the subject; otherwise the test starts at the match point. - This optimization can save a huge amount of backtracking in patterns with - nested unlimited repeats that aren't going to match. Writing separate code - for cased/caseless versions makes it go faster, as does using an - autoincrement and backing off on a match. - - HOWEVER: when the subject string is very, very long, searching to its end - can take a long time, and give bad performance on quite ordinary patterns. - This showed up when somebody was matching something like /^\d+C/ on a - 32-megabyte string... so we don't do this when the string is sufficiently - long. */ - - if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX) + if (p > req_byte_ptr) { - register USPTR p = start_match + ((first_byte >= 0)? 1 : 0); - - /* We don't need to repeat the search if we haven't yet reached the - place we found it at last time. */ - - if (p > req_byte_ptr) + if (req_byte_caseless) { - if (req_byte_caseless) + while (p < end_subject) { - while (p < end_subject) - { - register int pp = *p++; - if (pp == req_byte || pp == req_byte2) { p--; break; } - } + register int pp = *p++; + if (pp == req_byte || pp == req_byte2) { p--; break; } } - else + } + else + { + while (p < end_subject) { - while (p < end_subject) - { - if (*p++ == req_byte) { p--; break; } - } + if (*p++ == req_byte) { p--; break; } } + } - /* If we can't find the required character, break the matching loop, - forcing a match failure. */ + /* If we can't find the required character, break the matching loop, + forcing a match failure. */ - if (p >= end_subject) - { - rc = MATCH_NOMATCH; - break; - } + if (p >= end_subject) + { + rc = MATCH_NOMATCH; + break; + } - /* If we have found the required character, save the point where we - found it, so that we don't search again next time round the loop if - the start hasn't passed this character yet. */ + /* If we have found the required character, save the point where we + found it, so that we don't search again next time round the loop if + the start hasn't passed this character yet. */ - req_byte_ptr = p; - } + req_byte_ptr = p; } } -#ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */ - printf(">>>> Match against: "); - pchars(start_match, end_subject - start_match, TRUE, md); - printf("\n"); -#endif - - /* OK, we can now run the match. If "hitend" is set afterwards, remember the - first starting point for which a partial match was found. */ + /* OK, we can now run the match. */ md->start_match_ptr = start_match; - md->start_used_ptr = start_match; md->match_call_count = 0; rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0); - if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr; switch(rc) { @@ -5680,7 +4922,7 @@ for(;;) rc = MATCH_NOMATCH; goto ENDLOOP; - /* Any other return is either a match, or some kind of error. */ + /* Any other return is some kind of error. */ default: goto ENDLOOP; @@ -5786,19 +5028,14 @@ if (using_temporary_offsets) (pcre_free)(md->offset_vector); } -if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL) +if (rc != MATCH_NOMATCH) { DPRINTF((">>>> error: returning %d\n", rc)); return rc; } -else if (start_partial != NULL) +else if (md->partial && md->hitend) { DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n")); - if (offsetcount > 1) - { - offsets[0] = start_partial - (USPTR)subject; - offsets[1] = end_subject - (USPTR)subject; - } return PCRE_ERROR_PARTIAL; } else |
