diff options
| author | Ilia Alshanetsky <iliaa@php.net> | 2010-02-03 12:59:00 +0000 |
|---|---|---|
| committer | Ilia Alshanetsky <iliaa@php.net> | 2010-02-03 12:59:00 +0000 |
| commit | 91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8 (patch) | |
| tree | 1817d9652f110cf0cd4644ed8586ca56d0c05d9e /ext/pcre/pcrelib/pcre_compile.c | |
| parent | 43d5429381237518ced74149f29a851c30307bea (diff) | |
| download | php-git-91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8.tar.gz | |
Downgrade bundled PCRE to version 7.9 due to 8.0+ version use of C99
Diffstat (limited to 'ext/pcre/pcrelib/pcre_compile.c')
| -rw-r--r-- | ext/pcre/pcrelib/pcre_compile.c | 396 |
1 files changed, 76 insertions, 320 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c index eaf3d90ce8..1e0672c5cd 100644 --- a/ext/pcre/pcrelib/pcre_compile.c +++ b/ext/pcre/pcrelib/pcre_compile.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2010 University of Cambridge + Copyright (c) 1997-2009 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -51,11 +51,10 @@ supporting internal functions that are not used by other modules. */ #include "pcre_internal.h" -/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is -also used by pcretest. PCRE_DEBUG is not defined when building a production -library. */ +/* When DEBUG is defined, we need the pcre_printint() function, which is also +used by pcretest. DEBUG is not defined when building a production library. */ -#ifdef PCRE_DEBUG +#ifdef DEBUG #include "pcre_printint.src" #endif @@ -340,9 +339,7 @@ static const char error_texts[] = "number is too big\0" "subpattern name expected\0" "digit expected after (?+\0" - "] is an invalid data character in JavaScript compatibility mode\0" - /* 65 */ - "different names for subpatterns of the same number are not allowed"; + "] is an invalid data character in JavaScript compatibility mode"; /* Table to identify digits and hex digits. This is used when compiling @@ -1101,7 +1098,6 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS) if (name != NULL && lorn == ptr - thisname && strncmp((const char *)name, (const char *)thisname, lorn) == 0) return *count; - term++; } } } @@ -1136,21 +1132,19 @@ for (; *ptr != 0; ptr++) BOOL negate_class = FALSE; for (;;) { - if (ptr[1] == CHAR_BACKSLASH) + int c = *(++ptr); + if (c == CHAR_BACKSLASH) { - if (ptr[2] == CHAR_E) - ptr+= 2; - else if (strncmp((const char *)ptr+2, + if (ptr[1] == CHAR_E) + ptr++; + else if (strncmp((const char *)ptr+1, STR_Q STR_BACKSLASH STR_E, 3) == 0) - ptr += 4; + ptr += 3; else break; } - else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT) - { + else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) negate_class = TRUE; - ptr++; - } else break; } @@ -1316,9 +1310,7 @@ for (;;) case OP_CALLOUT: case OP_CREF: - case OP_NCREF: case OP_RREF: - case OP_NRREF: case OP_DEF: code += _pcre_OP_lengths[*code]; break; @@ -1334,34 +1326,23 @@ for (;;) /************************************************* -* Find the fixed length of a branch * +* Find the fixed length of a pattern * *************************************************/ -/* Scan a branch and compute the fixed length of subject that will match it, +/* Scan a pattern and compute the fixed length of subject that will match it, if the length is fixed. This is needed for dealing with backward assertions. -In UTF8 mode, the result is in characters rather than bytes. The branch is -temporarily terminated with OP_END when this function is called. - -This function is called when a backward assertion is encountered, so that if it -fails, the error message can point to the correct place in the pattern. -However, we cannot do this when the assertion contains subroutine calls, -because they can be forward references. We solve this by remembering this case -and doing the check at the end; a flag specifies which mode we are running in. +In UTF8 mode, the result is in characters rather than bytes. Arguments: code points to the start of the pattern (the bracket) options the compiling options - atend TRUE if called when the pattern is complete - cd the "compile data" structure -Returns: the fixed length, - or -1 if there is no fixed length, +Returns: the fixed length, or -1 if there is no fixed length, or -2 if \C was encountered - or -3 if an OP_RECURSE item was encountered and atend is FALSE */ static int -find_fixedlength(uschar *code, int options, BOOL atend, compile_data *cd) +find_fixedlength(uschar *code, int options) { int length = -1; @@ -1374,7 +1355,6 @@ branch, check the length against that of the other branches. */ for (;;) { int d; - uschar *ce, *cs; register int op = *cc; switch (op) { @@ -1382,7 +1362,7 @@ for (;;) case OP_BRA: case OP_ONCE: case OP_COND: - d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options, atend, cd); + d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options); if (d < 0) return d; branchlength += d; do cc += GET(cc, 1); while (*cc == OP_ALT); @@ -1405,21 +1385,6 @@ for (;;) branchlength = 0; break; - /* A true recursion implies not fixed length, but a subroutine call may - be OK. If the subroutine is a forward reference, we can't deal with - it until the end of the pattern, so return -3. */ - - case OP_RECURSE: - if (!atend) return -3; - cs = ce = (uschar *)cd->start_code + GET(cc, 1); /* Start subpattern */ - do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */ - if (cc > cs && cc < ce) return -1; /* Recursion */ - d = find_fixedlength(cs + 2, options, atend, cd); - if (d < 0) return d; - branchlength += d; - cc += 1 + LINK_SIZE; - break; - /* Skip over assertive subpatterns */ case OP_ASSERT: @@ -1433,9 +1398,7 @@ for (;;) case OP_REVERSE: case OP_CREF: - case OP_NCREF: case OP_RREF: - case OP_NRREF: case OP_DEF: case OP_OPT: case OP_CALLOUT: @@ -1458,8 +1421,10 @@ for (;;) branchlength++; cc += 2; #ifdef SUPPORT_UTF8 - if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0) - cc += _pcre_utf8_table4[cc[-1] & 0x3f]; + if ((options & PCRE_UTF8) != 0) + { + while ((*cc & 0xc0) == 0x80) cc++; + } #endif break; @@ -1470,8 +1435,10 @@ for (;;) branchlength += GET2(cc,1); cc += 4; #ifdef SUPPORT_UTF8 - if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0) - cc += _pcre_utf8_table4[cc[-1] & 0x3f]; + if ((options & PCRE_UTF8) != 0) + { + while((*cc & 0x80) == 0x80) cc++; + } #endif break; @@ -1550,25 +1517,22 @@ for (;;) /************************************************* -* Scan compiled regex for specific bracket * +* Scan compiled regex for numbered bracket * *************************************************/ /* This little function scans through a compiled pattern until it finds a -capturing bracket with the given number, or, if the number is negative, an -instance of OP_REVERSE for a lookbehind. The function is global in the C sense -so that it can be called from pcre_study() when finding the minimum matching -length. +capturing bracket with the given number. Arguments: code points to start of expression utf8 TRUE in UTF-8 mode - number the required bracket number or negative to find a lookbehind + number the required bracket number Returns: pointer to the opcode for the bracket, or NULL if not found */ -const uschar * -_pcre_find_bracket(const uschar *code, BOOL utf8, int number) +static const uschar * +find_bracket(const uschar *code, BOOL utf8, int number) { for (;;) { @@ -1581,14 +1545,6 @@ for (;;) if (c == OP_XCLASS) code += GET(code, 1); - /* Handle recursion */ - - else if (c == OP_REVERSE) - { - if (number < 0) return (uschar *)code; - code += _pcre_OP_lengths[c]; - } - /* Handle capturing bracket */ else if (c == OP_CBRA) @@ -1954,13 +1910,10 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE case OP_QUERY: case OP_MINQUERY: case OP_POSQUERY: - if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f]; - break; - case OP_UPTO: case OP_MINUPTO: case OP_POSUPTO: - if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f]; + if (utf8) while ((code[2] & 0xc0) == 0x80) code++; break; #endif } @@ -1993,10 +1946,9 @@ static BOOL could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr, BOOL utf8) { -while (bcptr != NULL && bcptr->current_branch >= code) +while (bcptr != NULL && bcptr->current >= code) { - if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8)) - return FALSE; + if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE; bcptr = bcptr->outer; } return TRUE; @@ -2658,7 +2610,7 @@ BOOL utf8 = FALSE; uschar *utf8_char = NULL; #endif -#ifdef PCRE_DEBUG +#ifdef DEBUG if (lengthptr != NULL) DPRINTF((">> start branch\n")); #endif @@ -2717,7 +2669,7 @@ for (;; ptr++) if (lengthptr != NULL) { -#ifdef PCRE_DEBUG +#ifdef DEBUG if (code > cd->hwm) cd->hwm = code; /* High water info */ #endif if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */ @@ -3915,15 +3867,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ if (repeat_max == 0) goto END_REPEAT; - /*--------------------------------------------------------------------*/ - /* This code is obsolete from release 8.00; the restriction was finally - removed: */ - /* All real repeats make it impossible to handle partial matching (maybe one day we will be able to remove this restriction). */ - /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */ - /*--------------------------------------------------------------------*/ + if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; /* Combine the op_type with the repeat_type */ @@ -4070,15 +4017,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ goto END_REPEAT; } - /*--------------------------------------------------------------------*/ - /* This code is obsolete from release 8.00; the restriction was finally - removed: */ - /* All real repeats make it impossible to handle partial matching (maybe one day we will be able to remove this restriction). */ - /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */ - /*--------------------------------------------------------------------*/ + if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; if (repeat_min == 0 && repeat_max == -1) *code++ = OP_CRSTAR + repeat_type; @@ -4213,15 +4155,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ { /* In the pre-compile phase, we don't actually do the replication. We just adjust the length as if we had. Do some paranoid checks for - potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit - integer type when available, otherwise double. */ + potential integer overflow. */ if (lengthptr != NULL) { int delta = (repeat_min - 1)*length_prevgroup; - if ((INT64_OR_DOUBLE)(repeat_min - 1)* - (INT64_OR_DOUBLE)length_prevgroup > - (INT64_OR_DOUBLE)INT_MAX || + if ((double)(repeat_min - 1)*(double)length_prevgroup > + (double)INT_MAX || OFLOW_MAX - *lengthptr < delta) { *errorcodeptr = ERR20; @@ -4267,16 +4207,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ just adjust the length as if we had. For each repetition we must add 1 to the length for BRAZERO and for all but the last repetition we must add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some - paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is - a 64-bit integer type when available, otherwise double. */ + paranoid checks to avoid integer overflow. */ if (lengthptr != NULL && repeat_max > 0) { int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) - 2 - 2*LINK_SIZE; /* Last one doesn't nest */ - if ((INT64_OR_DOUBLE)repeat_max * - (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) - > (INT64_OR_DOUBLE)INT_MAX || + if ((double)repeat_max * + (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) + > (double)INT_MAX || OFLOW_MAX - *lengthptr < delta) { *errorcodeptr = ERR20; @@ -4396,20 +4335,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ if (possessive_quantifier) { int len; - - if (*tempcode == OP_TYPEEXACT) + if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT || + *tempcode == OP_NOTEXACT) tempcode += _pcre_OP_lengths[*tempcode] + - ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0); - - else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT) - { - tempcode += _pcre_OP_lengths[*tempcode]; -#ifdef SUPPORT_UTF8 - if (utf8 && tempcode[-1] >= 0xc0) - tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f]; -#endif - } - + ((*tempcode == OP_TYPEEXACT && + (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0); len = code - tempcode; if (len > 0) switch (*tempcode) { @@ -4487,19 +4417,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ if (namelen == verbs[i].len && strncmp((char *)name, vn, namelen) == 0) { - /* Check for open captures before ACCEPT */ - - if (verbs[i].op == OP_ACCEPT) - { - open_capitem *oc; - cd->had_accept = TRUE; - for (oc = cd->open_caps; oc != NULL; oc = oc->next) - { - *code++ = OP_CLOSE; - PUT2INC(code, 0, oc->number); - } - } - *code++ = verbs[i].op; + *code = verbs[i].op; + if (*code++ == OP_ACCEPT) cd->had_accept = TRUE; break; } vn += verbs[i].len + 1; @@ -4661,10 +4580,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ } /* Otherwise (did not start with "+" or "-"), start by looking for the - name. If we find a name, add one to the opcode to change OP_CREF or - OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same, - except they record that the reference was originally to a name. The - information is used to check duplicate names. */ + name. */ slot = cd->name_table; for (i = 0; i < cd->names_found; i++) @@ -4679,7 +4595,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ { recno = GET2(slot, 0); PUT2(code, 2+LINK_SIZE, recno); - code[1+LINK_SIZE]++; } /* Search the pattern for a forward reference */ @@ -4688,7 +4603,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ (options & PCRE_EXTENDED) != 0)) > 0) { PUT2(code, 2+LINK_SIZE, i); - code[1+LINK_SIZE]++; } /* If terminator == 0 it means that the name followed directly after @@ -4881,24 +4795,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ } } - /* In the real compile, create the entry in the table, maintaining - alphabetical order. Duplicate names for different numbers are - permitted only if PCRE_DUPNAMES is set. Duplicate names for the same - number are always OK. (An existing number can be re-used if (?| - appears in the pattern.) In either event, a duplicate name results in - a duplicate entry in the table, even if the number is the same. This - is because the number of names, and hence the table size, is computed - in the pre-compile, and it affects various numbers and pointers which - would all have to be modified, and the compiled code moved down, if - duplicates with the same number were omitted from the table. This - doesn't seem worth the hassle. However, *different* names for the - same number are not permitted. */ + /* In the real compile, create the entry in the table */ else { - BOOL dupname = FALSE; slot = cd->name_table; - for (i = 0; i < cd->names_found; i++) { int crc = memcmp(name, slot+2, namelen); @@ -4906,66 +4807,33 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ { if (slot[2+namelen] == 0) { - if (GET2(slot, 0) != cd->bracount + 1 && - (options & PCRE_DUPNAMES) == 0) + if ((options & PCRE_DUPNAMES) == 0) { *errorcodeptr = ERR43; goto FAILED; } - else dupname = TRUE; } - else crc = -1; /* Current name is a substring */ + else crc = -1; /* Current name is substring */ } - - /* Make space in the table and break the loop for an earlier - name. For a duplicate or later name, carry on. We do this for - duplicates so that in the simple case (when ?(| is not used) they - are in order of their numbers. */ - if (crc < 0) { memmove(slot + cd->name_entry_size, slot, (cd->names_found - i) * cd->name_entry_size); break; } - - /* Continue the loop for a later or duplicate name */ - slot += cd->name_entry_size; } - /* For non-duplicate names, check for a duplicate number before - adding the new name. */ - - if (!dupname) - { - uschar *cslot = cd->name_table; - for (i = 0; i < cd->names_found; i++) - { - if (cslot != slot) - { - if (GET2(cslot, 0) == cd->bracount + 1) - { - *errorcodeptr = ERR65; - goto FAILED; - } - } - else i--; - cslot += cd->name_entry_size; - } - } - PUT2(slot, 0, cd->bracount + 1); memcpy(slot + 2, name, namelen); slot[2+namelen] = 0; } } - /* In both pre-compile and compile, count the number of names we've - encountered. */ + /* In both cases, count the number of names we've encountered. */ - cd->names_found++; ptr++; /* Move past > or ' */ + cd->names_found++; goto NUMBERED_GROUP; @@ -5134,8 +5002,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ if (lengthptr == NULL) { *code = OP_END; - if (recno != 0) - called = _pcre_find_bracket(cd->start_code, utf8, recno); + if (recno != 0) called = find_bracket(cd->start_code, utf8, recno); /* Forward reference */ @@ -5251,7 +5118,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ { cd->external_options = newoptions; } - else + else { if ((options & PCRE_IMS) != (newoptions & PCRE_IMS)) { @@ -5588,7 +5455,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ if (-c >= ESC_REF) { - open_capitem *oc; recno = -c - ESC_REF; HANDLE_REFERENCE: /* Come here from named backref handling */ @@ -5598,19 +5464,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ PUT2INC(code, 0, recno); cd->backref_map |= (recno < 32)? (1 << recno) : 1; if (recno > cd->top_backref) cd->top_backref = recno; - - /* Check to see if this back reference is recursive, that it, it - is inside the group that it references. A flag is set so that the - group can be made atomic. */ - - for (oc = cd->open_caps; oc != NULL; oc = oc->next) - { - if (oc->number == recno) - { - oc->flag = TRUE; - break; - } - } } /* So are Unicode property matches, if supported. */ @@ -5793,18 +5646,15 @@ uschar *code = *codeptr; uschar *last_branch = code; uschar *start_bracket = code; uschar *reverse_count = NULL; -open_capitem capitem; -int capnumber = 0; int firstbyte, reqbyte; int branchfirstbyte, branchreqbyte; int length; int orig_bracount; int max_bracount; -int old_external_options = cd->external_options; branch_chain bc; bc.outer = bcptr; -bc.current_branch = code; +bc.current = code; firstbyte = reqbyte = REQ_UNSET; @@ -5822,19 +5672,6 @@ the code that abstracts option settings at the start of the pattern and makes them global. It tests the value of length for (2 + 2*LINK_SIZE) in the pre-compile phase to find out whether anything has yet been compiled or not. */ -/* If this is a capturing subpattern, add to the chain of open capturing items -so that we can detect them if (*ACCEPT) is encountered. This is also used to -detect groups that contain recursive back references to themselves. */ - -if (*code == OP_CBRA) - { - capnumber = GET2(code, 1 + LINK_SIZE); - capitem.number = capnumber; - capitem.next = cd->open_caps; - capitem.flag = FALSE; - cd->open_caps = &capitem; - } - /* Offset is set zero to mark that this bracket is still open */ PUT(code, 1, 0); @@ -5879,15 +5716,6 @@ for (;;) return FALSE; } - /* If the external options have changed during this branch, it means that we - are at the top level, and a leading option setting has been encountered. We - need to re-set the original option values to take account of this so that, - during the pre-compile phase, we know to allow for a re-set at the start of - subsequent branches. */ - - if (old_external_options != cd->external_options) - oldims = cd->external_options & PCRE_IMS; - /* Keep the highest bracket count in case (?| was used and some branch has fewer than the rest. */ @@ -5938,29 +5766,21 @@ for (;;) /* If lookbehind, check that this branch matches a fixed-length string, and put the length into the OP_REVERSE item. Temporarily mark the end of the - branch with OP_END. If the branch contains OP_RECURSE, the result is -3 - because there may be forward references that we can't check here. Set a - flag to cause another lookbehind check at the end. Why not do it all at the - end? Because common, erroneous checks are picked up here and the offset of - the problem can be shown. */ + branch with OP_END. */ if (lookbehind) { int fixed_length; *code = OP_END; - fixed_length = find_fixedlength(last_branch, options, FALSE, cd); + fixed_length = find_fixedlength(last_branch, options); DPRINTF(("fixed length = %d\n", fixed_length)); - if (fixed_length == -3) - { - cd->check_lookbehind = TRUE; - } - else if (fixed_length < 0) + if (fixed_length < 0) { *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25; *ptrptr = ptr; return FALSE; } - else { PUT(reverse_count, 0, fixed_length); } + PUT(reverse_count, 0, fixed_length); } } @@ -5994,28 +5814,7 @@ for (;;) PUT(code, 1, code - start_bracket); code += 1 + LINK_SIZE; - /* If it was a capturing subpattern, check to see if it contained any - recursive back references. If so, we must wrap it in atomic brackets. - In any event, remove the block from the chain. */ - - if (capnumber > 0) - { - if (cd->open_caps->flag) - { - memmove(start_bracket + 1 + LINK_SIZE, start_bracket, - code - start_bracket); - *start_bracket = OP_ONCE; - code += 1 + LINK_SIZE; - PUT(start_bracket, 1, code - start_bracket); - *code = OP_KET; - PUT(code, 1, code - start_bracket); - code += 1 + LINK_SIZE; - length += 2 + 2*LINK_SIZE; - } - cd->open_caps = cd->open_caps->next; - } - - /* Reset options if needed. */ + /* Resetting option if needed */ if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS) { @@ -6064,7 +5863,7 @@ for (;;) { *code = OP_ALT; PUT(code, 1, code - last_branch); - bc.current_branch = last_branch = code; + bc.current = last_branch = code; code += 1 + LINK_SIZE; } @@ -6211,9 +6010,7 @@ do { switch (*scode) { case OP_CREF: - case OP_NCREF: case OP_RREF: - case OP_NRREF: case OP_DEF: return FALSE; @@ -6382,7 +6179,9 @@ int length = 1; /* For final END opcode */ int firstbyte, reqbyte, newline; int errorcode = 0; int skipatstart = 0; -BOOL utf8 = (options & PCRE_UTF8) != 0; +#ifdef SUPPORT_UTF8 +BOOL utf8; +#endif size_t size; uschar *code; const uschar *codestart; @@ -6479,14 +6278,15 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && /* Can't support UTF8 unless PCRE has been compiled to include the code. */ #ifdef SUPPORT_UTF8 +utf8 = (options & PCRE_UTF8) != 0; if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && - (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1)) >= 0) + (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0) { errorcode = ERR44; goto PCRE_EARLY_ERROR_RETURN2; } #else -if (utf8) +if ((options & PCRE_UTF8) != 0) { errorcode = ERR32; goto PCRE_EARLY_ERROR_RETURN; @@ -6575,7 +6375,6 @@ cd->end_pattern = (const uschar *)(pattern + strlen(pattern)); cd->req_varyopt = 0; cd->external_options = options; cd->external_flags = 0; -cd->open_caps = NULL; /* Now do the pre-compile. On error, errorcode will be set non-zero, so we don't need to look at the result of the function here. The initial options have @@ -6650,8 +6449,6 @@ cd->start_code = codestart; cd->hwm = cworkspace; cd->req_varyopt = 0; cd->had_accept = FALSE; -cd->check_lookbehind = FALSE; -cd->open_caps = NULL; /* Set up a starting, non-extracting bracket, then compile the expression. On error, errorcode will be set non-zero, so we don't need to look at the result @@ -6677,7 +6474,7 @@ if debugging, leave the test till after things are printed out. */ *code++ = OP_END; -#ifndef PCRE_DEBUG +#ifndef DEBUG if (code - codestart > length) errorcode = ERR23; #endif @@ -6690,7 +6487,7 @@ while (errorcode == 0 && cd->hwm > cworkspace) cd->hwm -= LINK_SIZE; offset = GET(cd->hwm, 0); recno = GET(codestart, offset); - groupptr = _pcre_find_bracket(codestart, utf8, recno); + groupptr = find_bracket(codestart, (re->options & PCRE_UTF8) != 0, recno); if (groupptr == NULL) errorcode = ERR53; else PUT(((uschar *)codestart), offset, groupptr - codestart); } @@ -6700,47 +6497,6 @@ subpattern. */ if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15; -/* If there were any lookbehind assertions that contained OP_RECURSE -(recursions or subroutine calls), a flag is set for them to be checked here, -because they may contain forward references. Actual recursions can't be fixed -length, but subroutine calls can. It is done like this so that those without -OP_RECURSE that are not fixed length get a diagnosic with a useful offset. The -exceptional ones forgo this. We scan the pattern to check that they are fixed -length, and set their lengths. */ - -if (cd->check_lookbehind) - { - uschar *cc = (uschar *)codestart; - - /* Loop, searching for OP_REVERSE items, and process those that do not have - their length set. (Actually, it will also re-process any that have a length - of zero, but that is a pathological case, and it does no harm.) When we find - one, we temporarily terminate the branch it is in while we scan it. */ - - for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1); - cc != NULL; - cc = (uschar *)_pcre_find_bracket(cc, utf8, -1)) - { - if (GET(cc, 1) == 0) - { - int fixed_length; - uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE); - int end_op = *be; - *be = OP_END; - fixed_length = find_fixedlength(cc, re->options, TRUE, cd); - *be = end_op; - DPRINTF(("fixed length = %d\n", fixed_length)); - if (fixed_length < 0) - { - errorcode = (fixed_length == -2)? ERR36 : ERR25; - break; - } - PUT(cc, 1, fixed_length); - } - cc += 1 + LINK_SIZE; - } - } - /* Failed to compile, or error while post-processing */ if (errorcode != 0) @@ -6801,7 +6557,7 @@ if (reqbyte >= 0 && /* Print out the compiled data if debugging is enabled. This is never the case when building a production library. */ -#ifdef PCRE_DEBUG +#ifdef DEBUG printf("Length = %d top_bracket = %d top_backref = %d\n", length, re->top_bracket, re->top_backref); @@ -6839,7 +6595,7 @@ if (code - codestart > length) if (errorcodeptr != NULL) *errorcodeptr = ERR23; return NULL; } -#endif /* PCRE_DEBUG */ +#endif /* DEBUG */ return (pcre *)re; } |
