summary | refs | log | tree | commit | diff
path: root/ext/pcre/pcrelib/pcre_compile.c
diff options
context:
space:
mode:
author: Ilia Alshanetsky <iliaa@php.net> 2010-02-03 12:59:00 +0000
committer: Ilia Alshanetsky <iliaa@php.net> 2010-02-03 12:59:00 +0000
commit: 91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8 (patch)
tree: 1817d9652f110cf0cd4644ed8586ca56d0c05d9e /ext/pcre/pcrelib/pcre_compile.c
parent: 43d5429381237518ced74149f29a851c30307bea (diff)
download: php-git-91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8.tar.gz
Downgrade bundled PCRE to version 7.9 due to 8.0+ version use of C99
Diffstat (limited to 'ext/pcre/pcrelib/pcre_compile.c')
-rw-r--r-- ext/pcre/pcrelib/pcre_compile.c 396
1 files changed, 76 insertions, 320 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c
index eaf3d90ce8..1e0672c5cd 100644
--- a/ext/pcre/pcrelib/pcre_compile.c
+++ b/ext/pcre/pcrelib/pcre_compile.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2010 University of Cambridge
+ Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -51,11 +51,10 @@ supporting internal functions that are not used by other modules. */
#include "pcre_internal.h"
-/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
-also used by pcretest. PCRE_DEBUG is not defined when building a production
-library. */
+/* When DEBUG is defined, we need the pcre_printint() function, which is also
+used by pcretest. DEBUG is not defined when building a production library. */
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
#include "pcre_printint.src"
#endif
@@ -340,9 +339,7 @@ static const char error_texts[] =
"number is too big\0"
"subpattern name expected\0"
"digit expected after (?+\0"
- "] is an invalid data character in JavaScript compatibility mode\0"
- /* 65 */
- "different names for subpatterns of the same number are not allowed";
+ "] is an invalid data character in JavaScript compatibility mode";
/* Table to identify digits and hex digits. This is used when compiling
@@ -1101,7 +1098,6 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS)
if (name != NULL && lorn == ptr - thisname &&
strncmp((const char *)name, (const char *)thisname, lorn) == 0)
return *count;
- term++;
}
}
}
@@ -1136,21 +1132,19 @@ for (; *ptr != 0; ptr++)
BOOL negate_class = FALSE;
for (;;)
{
- if (ptr[1] == CHAR_BACKSLASH)
+ int c = *(++ptr);
+ if (c == CHAR_BACKSLASH)
{
- if (ptr[2] == CHAR_E)
- ptr+= 2;
- else if (strncmp((const char *)ptr+2,
+ if (ptr[1] == CHAR_E)
+ ptr++;
+ else if (strncmp((const char *)ptr+1,
STR_Q STR_BACKSLASH STR_E, 3) == 0)
- ptr += 4;
+ ptr += 3;
else
break;
}
- else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
- {
+ else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
negate_class = TRUE;
- ptr++;
- }
else break;
}
@@ -1316,9 +1310,7 @@ for (;;)
case OP_CALLOUT:
case OP_CREF:
- case OP_NCREF:
case OP_RREF:
- case OP_NRREF:
case OP_DEF:
code += _pcre_OP_lengths[*code];
break;
@@ -1334,34 +1326,23 @@ for (;;)
/*************************************************
-* Find the fixed length of a branch *
+* Find the fixed length of a pattern *
*************************************************/
-/* Scan a branch and compute the fixed length of subject that will match it,
+/* Scan a pattern and compute the fixed length of subject that will match it,
if the length is fixed. This is needed for dealing with backward assertions.
-In UTF8 mode, the result is in characters rather than bytes. The branch is
-temporarily terminated with OP_END when this function is called.
-
-This function is called when a backward assertion is encountered, so that if it
-fails, the error message can point to the correct place in the pattern.
-However, we cannot do this when the assertion contains subroutine calls,
-because they can be forward references. We solve this by remembering this case
-and doing the check at the end; a flag specifies which mode we are running in.
+In UTF8 mode, the result is in characters rather than bytes.
Arguments:
code points to the start of the pattern (the bracket)
options the compiling options
- atend TRUE if called when the pattern is complete
- cd the "compile data" structure
-Returns: the fixed length,
- or -1 if there is no fixed length,
+Returns: the fixed length, or -1 if there is no fixed length,
or -2 if \C was encountered
- or -3 if an OP_RECURSE item was encountered and atend is FALSE
*/
static int
-find_fixedlength(uschar *code, int options, BOOL atend, compile_data *cd)
+find_fixedlength(uschar *code, int options)
{
int length = -1;
@@ -1374,7 +1355,6 @@ branch, check the length against that of the other branches. */
for (;;)
{
int d;
- uschar *ce, *cs;
register int op = *cc;
switch (op)
{
@@ -1382,7 +1362,7 @@ for (;;)
case OP_BRA:
case OP_ONCE:
case OP_COND:
- d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options, atend, cd);
+ d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options);
if (d < 0) return d;
branchlength += d;
do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -1405,21 +1385,6 @@ for (;;)
branchlength = 0;
break;
- /* A true recursion implies not fixed length, but a subroutine call may
- be OK. If the subroutine is a forward reference, we can't deal with
- it until the end of the pattern, so return -3. */
-
- case OP_RECURSE:
- if (!atend) return -3;
- cs = ce = (uschar *)cd->start_code + GET(cc, 1); /* Start subpattern */
- do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */
- if (cc > cs && cc < ce) return -1; /* Recursion */
- d = find_fixedlength(cs + 2, options, atend, cd);
- if (d < 0) return d;
- branchlength += d;
- cc += 1 + LINK_SIZE;
- break;
-
/* Skip over assertive subpatterns */
case OP_ASSERT:
@@ -1433,9 +1398,7 @@ for (;;)
case OP_REVERSE:
case OP_CREF:
- case OP_NCREF:
case OP_RREF:
- case OP_NRREF:
case OP_DEF:
case OP_OPT:
case OP_CALLOUT:
@@ -1458,8 +1421,10 @@ for (;;)
branchlength++;
cc += 2;
#ifdef SUPPORT_UTF8
- if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
- cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+ if ((options & PCRE_UTF8) != 0)
+ {
+ while ((*cc & 0xc0) == 0x80) cc++;
+ }
#endif
break;
@@ -1470,8 +1435,10 @@ for (;;)
branchlength += GET2(cc,1);
cc += 4;
#ifdef SUPPORT_UTF8
- if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
- cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+ if ((options & PCRE_UTF8) != 0)
+ {
+ while((*cc & 0x80) == 0x80) cc++;
+ }
#endif
break;
@@ -1550,25 +1517,22 @@ for (;;)
/*************************************************
-* Scan compiled regex for specific bracket *
+* Scan compiled regex for numbered bracket *
*************************************************/
/* This little function scans through a compiled pattern until it finds a
-capturing bracket with the given number, or, if the number is negative, an
-instance of OP_REVERSE for a lookbehind. The function is global in the C sense
-so that it can be called from pcre_study() when finding the minimum matching
-length.
+capturing bracket with the given number.
Arguments:
code points to start of expression
utf8 TRUE in UTF-8 mode
- number the required bracket number or negative to find a lookbehind
+ number the required bracket number
Returns: pointer to the opcode for the bracket, or NULL if not found
*/
-const uschar *
-_pcre_find_bracket(const uschar *code, BOOL utf8, int number)
+static const uschar *
+find_bracket(const uschar *code, BOOL utf8, int number)
{
for (;;)
{
@@ -1581,14 +1545,6 @@ for (;;)
if (c == OP_XCLASS) code += GET(code, 1);
- /* Handle recursion */
-
- else if (c == OP_REVERSE)
- {
- if (number < 0) return (uschar *)code;
- code += _pcre_OP_lengths[c];
- }
-
/* Handle capturing bracket */
else if (c == OP_CBRA)
@@ -1954,13 +1910,10 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
- if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
- break;
-
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
- if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
+ if (utf8) while ((code[2] & 0xc0) == 0x80) code++;
break;
#endif
}
@@ -1993,10 +1946,9 @@ static BOOL
could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
BOOL utf8)
{
-while (bcptr != NULL && bcptr->current_branch >= code)
+while (bcptr != NULL && bcptr->current >= code)
{
- if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
- return FALSE;
+ if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;
bcptr = bcptr->outer;
}
return TRUE;
@@ -2658,7 +2610,7 @@ BOOL utf8 = FALSE;
uschar *utf8_char = NULL;
#endif
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
if (lengthptr != NULL) DPRINTF((">> start branch\n"));
#endif
@@ -2717,7 +2669,7 @@ for (;; ptr++)
if (lengthptr != NULL)
{
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
if (code > cd->hwm) cd->hwm = code; /* High water info */
#endif
if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */
@@ -3915,15 +3867,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
if (repeat_max == 0) goto END_REPEAT;
- /*--------------------------------------------------------------------*/
- /* This code is obsolete from release 8.00; the restriction was finally
- removed: */
-
/* All real repeats make it impossible to handle partial matching (maybe
one day we will be able to remove this restriction). */
- /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
- /*--------------------------------------------------------------------*/
+ if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
/* Combine the op_type with the repeat_type */
@@ -4070,15 +4017,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
goto END_REPEAT;
}
- /*--------------------------------------------------------------------*/
- /* This code is obsolete from release 8.00; the restriction was finally
- removed: */
-
/* All real repeats make it impossible to handle partial matching (maybe
one day we will be able to remove this restriction). */
- /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
- /*--------------------------------------------------------------------*/
+ if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
if (repeat_min == 0 && repeat_max == -1)
*code++ = OP_CRSTAR + repeat_type;
@@ -4213,15 +4155,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{
/* In the pre-compile phase, we don't actually do the replication. We
just adjust the length as if we had. Do some paranoid checks for
- potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
- integer type when available, otherwise double. */
+ potential integer overflow. */
if (lengthptr != NULL)
{
int delta = (repeat_min - 1)*length_prevgroup;
- if ((INT64_OR_DOUBLE)(repeat_min - 1)*
- (INT64_OR_DOUBLE)length_prevgroup >
- (INT64_OR_DOUBLE)INT_MAX ||
+ if ((double)(repeat_min - 1)*(double)length_prevgroup >
+ (double)INT_MAX ||
OFLOW_MAX - *lengthptr < delta)
{
*errorcodeptr = ERR20;
@@ -4267,16 +4207,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
just adjust the length as if we had. For each repetition we must add 1
to the length for BRAZERO and for all but the last repetition we must
add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
- paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
- a 64-bit integer type when available, otherwise double. */
+ paranoid checks to avoid integer overflow. */
if (lengthptr != NULL && repeat_max > 0)
{
int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
2 - 2*LINK_SIZE; /* Last one doesn't nest */
- if ((INT64_OR_DOUBLE)repeat_max *
- (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
- > (INT64_OR_DOUBLE)INT_MAX ||
+ if ((double)repeat_max *
+ (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
+ > (double)INT_MAX ||
OFLOW_MAX - *lengthptr < delta)
{
*errorcodeptr = ERR20;
@@ -4396,20 +4335,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
if (possessive_quantifier)
{
int len;
-
- if (*tempcode == OP_TYPEEXACT)
+ if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
+ *tempcode == OP_NOTEXACT)
tempcode += _pcre_OP_lengths[*tempcode] +
- ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
-
- else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
- {
- tempcode += _pcre_OP_lengths[*tempcode];
-#ifdef SUPPORT_UTF8
- if (utf8 && tempcode[-1] >= 0xc0)
- tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
-#endif
- }
-
+ ((*tempcode == OP_TYPEEXACT &&
+ (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
len = code - tempcode;
if (len > 0) switch (*tempcode)
{
@@ -4487,19 +4417,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
if (namelen == verbs[i].len &&
strncmp((char *)name, vn, namelen) == 0)
{
- /* Check for open captures before ACCEPT */
-
- if (verbs[i].op == OP_ACCEPT)
- {
- open_capitem *oc;
- cd->had_accept = TRUE;
- for (oc = cd->open_caps; oc != NULL; oc = oc->next)
- {
- *code++ = OP_CLOSE;
- PUT2INC(code, 0, oc->number);
- }
- }
- *code++ = verbs[i].op;
+ *code = verbs[i].op;
+ if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
break;
}
vn += verbs[i].len + 1;
@@ -4661,10 +4580,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
}
/* Otherwise (did not start with "+" or "-"), start by looking for the
- name. If we find a name, add one to the opcode to change OP_CREF or
- OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same,
- except they record that the reference was originally to a name. The
- information is used to check duplicate names. */
+ name. */
slot = cd->name_table;
for (i = 0; i < cd->names_found; i++)
@@ -4679,7 +4595,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{
recno = GET2(slot, 0);
PUT2(code, 2+LINK_SIZE, recno);
- code[1+LINK_SIZE]++;
}
/* Search the pattern for a forward reference */
@@ -4688,7 +4603,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
(options & PCRE_EXTENDED) != 0)) > 0)
{
PUT2(code, 2+LINK_SIZE, i);
- code[1+LINK_SIZE]++;
}
/* If terminator == 0 it means that the name followed directly after
@@ -4881,24 +4795,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
}
}
- /* In the real compile, create the entry in the table, maintaining
- alphabetical order. Duplicate names for different numbers are
- permitted only if PCRE_DUPNAMES is set. Duplicate names for the same
- number are always OK. (An existing number can be re-used if (?|
- appears in the pattern.) In either event, a duplicate name results in
- a duplicate entry in the table, even if the number is the same. This
- is because the number of names, and hence the table size, is computed
- in the pre-compile, and it affects various numbers and pointers which
- would all have to be modified, and the compiled code moved down, if
- duplicates with the same number were omitted from the table. This
- doesn't seem worth the hassle. However, *different* names for the
- same number are not permitted. */
+ /* In the real compile, create the entry in the table */
else
{
- BOOL dupname = FALSE;
slot = cd->name_table;
-
for (i = 0; i < cd->names_found; i++)
{
int crc = memcmp(name, slot+2, namelen);
@@ -4906,66 +4807,33 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{
if (slot[2+namelen] == 0)
{
- if (GET2(slot, 0) != cd->bracount + 1 &&
- (options & PCRE_DUPNAMES) == 0)
+ if ((options & PCRE_DUPNAMES) == 0)
{
*errorcodeptr = ERR43;
goto FAILED;
}
- else dupname = TRUE;
}
- else crc = -1; /* Current name is a substring */
+ else crc = -1; /* Current name is substring */
}
-
- /* Make space in the table and break the loop for an earlier
- name. For a duplicate or later name, carry on. We do this for
- duplicates so that in the simple case (when ?(| is not used) they
- are in order of their numbers. */
-
if (crc < 0)
{
memmove(slot + cd->name_entry_size, slot,
(cd->names_found - i) * cd->name_entry_size);
break;
}
-
- /* Continue the loop for a later or duplicate name */
-
slot += cd->name_entry_size;
}
- /* For non-duplicate names, check for a duplicate number before
- adding the new name. */
-
- if (!dupname)
- {
- uschar *cslot = cd->name_table;
- for (i = 0; i < cd->names_found; i++)
- {
- if (cslot != slot)
- {
- if (GET2(cslot, 0) == cd->bracount + 1)
- {
- *errorcodeptr = ERR65;
- goto FAILED;
- }
- }
- else i--;
- cslot += cd->name_entry_size;
- }
- }
-
PUT2(slot, 0, cd->bracount + 1);
memcpy(slot + 2, name, namelen);
slot[2+namelen] = 0;
}
}
- /* In both pre-compile and compile, count the number of names we've
- encountered. */
+ /* In both cases, count the number of names we've encountered. */
- cd->names_found++;
ptr++; /* Move past > or ' */
+ cd->names_found++;
goto NUMBERED_GROUP;
@@ -5134,8 +5002,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
if (lengthptr == NULL)
{
*code = OP_END;
- if (recno != 0)
- called = _pcre_find_bracket(cd->start_code, utf8, recno);
+ if (recno != 0) called = find_bracket(cd->start_code, utf8, recno);
/* Forward reference */
@@ -5251,7 +5118,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{
cd->external_options = newoptions;
}
- else
+ else
{
if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))
{
@@ -5588,7 +5455,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
if (-c >= ESC_REF)
{
- open_capitem *oc;
recno = -c - ESC_REF;
HANDLE_REFERENCE: /* Come here from named backref handling */
@@ -5598,19 +5464,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
PUT2INC(code, 0, recno);
cd->backref_map |= (recno < 32)? (1 << recno) : 1;
if (recno > cd->top_backref) cd->top_backref = recno;
-
- /* Check to see if this back reference is recursive, that it, it
- is inside the group that it references. A flag is set so that the
- group can be made atomic. */
-
- for (oc = cd->open_caps; oc != NULL; oc = oc->next)
- {
- if (oc->number == recno)
- {
- oc->flag = TRUE;
- break;
- }
- }
}
/* So are Unicode property matches, if supported. */
@@ -5793,18 +5646,15 @@ uschar *code = *codeptr;
uschar *last_branch = code;
uschar *start_bracket = code;
uschar *reverse_count = NULL;
-open_capitem capitem;
-int capnumber = 0;
int firstbyte, reqbyte;
int branchfirstbyte, branchreqbyte;
int length;
int orig_bracount;
int max_bracount;
-int old_external_options = cd->external_options;
branch_chain bc;
bc.outer = bcptr;
-bc.current_branch = code;
+bc.current = code;
firstbyte = reqbyte = REQ_UNSET;
@@ -5822,19 +5672,6 @@ the code that abstracts option settings at the start of the pattern and makes
them global. It tests the value of length for (2 + 2*LINK_SIZE) in the
pre-compile phase to find out whether anything has yet been compiled or not. */
-/* If this is a capturing subpattern, add to the chain of open capturing items
-so that we can detect them if (*ACCEPT) is encountered. This is also used to
-detect groups that contain recursive back references to themselves. */
-
-if (*code == OP_CBRA)
- {
- capnumber = GET2(code, 1 + LINK_SIZE);
- capitem.number = capnumber;
- capitem.next = cd->open_caps;
- capitem.flag = FALSE;
- cd->open_caps = &capitem;
- }
-
/* Offset is set zero to mark that this bracket is still open */
PUT(code, 1, 0);
@@ -5879,15 +5716,6 @@ for (;;)
return FALSE;
}
- /* If the external options have changed during this branch, it means that we
- are at the top level, and a leading option setting has been encountered. We
- need to re-set the original option values to take account of this so that,
- during the pre-compile phase, we know to allow for a re-set at the start of
- subsequent branches. */
-
- if (old_external_options != cd->external_options)
- oldims = cd->external_options & PCRE_IMS;
-
/* Keep the highest bracket count in case (?| was used and some branch
has fewer than the rest. */
@@ -5938,29 +5766,21 @@ for (;;)
/* If lookbehind, check that this branch matches a fixed-length string, and
put the length into the OP_REVERSE item. Temporarily mark the end of the
- branch with OP_END. If the branch contains OP_RECURSE, the result is -3
- because there may be forward references that we can't check here. Set a
- flag to cause another lookbehind check at the end. Why not do it all at the
- end? Because common, erroneous checks are picked up here and the offset of
- the problem can be shown. */
+ branch with OP_END. */
if (lookbehind)
{
int fixed_length;
*code = OP_END;
- fixed_length = find_fixedlength(last_branch, options, FALSE, cd);
+ fixed_length = find_fixedlength(last_branch, options);
DPRINTF(("fixed length = %d\n", fixed_length));
- if (fixed_length == -3)
- {
- cd->check_lookbehind = TRUE;
- }
- else if (fixed_length < 0)
+ if (fixed_length < 0)
{
*errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;
*ptrptr = ptr;
return FALSE;
}
- else { PUT(reverse_count, 0, fixed_length); }
+ PUT(reverse_count, 0, fixed_length);
}
}
@@ -5994,28 +5814,7 @@ for (;;)
PUT(code, 1, code - start_bracket);
code += 1 + LINK_SIZE;
- /* If it was a capturing subpattern, check to see if it contained any
- recursive back references. If so, we must wrap it in atomic brackets.
- In any event, remove the block from the chain. */
-
- if (capnumber > 0)
- {
- if (cd->open_caps->flag)
- {
- memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
- code - start_bracket);
- *start_bracket = OP_ONCE;
- code += 1 + LINK_SIZE;
- PUT(start_bracket, 1, code - start_bracket);
- *code = OP_KET;
- PUT(code, 1, code - start_bracket);
- code += 1 + LINK_SIZE;
- length += 2 + 2*LINK_SIZE;
- }
- cd->open_caps = cd->open_caps->next;
- }
-
- /* Reset options if needed. */
+ /* Resetting option if needed */
if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
{
@@ -6064,7 +5863,7 @@ for (;;)
{
*code = OP_ALT;
PUT(code, 1, code - last_branch);
- bc.current_branch = last_branch = code;
+ bc.current = last_branch = code;
code += 1 + LINK_SIZE;
}
@@ -6211,9 +6010,7 @@ do {
switch (*scode)
{
case OP_CREF:
- case OP_NCREF:
case OP_RREF:
- case OP_NRREF:
case OP_DEF:
return FALSE;
@@ -6382,7 +6179,9 @@ int length = 1; /* For final END opcode */
int firstbyte, reqbyte, newline;
int errorcode = 0;
int skipatstart = 0;
-BOOL utf8 = (options & PCRE_UTF8) != 0;
+#ifdef SUPPORT_UTF8
+BOOL utf8;
+#endif
size_t size;
uschar *code;
const uschar *codestart;
@@ -6479,14 +6278,15 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
/* Can't support UTF8 unless PCRE has been compiled to include the code. */
#ifdef SUPPORT_UTF8
+utf8 = (options & PCRE_UTF8) != 0;
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
- (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1)) >= 0)
+ (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
{
errorcode = ERR44;
goto PCRE_EARLY_ERROR_RETURN2;
}
#else
-if (utf8)
+if ((options & PCRE_UTF8) != 0)
{
errorcode = ERR32;
goto PCRE_EARLY_ERROR_RETURN;
@@ -6575,7 +6375,6 @@ cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
cd->req_varyopt = 0;
cd->external_options = options;
cd->external_flags = 0;
-cd->open_caps = NULL;
/* Now do the pre-compile. On error, errorcode will be set non-zero, so we
don't need to look at the result of the function here. The initial options have
@@ -6650,8 +6449,6 @@ cd->start_code = codestart;
cd->hwm = cworkspace;
cd->req_varyopt = 0;
cd->had_accept = FALSE;
-cd->check_lookbehind = FALSE;
-cd->open_caps = NULL;
/* Set up a starting, non-extracting bracket, then compile the expression. On
error, errorcode will be set non-zero, so we don't need to look at the result
@@ -6677,7 +6474,7 @@ if debugging, leave the test till after things are printed out. */
*code++ = OP_END;
-#ifndef PCRE_DEBUG
+#ifndef DEBUG
if (code - codestart > length) errorcode = ERR23;
#endif
@@ -6690,7 +6487,7 @@ while (errorcode == 0 && cd->hwm > cworkspace)
cd->hwm -= LINK_SIZE;
offset = GET(cd->hwm, 0);
recno = GET(codestart, offset);
- groupptr = _pcre_find_bracket(codestart, utf8, recno);
+ groupptr = find_bracket(codestart, (re->options & PCRE_UTF8) != 0, recno);
if (groupptr == NULL) errorcode = ERR53;
else PUT(((uschar *)codestart), offset, groupptr - codestart);
}
@@ -6700,47 +6497,6 @@ subpattern. */
if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
-/* If there were any lookbehind assertions that contained OP_RECURSE
-(recursions or subroutine calls), a flag is set for them to be checked here,
-because they may contain forward references. Actual recursions can't be fixed
-length, but subroutine calls can. It is done like this so that those without
-OP_RECURSE that are not fixed length get a diagnosic with a useful offset. The
-exceptional ones forgo this. We scan the pattern to check that they are fixed
-length, and set their lengths. */
-
-if (cd->check_lookbehind)
- {
- uschar *cc = (uschar *)codestart;
-
- /* Loop, searching for OP_REVERSE items, and process those that do not have
- their length set. (Actually, it will also re-process any that have a length
- of zero, but that is a pathological case, and it does no harm.) When we find
- one, we temporarily terminate the branch it is in while we scan it. */
-
- for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);
- cc != NULL;
- cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))
- {
- if (GET(cc, 1) == 0)
- {
- int fixed_length;
- uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
- int end_op = *be;
- *be = OP_END;
- fixed_length = find_fixedlength(cc, re->options, TRUE, cd);
- *be = end_op;
- DPRINTF(("fixed length = %d\n", fixed_length));
- if (fixed_length < 0)
- {
- errorcode = (fixed_length == -2)? ERR36 : ERR25;
- break;
- }
- PUT(cc, 1, fixed_length);
- }
- cc += 1 + LINK_SIZE;
- }
- }
-
/* Failed to compile, or error while post-processing */
if (errorcode != 0)
@@ -6801,7 +6557,7 @@ if (reqbyte >= 0 &&
/* Print out the compiled data if debugging is enabled. This is never the
case when building a production library. */
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
printf("Length = %d top_bracket = %d top_backref = %d\n",
length, re->top_bracket, re->top_backref);
@@ -6839,7 +6595,7 @@ if (code - codestart > length)
if (errorcodeptr != NULL) *errorcodeptr = ERR23;
return NULL;
}
-#endif /* PCRE_DEBUG */
+#endif /* DEBUG */
return (pcre *)re;
}