Downgrade bundled PCRE to version 7.9 due to 8.0+ version use of C99

author: Ilia Alshanetsky <iliaa@php.net> 2010-02-03 12:59:00 +0000
committer: Ilia Alshanetsky <iliaa@php.net> 2010-02-03 12:59:00 +0000
commit: 91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8 (patch)
tree: 1817d9652f110cf0cd4644ed8586ca56d0c05d9e /ext/pcre/pcrelib/pcre_compile.c
parent: 43d5429381237518ced74149f29a851c30307bea (diff)
download: php-git-91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8.tar.gz
1 files changed, 76 insertions, 320 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c
index eaf3d90ce8..1e0672c5cd 100644
--- a/ext/pcre/pcrelib/pcre_compile.c
+++ b/ext/pcre/pcrelib/pcre_compile.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2010 University of Cambridge
+           Copyright (c) 1997-2009 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -51,11 +51,10 @@ supporting internal functions that are not used by other modules. */
 #include "pcre_internal.h"
 
 
-/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
-also used by pcretest. PCRE_DEBUG is not defined when building a production
-library. */
+/* When DEBUG is defined, we need the pcre_printint() function, which is also
+used by pcretest. DEBUG is not defined when building a production library. */
 
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
 #include "pcre_printint.src"
 #endif
 
@@ -340,9 +339,7 @@ static const char error_texts[] =
   "number is too big\0"
   "subpattern name expected\0"
   "digit expected after (?+\0"
-  "] is an invalid data character in JavaScript compatibility mode\0"
-  /* 65 */
-  "different names for subpatterns of the same number are not allowed";
+  "] is an invalid data character in JavaScript compatibility mode";
 
 
 /* Table to identify digits and hex digits. This is used when compiling
@@ -1101,7 +1098,6 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS)
       if (name != NULL && lorn == ptr - thisname &&
           strncmp((const char *)name, (const char *)thisname, lorn) == 0)
         return *count;
-      term++;
       }
     }
   }
@@ -1136,21 +1132,19 @@ for (; *ptr != 0; ptr++)
     BOOL negate_class = FALSE;
     for (;;)
       {
-      if (ptr[1] == CHAR_BACKSLASH)
+      int c = *(++ptr);
+      if (c == CHAR_BACKSLASH)
         {
-        if (ptr[2] == CHAR_E)
-          ptr+= 2;
-        else if (strncmp((const char *)ptr+2,
+        if (ptr[1] == CHAR_E)
+          ptr++;
+        else if (strncmp((const char *)ptr+1,
                  STR_Q STR_BACKSLASH STR_E, 3) == 0)
-          ptr += 4;
+          ptr += 3;
         else
           break;
         }
-      else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
-        {
+      else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
         negate_class = TRUE;
-        ptr++;
-        }
       else break;
       }
 
@@ -1316,9 +1310,7 @@ for (;;)
 
     case OP_CALLOUT:
     case OP_CREF:
-    case OP_NCREF:
     case OP_RREF:
-    case OP_NRREF:
     case OP_DEF:
     code += _pcre_OP_lengths[*code];
     break;
@@ -1334,34 +1326,23 @@ for (;;)
 
 
 /*************************************************
-*        Find the fixed length of a branch       *
+*        Find the fixed length of a pattern      *
 *************************************************/
 
-/* Scan a branch and compute the fixed length of subject that will match it,
+/* Scan a pattern and compute the fixed length of subject that will match it,
 if the length is fixed. This is needed for dealing with backward assertions.
-In UTF8 mode, the result is in characters rather than bytes. The branch is
-temporarily terminated with OP_END when this function is called.
-
-This function is called when a backward assertion is encountered, so that if it
-fails, the error message can point to the correct place in the pattern.
-However, we cannot do this when the assertion contains subroutine calls,
-because they can be forward references. We solve this by remembering this case
-and doing the check at the end; a flag specifies which mode we are running in.
+In UTF8 mode, the result is in characters rather than bytes.
 
 Arguments:
   code     points to the start of the pattern (the bracket)
   options  the compiling options
-  atend    TRUE if called when the pattern is complete
-  cd       the "compile data" structure
 
-Returns:   the fixed length,
-             or -1 if there is no fixed length,
+Returns:   the fixed length, or -1 if there is no fixed length,
              or -2 if \C was encountered
-             or -3 if an OP_RECURSE item was encountered and atend is FALSE
 */
 
 static int
-find_fixedlength(uschar *code, int options, BOOL atend, compile_data *cd)
+find_fixedlength(uschar *code, int options)
 {
 int length = -1;
 
@@ -1374,7 +1355,6 @@ branch, check the length against that of the other branches. */
 for (;;)
   {
   int d;
-  uschar *ce, *cs;
   register int op = *cc;
   switch (op)
     {
@@ -1382,7 +1362,7 @@ for (;;)
     case OP_BRA:
     case OP_ONCE:
     case OP_COND:
-    d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options, atend, cd);
+    d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options);
     if (d < 0) return d;
     branchlength += d;
     do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -1405,21 +1385,6 @@ for (;;)
     branchlength = 0;
     break;
 
-    /* A true recursion implies not fixed length, but a subroutine call may
-    be OK. If the subroutine is a forward reference, we can't deal with
-    it until the end of the pattern, so return -3. */
-
-    case OP_RECURSE:
-    if (!atend) return -3;
-    cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
-    do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */
-    if (cc > cs && cc < ce) return -1;                /* Recursion */
-    d = find_fixedlength(cs + 2, options, atend, cd);
-    if (d < 0) return d;
-    branchlength += d;
-    cc += 1 + LINK_SIZE;
-    break;
-
     /* Skip over assertive subpatterns */
 
     case OP_ASSERT:
@@ -1433,9 +1398,7 @@ for (;;)
 
     case OP_REVERSE:
     case OP_CREF:
-    case OP_NCREF:
     case OP_RREF:
-    case OP_NRREF:
     case OP_DEF:
     case OP_OPT:
     case OP_CALLOUT:
@@ -1458,8 +1421,10 @@ for (;;)
     branchlength++;
     cc += 2;
 #ifdef SUPPORT_UTF8
-    if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
-      cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+    if ((options & PCRE_UTF8) != 0)
+      {
+      while ((*cc & 0xc0) == 0x80) cc++;
+      }
 #endif
     break;
 
@@ -1470,8 +1435,10 @@ for (;;)
     branchlength += GET2(cc,1);
     cc += 4;
 #ifdef SUPPORT_UTF8
-    if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
-      cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+    if ((options & PCRE_UTF8) != 0)
+      {
+      while((*cc & 0x80) == 0x80) cc++;
+      }
 #endif
     break;
 
@@ -1550,25 +1517,22 @@ for (;;)
 
 
 /*************************************************
-*    Scan compiled regex for specific bracket    *
+*    Scan compiled regex for numbered bracket    *
 *************************************************/
 
 /* This little function scans through a compiled pattern until it finds a
-capturing bracket with the given number, or, if the number is negative, an
-instance of OP_REVERSE for a lookbehind. The function is global in the C sense
-so that it can be called from pcre_study() when finding the minimum matching
-length.
+capturing bracket with the given number.
 
 Arguments:
   code        points to start of expression
   utf8        TRUE in UTF-8 mode
-  number      the required bracket number or negative to find a lookbehind
+  number      the required bracket number
 
 Returns:      pointer to the opcode for the bracket, or NULL if not found
 */
 
-const uschar *
-_pcre_find_bracket(const uschar *code, BOOL utf8, int number)
+static const uschar *
+find_bracket(const uschar *code, BOOL utf8, int number)
 {
 for (;;)
   {
@@ -1581,14 +1545,6 @@ for (;;)
 
   if (c == OP_XCLASS) code += GET(code, 1);
 
-  /* Handle recursion */
-
-  else if (c == OP_REVERSE)
-    {
-    if (number < 0) return (uschar *)code;
-    code += _pcre_OP_lengths[c];
-    }
-
   /* Handle capturing bracket */
 
   else if (c == OP_CBRA)
@@ -1954,13 +1910,10 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
     case OP_QUERY:
     case OP_MINQUERY:
     case OP_POSQUERY:
-    if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
-    break;
-
     case OP_UPTO:
     case OP_MINUPTO:
     case OP_POSUPTO:
-    if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
+    if (utf8) while ((code[2] & 0xc0) == 0x80) code++;
     break;
 #endif
     }
@@ -1993,10 +1946,9 @@ static BOOL
 could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
   BOOL utf8)
 {
-while (bcptr != NULL && bcptr->current_branch >= code)
+while (bcptr != NULL && bcptr->current >= code)
   {
-  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
-    return FALSE;
+  if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;
   bcptr = bcptr->outer;
   }
 return TRUE;
@@ -2658,7 +2610,7 @@ BOOL utf8 = FALSE;
 uschar *utf8_char = NULL;
 #endif
 
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
 if (lengthptr != NULL) DPRINTF((">> start branch\n"));
 #endif
 
@@ -2717,7 +2669,7 @@ for (;; ptr++)
 
   if (lengthptr != NULL)
     {
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
     if (code > cd->hwm) cd->hwm = code;                 /* High water info */
 #endif
     if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */
@@ -3915,15 +3867,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
       if (repeat_max == 0) goto END_REPEAT;
 
-      /*--------------------------------------------------------------------*/
-      /* This code is obsolete from release 8.00; the restriction was finally
-      removed: */
-
       /* All real repeats make it impossible to handle partial matching (maybe
       one day we will be able to remove this restriction). */
 
-      /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
-      /*--------------------------------------------------------------------*/
+      if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
 
       /* Combine the op_type with the repeat_type */
 
@@ -4070,15 +4017,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         goto END_REPEAT;
         }
 
-      /*--------------------------------------------------------------------*/
-      /* This code is obsolete from release 8.00; the restriction was finally
-      removed: */
-
       /* All real repeats make it impossible to handle partial matching (maybe
       one day we will be able to remove this restriction). */
 
-      /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
-      /*--------------------------------------------------------------------*/
+      if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
 
       if (repeat_min == 0 && repeat_max == -1)
         *code++ = OP_CRSTAR + repeat_type;
@@ -4213,15 +4155,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           {
           /* In the pre-compile phase, we don't actually do the replication. We
           just adjust the length as if we had. Do some paranoid checks for
-          potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
-          integer type when available, otherwise double. */
+          potential integer overflow. */
 
           if (lengthptr != NULL)
             {
             int delta = (repeat_min - 1)*length_prevgroup;
-            if ((INT64_OR_DOUBLE)(repeat_min - 1)*
-                  (INT64_OR_DOUBLE)length_prevgroup >
-                    (INT64_OR_DOUBLE)INT_MAX ||
+            if ((double)(repeat_min - 1)*(double)length_prevgroup >
+                                                            (double)INT_MAX ||
                 OFLOW_MAX - *lengthptr < delta)
               {
               *errorcodeptr = ERR20;
@@ -4267,16 +4207,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         just adjust the length as if we had. For each repetition we must add 1
         to the length for BRAZERO and for all but the last repetition we must
         add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
-        paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
-        a 64-bit integer type when available, otherwise double. */
+        paranoid checks to avoid integer overflow. */
 
         if (lengthptr != NULL && repeat_max > 0)
           {
           int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
                       2 - 2*LINK_SIZE;   /* Last one doesn't nest */
-          if ((INT64_OR_DOUBLE)repeat_max *
-                (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
-                  > (INT64_OR_DOUBLE)INT_MAX ||
+          if ((double)repeat_max *
+                (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
+                  > (double)INT_MAX ||
               OFLOW_MAX - *lengthptr < delta)
             {
             *errorcodeptr = ERR20;
@@ -4396,20 +4335,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
     if (possessive_quantifier)
       {
       int len;
-
-      if (*tempcode == OP_TYPEEXACT)
+      if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
+          *tempcode == OP_NOTEXACT)
         tempcode += _pcre_OP_lengths[*tempcode] +
-          ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
-
-      else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
-        {
-        tempcode += _pcre_OP_lengths[*tempcode];
-#ifdef SUPPORT_UTF8
-        if (utf8 && tempcode[-1] >= 0xc0)
-          tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
-#endif
-        }
-
+          ((*tempcode == OP_TYPEEXACT &&
+             (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
       len = code - tempcode;
       if (len > 0) switch (*tempcode)
         {
@@ -4487,19 +4417,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         if (namelen == verbs[i].len &&
             strncmp((char *)name, vn, namelen) == 0)
           {
-          /* Check for open captures before ACCEPT */
-
-          if (verbs[i].op == OP_ACCEPT)
-            {
-            open_capitem *oc;
-            cd->had_accept = TRUE;
-            for (oc = cd->open_caps; oc != NULL; oc = oc->next)
-              {
-              *code++ = OP_CLOSE;
-              PUT2INC(code, 0, oc->number);
-              }
-            }
-          *code++ = verbs[i].op;
+          *code = verbs[i].op;
+          if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
           break;
           }
         vn += verbs[i].len + 1;
@@ -4661,10 +4580,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           }
 
         /* Otherwise (did not start with "+" or "-"), start by looking for the
-        name. If we find a name, add one to the opcode to change OP_CREF or
-        OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same,
-        except they record that the reference was originally to a name. The
-        information is used to check duplicate names. */
+        name. */
 
         slot = cd->name_table;
         for (i = 0; i < cd->names_found; i++)
@@ -4679,7 +4595,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           {
           recno = GET2(slot, 0);
           PUT2(code, 2+LINK_SIZE, recno);
-          code[1+LINK_SIZE]++;
           }
 
         /* Search the pattern for a forward reference */
@@ -4688,7 +4603,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
                         (options & PCRE_EXTENDED) != 0)) > 0)
           {
           PUT2(code, 2+LINK_SIZE, i);
-          code[1+LINK_SIZE]++;
           }
 
         /* If terminator == 0 it means that the name followed directly after
@@ -4881,24 +4795,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
               }
             }
 
-          /* In the real compile, create the entry in the table, maintaining
-          alphabetical order. Duplicate names for different numbers are
-          permitted only if PCRE_DUPNAMES is set. Duplicate names for the same
-          number are always OK. (An existing number can be re-used if (?|
-          appears in the pattern.) In either event, a duplicate name results in
-          a duplicate entry in the table, even if the number is the same. This
-          is because the number of names, and hence the table size, is computed
-          in the pre-compile, and it affects various numbers and pointers which
-          would all have to be modified, and the compiled code moved down, if
-          duplicates with the same number were omitted from the table. This
-          doesn't seem worth the hassle. However, *different* names for the
-          same number are not permitted. */
+          /* In the real compile, create the entry in the table */
 
           else
             {
-            BOOL dupname = FALSE;
             slot = cd->name_table;
-
             for (i = 0; i < cd->names_found; i++)
               {
               int crc = memcmp(name, slot+2, namelen);
@@ -4906,66 +4807,33 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
                 {
                 if (slot[2+namelen] == 0)
                   {
-                  if (GET2(slot, 0) != cd->bracount + 1 &&
-                      (options & PCRE_DUPNAMES) == 0)
+                  if ((options & PCRE_DUPNAMES) == 0)
                     {
                     *errorcodeptr = ERR43;
                     goto FAILED;
                     }
-                  else dupname = TRUE;
                   }
-                else crc = -1;      /* Current name is a substring */
+                else crc = -1;      /* Current name is substring */
                 }
-
-              /* Make space in the table and break the loop for an earlier
-              name. For a duplicate or later name, carry on. We do this for
-              duplicates so that in the simple case (when ?(| is not used) they
-              are in order of their numbers. */
-
               if (crc < 0)
                 {
                 memmove(slot + cd->name_entry_size, slot,
                   (cd->names_found - i) * cd->name_entry_size);
                 break;
                 }
-
-              /* Continue the loop for a later or duplicate name */
-
               slot += cd->name_entry_size;
               }
 
-            /* For non-duplicate names, check for a duplicate number before
-            adding the new name. */
-
-            if (!dupname)
-              {
-              uschar *cslot = cd->name_table;
-              for (i = 0; i < cd->names_found; i++)
-                {
-                if (cslot != slot)
-                  {
-                  if (GET2(cslot, 0) == cd->bracount + 1)
-                    {
-                    *errorcodeptr = ERR65;
-                    goto FAILED;
-                    }
-                  }
-                else i--;
-                cslot += cd->name_entry_size;
-                }
-              }
-
             PUT2(slot, 0, cd->bracount + 1);
             memcpy(slot + 2, name, namelen);
             slot[2+namelen] = 0;
             }
           }
 
-        /* In both pre-compile and compile, count the number of names we've
-        encountered. */
+        /* In both cases, count the number of names we've encountered. */
 
-        cd->names_found++;
         ptr++;                    /* Move past > or ' */
+        cd->names_found++;
         goto NUMBERED_GROUP;
 
 
@@ -5134,8 +5002,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           if (lengthptr == NULL)
             {
             *code = OP_END;
-            if (recno != 0)
-              called = _pcre_find_bracket(cd->start_code, utf8, recno);
+            if (recno != 0) called = find_bracket(cd->start_code, utf8, recno);
 
             /* Forward reference */
 
@@ -5251,7 +5118,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
             {
             cd->external_options = newoptions;
             }
-          else
+         else
             {
             if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))
               {
@@ -5588,7 +5455,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
       if (-c >= ESC_REF)
         {
-        open_capitem *oc;
         recno = -c - ESC_REF;
 
         HANDLE_REFERENCE:    /* Come here from named backref handling */
@@ -5598,19 +5464,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         PUT2INC(code, 0, recno);
         cd->backref_map |= (recno < 32)? (1 << recno) : 1;
         if (recno > cd->top_backref) cd->top_backref = recno;
-
-        /* Check to see if this back reference is recursive, that it, it
-        is inside the group that it references. A flag is set so that the
-        group can be made atomic. */
-
-        for (oc = cd->open_caps; oc != NULL; oc = oc->next)
-          {
-          if (oc->number == recno)
-            {
-            oc->flag = TRUE;
-            break;
-            }
-          }
         }
 
       /* So are Unicode property matches, if supported. */
@@ -5793,18 +5646,15 @@ uschar *code = *codeptr;
 uschar *last_branch = code;
 uschar *start_bracket = code;
 uschar *reverse_count = NULL;
-open_capitem capitem;
-int capnumber = 0;
 int firstbyte, reqbyte;
 int branchfirstbyte, branchreqbyte;
 int length;
 int orig_bracount;
 int max_bracount;
-int old_external_options = cd->external_options;
 branch_chain bc;
 
 bc.outer = bcptr;
-bc.current_branch = code;
+bc.current = code;
 
 firstbyte = reqbyte = REQ_UNSET;
 
@@ -5822,19 +5672,6 @@ the code that abstracts option settings at the start of the pattern and makes
 them global. It tests the value of length for (2 + 2*LINK_SIZE) in the
 pre-compile phase to find out whether anything has yet been compiled or not. */
 
-/* If this is a capturing subpattern, add to the chain of open capturing items
-so that we can detect them if (*ACCEPT) is encountered. This is also used to
-detect groups that contain recursive back references to themselves. */
-
-if (*code == OP_CBRA)
-  {
-  capnumber = GET2(code, 1 + LINK_SIZE);
-  capitem.number = capnumber;
-  capitem.next = cd->open_caps;
-  capitem.flag = FALSE;
-  cd->open_caps = &capitem;
-  }
-
 /* Offset is set zero to mark that this bracket is still open */
 
 PUT(code, 1, 0);
@@ -5879,15 +5716,6 @@ for (;;)
     return FALSE;
     }
 
-  /* If the external options have changed during this branch, it means that we
-  are at the top level, and a leading option setting has been encountered. We
-  need to re-set the original option values to take account of this so that,
-  during the pre-compile phase, we know to allow for a re-set at the start of
-  subsequent branches. */
-
-  if (old_external_options != cd->external_options)
-    oldims = cd->external_options & PCRE_IMS;
-
   /* Keep the highest bracket count in case (?| was used and some branch
   has fewer than the rest. */
 
@@ -5938,29 +5766,21 @@ for (;;)
 
     /* If lookbehind, check that this branch matches a fixed-length string, and
     put the length into the OP_REVERSE item. Temporarily mark the end of the
-    branch with OP_END. If the branch contains OP_RECURSE, the result is -3
-    because there may be forward references that we can't check here. Set a
-    flag to cause another lookbehind check at the end. Why not do it all at the
-    end? Because common, erroneous checks are picked up here and the offset of
-    the problem can be shown. */
+    branch with OP_END. */
 
     if (lookbehind)
       {
       int fixed_length;
       *code = OP_END;
-      fixed_length = find_fixedlength(last_branch, options, FALSE, cd);
+      fixed_length = find_fixedlength(last_branch, options);
       DPRINTF(("fixed length = %d\n", fixed_length));
-      if (fixed_length == -3)
-        {
-        cd->check_lookbehind = TRUE;
-        }
-      else if (fixed_length < 0)
+      if (fixed_length < 0)
         {
         *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;
         *ptrptr = ptr;
         return FALSE;
         }
-      else { PUT(reverse_count, 0, fixed_length); }
+      PUT(reverse_count, 0, fixed_length);
       }
     }
 
@@ -5994,28 +5814,7 @@ for (;;)
     PUT(code, 1, code - start_bracket);
     code += 1 + LINK_SIZE;
 
-    /* If it was a capturing subpattern, check to see if it contained any
-    recursive back references. If so, we must wrap it in atomic brackets.
-    In any event, remove the block from the chain. */
-
-    if (capnumber > 0)
-      {
-      if (cd->open_caps->flag)
-        {
-        memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
-          code - start_bracket);
-        *start_bracket = OP_ONCE;
-        code += 1 + LINK_SIZE;
-        PUT(start_bracket, 1, code - start_bracket);
-        *code = OP_KET;
-        PUT(code, 1, code - start_bracket);
-        code += 1 + LINK_SIZE;
-        length += 2 + 2*LINK_SIZE;
-        }
-      cd->open_caps = cd->open_caps->next;
-      }
-
-    /* Reset options if needed. */
+    /* Resetting option if needed */
 
     if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
       {
@@ -6064,7 +5863,7 @@ for (;;)
     {
     *code = OP_ALT;
     PUT(code, 1, code - last_branch);
-    bc.current_branch = last_branch = code;
+    bc.current = last_branch = code;
     code += 1 + LINK_SIZE;
     }
 
@@ -6211,9 +6010,7 @@ do {
      switch (*scode)
        {
        case OP_CREF:
-       case OP_NCREF:
        case OP_RREF:
-       case OP_NRREF:
        case OP_DEF:
        return FALSE;
 
@@ -6382,7 +6179,9 @@ int length = 1;  /* For final END opcode */
 int firstbyte, reqbyte, newline;
 int errorcode = 0;
 int skipatstart = 0;
-BOOL utf8 = (options & PCRE_UTF8) != 0;
+#ifdef SUPPORT_UTF8
+BOOL utf8;
+#endif
 size_t size;
 uschar *code;
 const uschar *codestart;
@@ -6479,14 +6278,15 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
 /* Can't support UTF8 unless PCRE has been compiled to include the code. */
 
 #ifdef SUPPORT_UTF8
+utf8 = (options & PCRE_UTF8) != 0;
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
-     (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1)) >= 0)
+     (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
   {
   errorcode = ERR44;
   goto PCRE_EARLY_ERROR_RETURN2;
   }
 #else
-if (utf8)
+if ((options & PCRE_UTF8) != 0)
   {
   errorcode = ERR32;
   goto PCRE_EARLY_ERROR_RETURN;
@@ -6575,7 +6375,6 @@ cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
 cd->req_varyopt = 0;
 cd->external_options = options;
 cd->external_flags = 0;
-cd->open_caps = NULL;
 
 /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
 don't need to look at the result of the function here. The initial options have
@@ -6650,8 +6449,6 @@ cd->start_code = codestart;
 cd->hwm = cworkspace;
 cd->req_varyopt = 0;
 cd->had_accept = FALSE;
-cd->check_lookbehind = FALSE;
-cd->open_caps = NULL;
 
 /* Set up a starting, non-extracting bracket, then compile the expression. On
 error, errorcode will be set non-zero, so we don't need to look at the result
@@ -6677,7 +6474,7 @@ if debugging, leave the test till after things are printed out. */
 
 *code++ = OP_END;
 
-#ifndef PCRE_DEBUG
+#ifndef DEBUG
 if (code - codestart > length) errorcode = ERR23;
 #endif
 
@@ -6690,7 +6487,7 @@ while (errorcode == 0 && cd->hwm > cworkspace)
   cd->hwm -= LINK_SIZE;
   offset = GET(cd->hwm, 0);
   recno = GET(codestart, offset);
-  groupptr = _pcre_find_bracket(codestart, utf8, recno);
+  groupptr = find_bracket(codestart, (re->options & PCRE_UTF8) != 0, recno);
   if (groupptr == NULL) errorcode = ERR53;
     else PUT(((uschar *)codestart), offset, groupptr - codestart);
   }
@@ -6700,47 +6497,6 @@ subpattern. */
 
 if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
 
-/* If there were any lookbehind assertions that contained OP_RECURSE
-(recursions or subroutine calls), a flag is set for them to be checked here,
-because they may contain forward references. Actual recursions can't be fixed
-length, but subroutine calls can. It is done like this so that those without
-OP_RECURSE that are not fixed length get a diagnosic with a useful offset. The
-exceptional ones forgo this. We scan the pattern to check that they are fixed
-length, and set their lengths. */
-
-if (cd->check_lookbehind)
-  {
-  uschar *cc = (uschar *)codestart;
-
-  /* Loop, searching for OP_REVERSE items, and process those that do not have
-  their length set. (Actually, it will also re-process any that have a length
-  of zero, but that is a pathological case, and it does no harm.) When we find
-  one, we temporarily terminate the branch it is in while we scan it. */
-
-  for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);
-       cc != NULL;
-       cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))
-    {
-    if (GET(cc, 1) == 0)
-      {
-      int fixed_length;
-      uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
-      int end_op = *be;
-      *be = OP_END;
-      fixed_length = find_fixedlength(cc, re->options, TRUE, cd);
-      *be = end_op;
-      DPRINTF(("fixed length = %d\n", fixed_length));
-      if (fixed_length < 0)
-        {
-        errorcode = (fixed_length == -2)? ERR36 : ERR25;
-        break;
-        }
-      PUT(cc, 1, fixed_length);
-      }
-    cc += 1 + LINK_SIZE;
-    }
-  }
-
 /* Failed to compile, or error while post-processing */
 
 if (errorcode != 0)
@@ -6801,7 +6557,7 @@ if (reqbyte >= 0 &&
 /* Print out the compiled data if debugging is enabled. This is never the
 case when building a production library. */
 
-#ifdef PCRE_DEBUG
+#ifdef DEBUG
 
 printf("Length = %d top_bracket = %d top_backref = %d\n",
   length, re->top_bracket, re->top_backref);
@@ -6839,7 +6595,7 @@ if (code - codestart > length)
   if (errorcodeptr != NULL) *errorcodeptr = ERR23;
   return NULL;
   }
-#endif   /* PCRE_DEBUG */
+#endif   /* DEBUG */
 
 return (pcre *)re;
 }
author	Ilia Alshanetsky <iliaa@php.net>	2010-02-03 12:59:00 +0000
committer	Ilia Alshanetsky <iliaa@php.net>	2010-02-03 12:59:00 +0000
commit	91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8 (patch)
tree	1817d9652f110cf0cd4644ed8586ca56d0c05d9e /ext/pcre/pcrelib/pcre_compile.c
parent	43d5429381237518ced74149f29a851c30307bea (diff)
download	php-git-91eb2dea648f8ed0f14f60cd02b5c1e911c2adf8.tar.gz