1 files changed, 51 insertions, 97 deletions
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c
index eff51c7048..e28fe9ec86 100644
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
@@ -156,39 +156,13 @@ printf("\n");
 
 if (length > md->end_subject - eptr) return FALSE;
 
-/* Separate the caseless case for speed. In UTF-8 mode we can only do this
-properly if Unicode properties are supported. Otherwise, we can check only
-ASCII characters. */
+/* Separate the caseless case for speed */
 
 if ((ims & PCRE_CASELESS) != 0)
   {
-#ifdef SUPPORT_UTF8
-#ifdef SUPPORT_UCP
-  if (md->utf8)
-    {
-    USPTR endptr = eptr + length;
-    while (eptr < endptr)
-      {
-      int c, d;
-      GETCHARINC(c, eptr);
-      GETCHARINC(d, p);
-      if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
-      }
-    }
-  else
-#endif
-#endif
-
-  /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
-  is no UCP support. */
-
   while (length-- > 0)
-    { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
+    if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
   }
-
-/* In the caseful case, we can just compare the bytes, whether or not we
-are in UTF-8 mode. */
-
 else
   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
 
@@ -1677,7 +1651,8 @@ for (;;)
     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
     GETCHARINCTEST(c, eptr);
       {
-      const ucd_record * prop = GET_UCD(c);
+      int chartype, script;
+      int category = _pcre_ucp_findprop(c, &chartype, &script);
 
       switch(ecode[1])
         {
@@ -1686,24 +1661,24 @@ for (;;)
         break;
 
         case PT_LAMP:
-        if ((prop->chartype == ucp_Lu ||
-             prop->chartype == ucp_Ll ||
-             prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
+        if ((chartype == ucp_Lu ||
+             chartype == ucp_Ll ||
+             chartype == ucp_Lt) == (op == OP_NOTPROP))
           RRETURN(MATCH_NOMATCH);
          break;
 
         case PT_GC:
-        if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
+        if ((ecode[2] != category) == (op == OP_PROP))
           RRETURN(MATCH_NOMATCH);
         break;
 
         case PT_PC:
-        if ((ecode[2] != prop->chartype) == (op == OP_PROP))
+        if ((ecode[2] != chartype) == (op == OP_PROP))
           RRETURN(MATCH_NOMATCH);
         break;
 
         case PT_SC:
-        if ((ecode[2] != prop->script) == (op == OP_PROP))
+        if ((ecode[2] != script) == (op == OP_PROP))
           RRETURN(MATCH_NOMATCH);
         break;
 
@@ -1722,7 +1697,8 @@ for (;;)
     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
     GETCHARINCTEST(c, eptr);
       {
-      int category = UCD_CATEGORY(c);
+      int chartype, script;
+      int category = _pcre_ucp_findprop(c, &chartype, &script);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
       while (eptr < md->end_subject)
         {
@@ -1731,7 +1707,7 @@ for (;;)
           {
           GETCHARLEN(c, eptr, len);
           }
-        category = UCD_CATEGORY(c);
+        category = _pcre_ucp_findprop(c, &chartype, &script);
         if (category != ucp_M) break;
         eptr += len;
         }
@@ -2196,7 +2172,7 @@ for (;;)
         if (fc != dc)
           {
 #ifdef SUPPORT_UCP
-          if (dc != UCD_OTHERCASE(fc))
+          if (dc != _pcre_ucp_othercase(fc))
 #endif
             RRETURN(MATCH_NOMATCH);
           }
@@ -2287,7 +2263,7 @@ for (;;)
 #ifdef SUPPORT_UCP
         unsigned int othercase;
         if ((ims & PCRE_CASELESS) != 0 &&
-            (othercase = UCD_OTHERCASE(fc)) != fc)
+            (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
           oclength = _pcre_ord2utf8(othercase, occhars);
         else oclength = 0;
 #endif  /* SUPPORT_UCP */
@@ -2607,11 +2583,10 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
             GETCHARINC(d, eptr);
             if (d < 256) d = md->lcc[d];
-            if (fc == d) RRETURN(MATCH_NOMATCH);
-
+            if (fi >= max || eptr >= md->end_subject || fc == d)
+              RRETURN(MATCH_NOMATCH);
             }
           }
         else
@@ -2717,9 +2692,9 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
             GETCHARINC(d, eptr);
-            if (fc == d) RRETURN(MATCH_NOMATCH);
+            if (fi >= max || eptr >= md->end_subject || fc == d)
+              RRETURN(MATCH_NOMATCH);
             }
           }
         else
@@ -2893,7 +2868,7 @@ for (;;)
             {
             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
             GETCHARINCTEST(c, eptr);
-            prop_chartype = UCD_CHARTYPE(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_chartype == ucp_Lu ||
                  prop_chartype == ucp_Ll ||
                  prop_chartype == ucp_Lt) == prop_fail_result)
@@ -2906,7 +2881,7 @@ for (;;)
             {
             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
             GETCHARINCTEST(c, eptr);
-            prop_category = UCD_CATEGORY(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_category == prop_value) == prop_fail_result)
               RRETURN(MATCH_NOMATCH);
             }
@@ -2917,7 +2892,7 @@ for (;;)
             {
             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
             GETCHARINCTEST(c, eptr);
-            prop_chartype = UCD_CHARTYPE(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_chartype == prop_value) == prop_fail_result)
               RRETURN(MATCH_NOMATCH);
             }
@@ -2928,7 +2903,7 @@ for (;;)
             {
             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
             GETCHARINCTEST(c, eptr);
-            prop_script = UCD_SCRIPT(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_script == prop_value) == prop_fail_result)
               RRETURN(MATCH_NOMATCH);
             }
@@ -2947,7 +2922,7 @@ for (;;)
         for (i = 1; i <= min; i++)
           {
           GETCHARINCTEST(c, eptr);
-          prop_category = UCD_CATEGORY(c);
+          prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
           while (eptr < md->end_subject)
             {
@@ -2956,7 +2931,7 @@ for (;;)
               {
               GETCHARLEN(c, eptr, len);
               }
-            prop_category = UCD_CATEGORY(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if (prop_category != ucp_M) break;
             eptr += len;
             }
@@ -3372,7 +3347,7 @@ for (;;)
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
             GETCHARINC(c, eptr);
-            prop_chartype = UCD_CHARTYPE(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_chartype == ucp_Lu ||
                  prop_chartype == ucp_Ll ||
                  prop_chartype == ucp_Lt) == prop_fail_result)
@@ -3387,7 +3362,7 @@ for (;;)
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
             GETCHARINC(c, eptr);
-            prop_category = UCD_CATEGORY(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_category == prop_value) == prop_fail_result)
               RRETURN(MATCH_NOMATCH);
             }
@@ -3400,7 +3375,7 @@ for (;;)
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
             GETCHARINC(c, eptr);
-            prop_chartype = UCD_CHARTYPE(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_chartype == prop_value) == prop_fail_result)
               RRETURN(MATCH_NOMATCH);
             }
@@ -3413,7 +3388,7 @@ for (;;)
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
             GETCHARINC(c, eptr);
-            prop_script = UCD_SCRIPT(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_script == prop_value) == prop_fail_result)
               RRETURN(MATCH_NOMATCH);
             }
@@ -3435,7 +3410,7 @@ for (;;)
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
           GETCHARINCTEST(c, eptr);
-          prop_category = UCD_CATEGORY(c);
+          prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
           while (eptr < md->end_subject)
             {
@@ -3444,7 +3419,7 @@ for (;;)
               {
               GETCHARLEN(c, eptr, len);
               }
-            prop_category = UCD_CATEGORY(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if (prop_category != ucp_M) break;
             eptr += len;
             }
@@ -3762,7 +3737,7 @@ for (;;)
             int len = 1;
             if (eptr >= md->end_subject) break;
             GETCHARLEN(c, eptr, len);
-            prop_chartype = UCD_CHARTYPE(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_chartype == ucp_Lu ||
                  prop_chartype == ucp_Ll ||
                  prop_chartype == ucp_Lt) == prop_fail_result)
@@ -3777,7 +3752,7 @@ for (;;)
             int len = 1;
             if (eptr >= md->end_subject) break;
             GETCHARLEN(c, eptr, len);
-            prop_category = UCD_CATEGORY(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_category == prop_value) == prop_fail_result)
               break;
             eptr+= len;
@@ -3790,7 +3765,7 @@ for (;;)
             int len = 1;
             if (eptr >= md->end_subject) break;
             GETCHARLEN(c, eptr, len);
-            prop_chartype = UCD_CHARTYPE(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_chartype == prop_value) == prop_fail_result)
               break;
             eptr+= len;
@@ -3803,7 +3778,7 @@ for (;;)
             int len = 1;
             if (eptr >= md->end_subject) break;
             GETCHARLEN(c, eptr, len);
-            prop_script = UCD_SCRIPT(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if ((prop_script == prop_value) == prop_fail_result)
               break;
             eptr+= len;
@@ -3832,7 +3807,7 @@ for (;;)
           {
           if (eptr >= md->end_subject) break;
           GETCHARINCTEST(c, eptr);
-          prop_category = UCD_CATEGORY(c);
+          prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
           if (prop_category == ucp_M) break;
           while (eptr < md->end_subject)
             {
@@ -3841,7 +3816,7 @@ for (;;)
               {
               GETCHARLEN(c, eptr, len);
               }
-            prop_category = UCD_CATEGORY(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if (prop_category != ucp_M) break;
             eptr += len;
             }
@@ -3863,7 +3838,7 @@ for (;;)
               BACKCHAR(eptr);
               GETCHARLEN(c, eptr, len);
               }
-            prop_category = UCD_CATEGORY(c);
+            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
             if (prop_category != ucp_M) break;
             eptr--;
             }
@@ -4383,7 +4358,7 @@ Returns:          > 0 => success; value is the number of elements filled in
                  < -1 => some kind of unexpected problem
 */
 
-PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+PCRE_EXP_DEFN int
 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
   int offsetcount)
@@ -4695,53 +4670,31 @@ for(;;)
   if (firstline)
     {
     USPTR t = start_match;
-#ifdef SUPPORT_UTF8
-    if (utf8)
-      {
-      while (t < md->end_subject && !IS_NEWLINE(t))
-        {
-        t++;
-        while (t < end_subject && (*t & 0xc0) == 0x80) t++;
-        }
-      }
-    else
-#endif
     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
     end_subject = t;
     }
 
-  /* Now advance to a unique first byte if there is one. */
+  /* Now test for a unique first byte */
 
   if (first_byte >= 0)
     {
     if (first_byte_caseless)
-      while (start_match < end_subject && md->lcc[*start_match] != first_byte)
-        start_match++;
+      while (start_match < end_subject &&
+             md->lcc[*start_match] != first_byte)
+        { NEXTCHAR(start_match); }
     else
       while (start_match < end_subject && *start_match != first_byte)
-        start_match++;
+        { NEXTCHAR(start_match); }
     }
 
-  /* Or to just after a linebreak for a multiline match */
+  /* Or to just after a linebreak for a multiline match if possible */
 
   else if (startline)
     {
     if (start_match > md->start_subject + start_offset)
       {
-#ifdef SUPPORT_UTF8
-      if (utf8)
-        {
-        while (start_match < end_subject && !WAS_NEWLINE(start_match))
-          {
-          start_match++;
-          while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
-            start_match++;
-          }
-        }
-      else
-#endif
-      while (start_match < end_subject && !WAS_NEWLINE(start_match))
-        start_match++;
+      while (start_match <= end_subject && !WAS_NEWLINE(start_match))
+        { NEXTCHAR(start_match); }
 
       /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
       and we are now at a LF, advance the match position by one more character.
@@ -4755,15 +4708,16 @@ for(;;)
       }
     }
 
-  /* Or to a non-unique first byte after study */
+  /* Or to a non-unique first char after study */
 
   else if (start_bits != NULL)
     {
     while (start_match < end_subject)
       {
       register unsigned int c = *start_match;
-      if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
-        else break;
+      if ((start_bits[c/8] & (1 << (c&7))) == 0)
+        { NEXTCHAR(start_match); }
+      else break;
       }
     }