diff options
| author | Anatol Belski <ab@php.net> | 2018-09-18 14:16:06 +0200 | 
|---|---|---|
| committer | Anatol Belski <ab@php.net> | 2018-09-18 14:16:52 +0200 | 
| commit | d918e0776b5168aed2707b0ca500589844f0faa8 (patch) | |
| tree | 163b322349a4eab8ba37942c7186b5ec4f8dda2f /ext/pcre/pcre2lib/pcre2_compile.c | |
| parent | 4f36acb9e65935aa657f1f22e2320a401bdbdad3 (diff) | |
| download | php-git-d918e0776b5168aed2707b0ca500589844f0faa8.tar.gz | |
Upgrade PCRE2 to 10.32
Diffstat (limited to 'ext/pcre/pcre2lib/pcre2_compile.c')
| -rw-r--r-- | ext/pcre/pcre2lib/pcre2_compile.c | 336 | 
1 file changed, 233 insertions, 103 deletions
| diff --git a/ext/pcre/pcre2lib/pcre2_compile.c b/ext/pcre/pcre2lib/pcre2_compile.c index 87530fb584..6bb1de3610 100644 --- a/ext/pcre/pcre2lib/pcre2_compile.c +++ b/ext/pcre/pcre2lib/pcre2_compile.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.                         Written by Philip Hazel       Original API code Copyright (c) 1997-2012 University of Cambridge -          New API code Copyright (c) 2016-2017 University of Cambridge +          New API code Copyright (c) 2016-2018 University of Cambridge  -----------------------------------------------------------------------------  Redistribution and use in source and binary forms, with or without @@ -63,8 +63,8 @@ POSSIBILITY OF SUCH DAMAGE.  /* Other debugging code can be enabled by these defines. */ -// #define DEBUG_SHOW_CAPTURES -// #define DEBUG_SHOW_PARSED +/* #define DEBUG_SHOW_CAPTURES */ +/* #define DEBUG_SHOW_PARSED */  /* There are a few things that vary with different code unit sizes. Handle them  by defining macros in order to minimize #if usage. */ @@ -250,34 +250,35 @@ is present where expected in a conditional group. */  #define META_LOOKBEHINDNOT    0x80250000u  /* (?<! */  /* These must be kept in this order, with consecutive values, and the _ARG -versions of PRUNE, SKIP, and THEN immediately after their non-argument +versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument  versions. 
*/  #define META_MARK             0x80260000u  /* (*MARK) */  #define META_ACCEPT           0x80270000u  /* (*ACCEPT) */ -#define META_COMMIT           0x80280000u  /* (*COMMIT) */ -#define META_FAIL             0x80290000u  /* (*FAIL) */ -#define META_PRUNE            0x802a0000u  /* These pairs must    */ -#define META_PRUNE_ARG        0x802b0000u  /*   be                */ -#define META_SKIP             0x802c0000u  /*     kept            */ -#define META_SKIP_ARG         0x802d0000u  /*         in          */ -#define META_THEN             0x802e0000u  /*           this      */ -#define META_THEN_ARG         0x802f0000u  /*               order */ +#define META_FAIL             0x80280000u  /* (*FAIL) */ +#define META_COMMIT           0x80290000u  /* These               */ +#define META_COMMIT_ARG       0x802a0000u  /*   pairs             */ +#define META_PRUNE            0x802b0000u  /*     must            */ +#define META_PRUNE_ARG        0x802c0000u  /*       be            */ +#define META_SKIP             0x802d0000u  /*         kept        */ +#define META_SKIP_ARG         0x802e0000u  /*           in        */ +#define META_THEN             0x802f0000u  /*             this    */ +#define META_THEN_ARG         0x80300000u  /*               order */  /* These must be kept in groups of adjacent 3 values, and all together. */ -#define META_ASTERISK         0x80300000u  /* *  */ -#define META_ASTERISK_PLUS    0x80310000u  /* *+ */ -#define META_ASTERISK_QUERY   0x80320000u  /* *? */ -#define META_PLUS             0x80330000u  /* +  */ -#define META_PLUS_PLUS        0x80340000u  /* ++ */ -#define META_PLUS_QUERY       0x80350000u  /* +? */ -#define META_QUERY            0x80360000u  /* ?  */ -#define META_QUERY_PLUS       0x80370000u  /* ?+ */ -#define META_QUERY_QUERY      0x80380000u  /* ?? 
*/ -#define META_MINMAX           0x80390000u  /* {n,m}  repeat */ -#define META_MINMAX_PLUS      0x803a0000u  /* {n,m}+ repeat */ -#define META_MINMAX_QUERY     0x803b0000u  /* {n,m}? repeat */ +#define META_ASTERISK         0x80310000u  /* *  */ +#define META_ASTERISK_PLUS    0x80320000u  /* *+ */ +#define META_ASTERISK_QUERY   0x80330000u  /* *? */ +#define META_PLUS             0x80340000u  /* +  */ +#define META_PLUS_PLUS        0x80350000u  /* ++ */ +#define META_PLUS_QUERY       0x80360000u  /* +? */ +#define META_QUERY            0x80370000u  /* ?  */ +#define META_QUERY_PLUS       0x80380000u  /* ?+ */ +#define META_QUERY_QUERY      0x80390000u  /* ?? */ +#define META_MINMAX           0x803a0000u  /* {n,m}  repeat */ +#define META_MINMAX_PLUS      0x803b0000u  /* {n,m}+ repeat */ +#define META_MINMAX_QUERY     0x803c0000u  /* {n,m}? repeat */  #define META_FIRST_QUANTIFIER META_ASTERISK  #define META_LAST_QUANTIFIER  META_MINMAX_QUERY @@ -327,8 +328,9 @@ static unsigned char meta_extra_lengths[] = {    SIZEOFFSET,    /* META_LOOKBEHINDNOT */    1,             /* META_MARK - plus the string length */    0,             /* META_ACCEPT */ -  0,             /* META_COMMIT */    0,             /* META_FAIL */ +  0,             /* META_COMMIT */ +  1,             /* META_COMMIT_ARG - plus the string length */    0,             /* META_PRUNE */    1,             /* META_PRUNE_ARG - plus the string length */    0,             /* META_SKIP */ @@ -510,17 +512,17 @@ static const short int escapes[] = {       -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,       CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,       CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE, -     CHAR_GRAVE_ACCENT,       ESC_a, +     CHAR_GRAVE_ACCENT,       CHAR_BEL,       -ESC_b,                  0, -     -ESC_d,                  ESC_e, -     ESC_f,                   0, +     -ESC_d,                  CHAR_ESC, +     CHAR_FF,                 0,       -ESC_h,                  0,       0,          
             -ESC_k,       0,                       0, -     ESC_n,                   0, +     CHAR_LF,                 0,       -ESC_p,                  0, -     ESC_r,                   -ESC_s, -     ESC_tee,                 0, +     CHAR_CR,                 -ESC_s, +     CHAR_HT,                 0,       -ESC_v,                  -ESC_w,       0,                       0,       -ESC_z @@ -544,22 +546,22 @@ because it is defined as 'a', which of course picks up the ASCII value. */  #endif  static const short int escapes[] = { -/*  80 */        ESC_a, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0, -/*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0, -/*  90 */     0,     0, -ESC_k,       0,      0, ESC_n,      0, -ESC_p, -/*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0, -/*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0, -/*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0, -/*  B0 */     0,     0,      0,       0,      0,     0,      0,      0, -/*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-', -/*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G, -/*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0, -/*  D0 */   '}',     0, -ESC_K,       0,      0,-ESC_N,      0, -ESC_P, -/*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0, -/*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X, -/*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0, -/*  F0 */     0,     0,      0,       0,      0,     0,      0,      0, -/*  F8 */     0,     0 +/*  80 */         CHAR_BEL, -ESC_b,       0, -ESC_d, CHAR_ESC, CHAR_FF,      0, +/*  88 */ -ESC_h,        0,      0,     '{',      0,        0,       0,      0, +/*  90 */      0,        0, -ESC_k,       0,      0,  CHAR_LF,       0, -ESC_p, +/*  98 */      0,  CHAR_CR,      0,     '}',      0,        0,       0,      0, +/*  A0 */      0,      
'~', -ESC_s, CHAR_HT,      0,   -ESC_v,  -ESC_w,      0, +/*  A8 */      0,   -ESC_z,      0,       0,      0,      '[',       0,      0, +/*  B0 */      0,        0,      0,       0,      0,        0,       0,      0, +/*  B8 */      0,        0,      0,       0,      0,      ']',     '=',    '-', +/*  C0 */    '{',   -ESC_A, -ESC_B,  -ESC_C, -ESC_D,   -ESC_E,       0, -ESC_G, +/*  C8 */ -ESC_H,        0,      0,       0,      0,        0,       0,      0, +/*  D0 */    '}',        0, -ESC_K,       0,      0,   -ESC_N,       0, -ESC_P, +/*  D8 */ -ESC_Q,   -ESC_R,      0,       0,      0,        0,       0,      0, +/*  E0 */   '\\',        0, -ESC_S,       0,      0,   -ESC_V,  -ESC_W, -ESC_X, +/*  E8 */      0,   -ESC_Z,      0,       0,      0,        0,       0,      0, +/*  F0 */      0,        0,      0,       0,      0,        0,       0,      0, +/*  F8 */      0,        0  };  /* We also need a table of characters that may follow \c in an EBCDIC @@ -586,9 +588,9 @@ static const char verbnames[] =    "\0"                       /* Empty name is a shorthand for MARK */    STRING_MARK0    STRING_ACCEPT0 -  STRING_COMMIT0    STRING_F0    STRING_FAIL0 +  STRING_COMMIT0    STRING_PRUNE0    STRING_SKIP0    STRING_THEN; @@ -596,11 +598,11 @@ static const char verbnames[] =  static const verbitem verbs[] = {    { 0, META_MARK,   +1 },  /* > 0 => must have an argument */    { 4, META_MARK,   +1 }, -  { 6, META_ACCEPT, -1 },  /* < 0 => must not have an argument */ -  { 6, META_COMMIT, -1 }, +  { 6, META_ACCEPT, -1 },  /* < 0 => Optional argument, convert to pre-MARK */    { 1, META_FAIL,   -1 },    { 4, META_FAIL,   -1 }, -  { 5, META_PRUNE,   0 },  /* Argument is optional; bump META code if found */ +  { 6, META_COMMIT,  0 }, +  { 5, META_PRUNE,   0 },  /* Optional argument; bump META code if found */    { 4, META_SKIP,    0 },    { 4, META_THEN,    0 }  }; @@ -610,8 +612,8 @@ static const int verbcount = sizeof(verbs)/sizeof(verbitem);  /* Verb opcodes, indexed by 
their META code offset from META_MARK. */  static const uint32_t verbops[] = { -  OP_MARK, OP_ACCEPT, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP, -  OP_SKIP_ARG, OP_THEN, OP_THEN_ARG }; +  OP_MARK, OP_ACCEPT, OP_FAIL, OP_COMMIT, OP_COMMIT_ARG, OP_PRUNE, +  OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };  /* Offsets from OP_STAR for case-independent and negative repeat opcodes. */ @@ -729,7 +731,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,         ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,         ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,         ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90, -       ERR91, ERR92}; +       ERR91, ERR92, ERR93, ERR94 };  /* This is a table of start-of-pattern options such as (*UTF) and settings such  as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward @@ -976,8 +978,8 @@ for (;;)      case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break;      case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break; -    case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;      case META_FAIL: fprintf(stderr, "META (*FAIL)"); break; +    case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;      case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break;      case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;      case META_THEN: fprintf(stderr, "META (*THEN)"); break; @@ -1067,6 +1069,10 @@ for (;;)      fprintf(stderr, "META (*MARK:");      goto SHOWARG; +    case META_COMMIT_ARG: +    fprintf(stderr, "META (*COMMIT:"); +    goto SHOWARG; +      case META_PRUNE_ARG:      fprintf(stderr, "META (*PRUNE:");      goto SHOWARG; @@ -1435,6 +1441,48 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)      escape = -i;                    /* Else return a special escape */      if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X))        cb->external_flags |= PCRE2_HASBKPORX;   /* Note \P, \p, 
or \X */ + +    /* Perl supports \N{name} for character names and \N{U+dddd} for numerical +    Unicode code points, as well as plain \N for "not newline". PCRE does not +    support \N{name}. However, it does support quantification such as \N{2,3}, +    so if \N{ is not followed by U+dddd we check for a quantifier. */ + +    if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET) +      { +      PCRE2_SPTR p = ptr + 1; + +      /* \N{U+ can be handled by the \x{ code. However, this construction is +      not valid in EBCDIC environments because it specifies a Unicode +      character, not a codepoint in the local code. For example \N{U+0041} +      must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode +      casing semantics for the entire pattern, so allow it only in UTF (i.e. +      Unicode) mode. */ + +      if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS) +        { +#ifdef EBCDIC +        *errorcodeptr = ERR93; +#else +        if (utf) +          { +          ptr = p + 1; +          escape = 0;   /* Not a fancy escape after all */ +          goto COME_FROM_NU; +          } +        else *errorcodeptr = ERR93; +#endif +        } + +      /* Give an error if what follows is not a quantifier, but don't override +      an error set by the quantifier reader (e.g. number overflow). */ + +      else +        { +        if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) && +             *errorcodeptr == 0) +          *errorcodeptr = ERR37; +        } +      }      }    } @@ -1462,6 +1510,7 @@ else      /* A number of Perl escapes are not handled by PCRE. We give an explicit      error. 
*/ +    case CHAR_F:      case CHAR_l:      case CHAR_L:      *errorcodeptr = ERR37; @@ -1719,6 +1768,9 @@ else        {        if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)          { +#ifndef EBCDIC +        COME_FROM_NU: +#endif          if (++ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET)            {            *errorcodeptr = ERR78; @@ -1852,19 +1904,6 @@ else      }    } -/* Perl supports \N{name} for character names, as well as plain \N for "not -newline". PCRE does not support \N{name}. However, it does support -quantification such as \N{2,3}. */ - -if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET && -    ptrend - ptr > 2) -  { -  PCRE2_SPTR p = ptr + 1; -  if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) && -       *errorcodeptr == 0) -    *errorcodeptr = ERR37; -  } -  /* Set the pointer to the next character before returning. */  *ptrptr = ptr; @@ -2251,11 +2290,14 @@ typedef struct nest_save {  #define NSF_RESET          0x0001u  #define NSF_CONDASSERT     0x0002u -/* Of the options that are changeable within the pattern, these are tracked -during parsing. The rest are used from META_OPTIONS items when compiling. */ +/* Options that are changeable within the pattern must be tracked during +parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing, +but all must be tracked so that META_OPTIONS items set the correct values for +the main compiling phase. */ -#define PARSE_TRACKED_OPTIONS \ -  (PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE) +#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \ +  PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \ +  PCRE2_UNGREEDY)  /* States used for analyzing ranges in character classes. The two OK values  must be last. 
*/ @@ -2290,6 +2332,7 @@ uint32_t *previous_callout = NULL;  uint32_t *parsed_pattern = cb->parsed_pattern;  uint32_t *parsed_pattern_end = cb->parsed_pattern_end;  uint32_t meta_quantifier = 0; +uint32_t add_after_mark = 0;  uint16_t nest_depth = 0;  int after_manual_callout = 0;  int expect_cond_assert = 0; @@ -2434,11 +2477,17 @@ while (ptr < ptrend)          /* EITHER: not both options set */          ((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=                      (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) || -        /* OR: character > 255 */ -        c > 255 || -        /* OR: not a # comment or white space */ -        (c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0) -       )) +#ifdef SUPPORT_UNICODE +        /* OR: character > 255 AND not Unicode Pattern White Space */ +        (c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) || +#endif +        /* OR: not a # comment or isspace() white space */ +        (c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0 +#ifdef SUPPORT_UNICODE +        /* and not CHAR_NEL when Unicode is supported */ +          && c != CHAR_NEL +#endif +       )))      {      PCRE2_SIZE verbnamelength; @@ -2461,6 +2510,16 @@ while (ptr < ptrend)          goto FAILED;          }        *verblengthptr = (uint32_t)verbnamelength; + +      /* If this name was on a verb such as (*ACCEPT) which does not continue, +      a (*MARK) was generated for the name. We now add the original verb as the +      next item. */ + +      if (add_after_mark != 0) +        { +        *parsed_pattern++ = add_after_mark; +        add_after_mark = 0; +        }        break;        case CHAR_BACKSLASH: @@ -2510,11 +2569,18 @@ while (ptr < ptrend)    /* Skip over whitespace and # comments in extended mode. Note that c is a    character, not a code unit, so we must not use MAX_255 to test its size -  because MAX_255 tests code units and is assumed TRUE in 8-bit mode. 
*/ +  because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The +  whitespace characters are those designated as "Pattern White Space" by +  Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is +  U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a +  subset of space characters that match \h and \v. */    if ((options & PCRE2_EXTENDED) != 0)      {      if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue; +#ifdef SUPPORT_UNICODE +    if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue; +#endif      if (c == CHAR_NUMBER_SIGN)        {        while (ptr < ptrend) @@ -3206,7 +3272,6 @@ while (ptr < ptrend)          tempptr = ptr;          escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode,            options, TRUE, cb); -          if (errorcode != 0)            {            CLASS_ESCAPE_FAILED: @@ -3454,13 +3519,25 @@ while (ptr < ptrend)          if (*ptr++ == CHAR_COLON)   /* Skip past : or ) */            { -          if (verbs[i].has_arg < 0)  /* Argument is forbidden */ +          /* Some optional arguments can be treated as a preceding (*MARK) */ + +          if (verbs[i].has_arg < 0)              { -            errorcode = ERR59; -            goto FAILED; +            add_after_mark = verbs[i].meta; +            *parsed_pattern++ = META_MARK;              } -          *parsed_pattern++ = verbs[i].meta + -            ((verbs[i].meta != META_MARK)? 0x00010000u:0); + +          /* The remaining verbs with arguments (except *MARK) need a different +          opcode. */ + +          else +            { +            *parsed_pattern++ = verbs[i].meta + +              ((verbs[i].meta != META_MARK)? 0x00010000u:0); +            } + +          /* Set up for reading the name in the main loop. 
*/ +            verblengthptr = parsed_pattern++;            verbnamestart = ptr;            inverbname = TRUE; @@ -3521,17 +3598,39 @@ while (ptr < ptrend)        else          { +        BOOL hyphenok = TRUE; +        uint32_t oldoptions = options; +          top_nest->reset_group = 0;          top_nest->max_group = 0;          set = unset = 0;          optset = &set; +        /* ^ at the start unsets imnsx and disables the subsequent use of - */ + +        if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT) +          { +          options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| +                       PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE); +          hyphenok = FALSE; +          ptr++; +          } +          while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS &&                                 *ptr != CHAR_COLON)            {            switch (*ptr++)              { -            case CHAR_MINUS: optset = &unset; break; +            case CHAR_MINUS: +            if (!hyphenok) +              { +              errorcode = ERR94; +              ptr--;  /* Correct the offset */ +              goto FAILED; +              } +            optset = &unset; +            hyphenok = FALSE; +            break;              case CHAR_J:  /* Record that it changed in the external options */              *optset |= PCRE2_DUPNAMES; @@ -3591,7 +3690,7 @@ while (ptr < ptrend)          /* If nothing changed, no need to record. 
*/ -        if (set != 0 || unset != 0) +        if (options != oldoptions)            {            *parsed_pattern++ = META_OPTIONS;            *parsed_pattern++ = options; @@ -3896,9 +3995,8 @@ while (ptr < ptrend)          if (*ptr == CHAR_DOT)            {            if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION; -          if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode)) -            goto FAILED; -          if (minor < 10) minor *= 10; +          minor = (*ptr++ - CHAR_0) * 10; +          if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;            if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)              goto BAD_VERSION_CONDITION;            } @@ -4261,11 +4359,11 @@ goto FAILED;  /************************************************* -*      Find first significant op code            * +*       Find first significant opcode            *  *************************************************/  /* This is called by several functions that scan a compiled expression looking -for a fixed first character, or an anchoring op code etc. It skips over things +for a fixed first character, or an anchoring opcode etc. It skips over things  that do not influence this. For some calls, it makes sense to skip negative  forward and all backward assertions, and also the \b assertion; for others it  does not. @@ -5472,7 +5570,7 @@ for (;; pptr++)        set xclass = TRUE. Then, in the pre-compile phase, accumulate the length        of the extra data and reset the pointer. This is so that very large        classes that contain a zillion wide characters or Unicode property tests -      do not overwrite the work space (which is on the stack). */ +      do not overwrite the workspace (which is on the stack). 
*/        if (class_uchardata > class_uchardata_base)          { @@ -5563,7 +5661,7 @@ for (;; pptr++)        if (class_has_8bitchar > 0)          {          *code++ |= XCL_MAP; -        memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, +        (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,            CU2BYTES(class_uchardata - code));          if (negate_class && !xclass_has_prop)            for (i = 0; i < 32; i++) classbits[i] = ~classbits[i]; @@ -5655,6 +5753,7 @@ for (;; pptr++)      cb->had_pruneorskip = TRUE;      /* Fall through */      case META_MARK: +    case META_COMMIT_ARG:      VERB_ARG:      *code++ = verbops[(meta - META_MARK) >> 16];      /* The length is in characters. */ @@ -6509,7 +6608,7 @@ for (;; pptr++)        /* Wrap the recursion call in OP_BRA brackets. */ -      memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE)); +      (void)memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));        op_previous = *previous = OP_BRA;        PUT(previous, 1, 2 + 2*LINK_SIZE);        previous[2 + 2*LINK_SIZE] = OP_KET; @@ -6589,7 +6688,7 @@ for (;; pptr++)            if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED)              { -            memmove(previous + 1, previous, CU2BYTES(len)); +            (void)memmove(previous + 1, previous, CU2BYTES(len));              code++;              if (repeat_max == 0)                { @@ -6610,7 +6709,7 @@ for (;; pptr++)            else              {              int linkoffset; -            memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len)); +            (void)memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));              code += 2 + LINK_SIZE;              *previous++ = OP_BRAZERO + repeat_type;              *previous++ = OP_BRA; @@ -6811,7 +6910,7 @@ for (;; pptr++)                if (*bracode == OP_COND || *bracode == OP_SCOND)                  {                  int nlen = (int)(code - bracode); -                memmove(bracode + 1 + 
LINK_SIZE, bracode, CU2BYTES(nlen)); +                (void)memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));                  code += 1 + LINK_SIZE;                  nlen += 1 + LINK_SIZE;                  *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS; @@ -7082,7 +7181,7 @@ for (;; pptr++)          else            { -          memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len)); +          (void)memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));            code += 1 + LINK_SIZE;            len += 1 + LINK_SIZE;            tempcode[0] = OP_ONCE; @@ -7460,7 +7559,7 @@ length of the BRA and KET and any extra code units that are required at the  beginning. We accumulate in a local variable to save frequent testing of  lengthptr for NULL. We cannot do this by looking at the value of 'code' at the  start and end of each alternative, because compiled items are discarded during -the pre-compile phase so that the work space is not exceeded. */ +the pre-compile phase so that the workspace is not exceeded. */  length = 2 + 2*LINK_SIZE + skipunits; @@ -7622,7 +7721,7 @@ for (;;)        {        if (cb->open_caps->flag)          { -        memmove(start_bracket + 1 + LINK_SIZE, start_bracket, +        (void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,            CU2BYTES(code - start_bracket));          *start_bracket = OP_ONCE;          code += 1 + LINK_SIZE; @@ -7765,10 +7864,11 @@ do {       if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;       } -   /* Condition */ +   /* Condition. If there is no second branch, it can't be anchored. 
*/ -   else if (op == OP_COND) +   else if (op == OP_COND || op == OP_SCOND)       { +     if (scode[GET(scode,1)] != OP_ALT) return FALSE;       if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))         return FALSE;       } @@ -8003,6 +8103,7 @@ for (;;)        break;        case OP_MARK: +      case OP_COMMIT_ARG:        case OP_PRUNE_ARG:        case OP_SKIP_ARG:        case OP_THEN_ARG: @@ -8221,7 +8322,7 @@ for (i = 0; i < tablecount; i++)    if (crc < 0)      { -    memmove(slot + cb->name_entry_size, slot, +    (void)memmove(slot + cb->name_entry_size, slot,        CU2BYTES((tablecount - i) * cb->name_entry_size));      break;      } @@ -8311,6 +8412,7 @@ for (;; pptr++)      break;      case META_MARK:     /* Add the length of the name. */ +    case META_COMMIT_ARG:      case META_PRUNE_ARG:      case META_SKIP_ARG:      case META_THEN_ARG: @@ -8501,6 +8603,7 @@ for (;; pptr++)      goto EXIT;      case META_MARK: +    case META_COMMIT_ARG:      case META_PRUNE_ARG:      case META_SKIP_ARG:      case META_THEN_ARG: @@ -8572,6 +8675,32 @@ for (;; pptr++)      case META_LOOKAHEADNOT:      pptr = parsed_skip(pptr + 1, PSKIP_KET);      if (pptr == NULL) goto PARSED_SKIP_FAILED; + +    /* Also ignore any qualifiers that follow a lookahead assertion. */ + +    switch (pptr[1]) +      { +      case META_ASTERISK: +      case META_ASTERISK_PLUS: +      case META_ASTERISK_QUERY: +      case META_PLUS: +      case META_PLUS_PLUS: +      case META_PLUS_QUERY: +      case META_QUERY: +      case META_QUERY_PLUS: +      case META_QUERY_QUERY: +      pptr++; +      break; + +      case META_MINMAX: +      case META_MINMAX_PLUS: +      case META_MINMAX_QUERY: +      pptr += 3; +      break; + +      default: +      break; +      }      break;      /* Lookbehinds can be ignored, but must themselves be checked. 
*/ @@ -8942,6 +9071,7 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)      break;      case META_MARK: +    case META_COMMIT_ARG:      case META_PRUNE_ARG:      case META_SKIP_ARG:      case META_THEN_ARG: | 
