diff options
| author | Andrei Zmievski <andrei@php.net> | 2003-12-16 22:20:30 +0000 |
|---|---|---|
| committer | Andrei Zmievski <andrei@php.net> | 2003-12-16 22:20:30 +0000 |
| commit | 9fc9e4b2cf6f71f130ad080ea8a0924ec3732b62 (patch) | |
| tree | 50329fc541100f6beccfc10b36a748365cde7081 /ext/pcre/pcrelib/internal.h | |
| parent | e9fb9a7fa75b7e8c0381c85628741ec27f2874a9 (diff) | |
| download | php-git-9fc9e4b2cf6f71f130ad080ea8a0924ec3732b62.tar.gz | |
MFB
Diffstat (limited to 'ext/pcre/pcrelib/internal.h')
| -rw-r--r-- | ext/pcre/pcrelib/internal.h | 105 |
1 files changed, 63 insertions, 42 deletions
diff --git a/ext/pcre/pcrelib/internal.h b/ext/pcre/pcrelib/internal.h index 7bcdeefdfb..544f1c2736 100644 --- a/ext/pcre/pcrelib/internal.h +++ b/ext/pcre/pcrelib/internal.h @@ -45,6 +45,10 @@ modules, but which are not relevant to the outside. */ # include "php_config.h" #endif +#ifndef PCRE_SPY +#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ +#endif + /* The value of NEWLINE determines the newline character. The default is to leave it up to the compiler, but some sites want to force a particular value. On Unix systems, "configure" can be used to override this default. */ @@ -65,6 +69,14 @@ default default. */ #define MATCH_LIMIT 10000000 #endif +/* If you are compiling for a system that needs some magic to be inserted + * before the definition of an exported function, define this macro to contain + * the relevant magic. It appears at the start of every exported function. */ + +#define EXPORT + +#include "pcre.h" + /* When compiling for use with the Virtual Pascal compiler, these functions need to have their names changed. PCRE must be compiled with the -DVPCOMPAT option on the command line. */ @@ -83,6 +95,18 @@ neither (there are some non-Unix environments where this is the case). This assumes that all calls to memmove are moving strings upwards in store, which is the case in PCRE. */ +/* Standard C headers plus the external interface definition. The only time +setjmp and stdarg are used is when NO_RECURSE is set. */ + +#include <ctype.h> +#include <limits.h> +#include <setjmp.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + #if ! HAVE_MEMMOVE #undef memmove /* some systems may have a macro */ #if HAVE_BCOPY @@ -177,21 +201,6 @@ capturing parenthesis numbers in back references. 
*/ #define PUT2INC(a,n,d) PUT2(a,n,d), a += 2 -/* Standard C headers plus the external interface definition */ - -#include <ctype.h> -#include <limits.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#ifndef PCRE_SPY -#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ -#endif - -#include "pcre.h" - /* In case there is no definition of offsetof() provided - though any proper Standard C system should have one. */ @@ -224,10 +233,10 @@ time, run time or study time, respectively. */ #define PUBLIC_OPTIONS \ (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ - PCRE_NO_AUTO_CAPTURE) + PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK) #define PUBLIC_EXEC_OPTIONS \ - (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY) + (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK) #define PUBLIC_STUDY_OPTIONS 0 /* None defined */ @@ -381,40 +390,40 @@ enum { class - the difference is relevant only when a UTF-8 character > 255 is encountered. */ - OP_XCLASS, /* 56 Extended class for handling UTF-8 chars within the + OP_XCLASS, /* 57 Extended class for handling UTF-8 chars within the class. This does both positive and negative. */ - OP_REF, /* 57 Match a back reference */ - OP_RECURSE, /* 58 Match a numbered subpattern (possibly recursive) */ - OP_CALLOUT, /* 59 Call out to external function if provided */ + OP_REF, /* 58 Match a back reference */ + OP_RECURSE, /* 59 Match a numbered subpattern (possibly recursive) */ + OP_CALLOUT, /* 60 Call out to external function if provided */ - OP_ALT, /* 60 Start of alternation */ - OP_KET, /* 61 End of group that doesn't have an unbounded repeat */ - OP_KETRMAX, /* 62 These two must remain together and in this */ - OP_KETRMIN, /* 63 order. They are for groups the repeat for ever. 
*/ + OP_ALT, /* 61 Start of alternation */ + OP_KET, /* 62 End of group that doesn't have an unbounded repeat */ + OP_KETRMAX, /* 63 These two must remain together and in this */ + OP_KETRMIN, /* 64 order. They are for groups the repeat for ever. */ /* The assertions must come before ONCE and COND */ - OP_ASSERT, /* 64 Positive lookahead */ - OP_ASSERT_NOT, /* 65 Negative lookahead */ - OP_ASSERTBACK, /* 66 Positive lookbehind */ - OP_ASSERTBACK_NOT, /* 67 Negative lookbehind */ - OP_REVERSE, /* 68 Move pointer back - used in lookbehind assertions */ + OP_ASSERT, /* 65 Positive lookahead */ + OP_ASSERT_NOT, /* 66 Negative lookahead */ + OP_ASSERTBACK, /* 67 Positive lookbehind */ + OP_ASSERTBACK_NOT, /* 68 Negative lookbehind */ + OP_REVERSE, /* 69 Move pointer back - used in lookbehind assertions */ /* ONCE and COND must come after the assertions, with ONCE first, as there's a test for >= ONCE for a subpattern that isn't an assertion. */ - OP_ONCE, /* 69 Once matched, don't back up into the subpattern */ - OP_COND, /* 70 Conditional group */ - OP_CREF, /* 71 Used to hold an extraction string number (cond ref) */ + OP_ONCE, /* 70 Once matched, don't back up into the subpattern */ + OP_COND, /* 71 Conditional group */ + OP_CREF, /* 72 Used to hold an extraction string number (cond ref) */ - OP_BRAZERO, /* 72 These two must remain together and in this */ - OP_BRAMINZERO, /* 73 order. */ + OP_BRAZERO, /* 73 These two must remain together and in this */ + OP_BRAMINZERO, /* 74 order. */ - OP_BRANUMBER, /* 74 Used for extracting brackets whose number is greater + OP_BRANUMBER, /* 75 Used for extracting brackets whose number is greater than can fit into an opcode. */ - OP_BRA /* 75 This and greater values are used for brackets that + OP_BRA /* 76 This and greater values are used for brackets that extract substrings up to a basic limit. After that, use is made of OP_BRANUMBER. */ }; @@ -457,10 +466,10 @@ in UTF-8 mode. 
The code that uses this table must know about such things. */ 1, 1, 1, 1, 2, 1, 1, /* Any, Anybyte, \Z, \z, Opt, ^, $ */ \ 2, /* Chars - the minimum length */ \ 2, /* not */ \ - /* Positive single-char repeats */ \ - 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** These are */ \ - 4, 4, 4, /* upto, minupto, exact ** minima */ \ - /* Negative single-char repeats */ \ + /* Positive single-char repeats ** These are */ \ + 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ + 4, 4, 4, /* upto, minupto, exact ** UTF-8 mode */ \ + /* Negative single-char repeats - only for chars < 256 */ \ 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ 4, 4, 4, /* NOT upto, minupto, exact */ \ /* Positive type repeats */ \ @@ -552,6 +561,7 @@ just to accommodate the POSIX wrapper. */ #define ERR41 "unrecognized character after (?P" #define ERR42 "syntax error after (?P" #define ERR43 "two named groups have the same name" +#define ERR44 "invalid UTF-8 string" /* All character handling must be done as unsigned characters. Otherwise there are problems with top-bit-set characters and functions such as isspace(). @@ -615,7 +625,7 @@ typedef struct branch_chain { call within the pattern. */ typedef struct recursion_info { - struct recursion_info *prev; /* Previous recursion record (or NULL) */ + struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ int group_num; /* Number of group that was called */ const uschar *after_call; /* "Return value": points after the call in the expr */ const uschar *save_start; /* Old value of md->start_match */ @@ -623,6 +633,16 @@ typedef struct recursion_info { int saved_max; /* Number of saved offsets */ } recursion_info; +/* When compiling in a mode that doesn't use recursive calls to match(), +a structure is used to remember local variables on the heap. It is defined in +pcre.c, close to the match() function, so that it is easy to keep it in step +with any changes of local variable. 
However, the pointer to the current frame +must be saved in some "static" place over a longjmp(). We declare the +structure here so that we can put a pointer in the match_data structure. +NOTE: This isn't used for a "normal" compilation of pcre. */ + +struct heapframe; + /* Structure for passing "static" information around between the functions doing the matching, so that they are thread-safe. */ @@ -650,6 +670,7 @@ typedef struct match_data { int start_offset; /* The start offset value */ recursion_info *recursive; /* Linked list of recursion data */ void *callout_data; /* To pass back to callouts */ + struct heapframe *thisframe; /* Used only when compiling for no recursion */ } match_data; /* Bit definitions for entries in the pcre_ctypes table. */ |
