diff options
Diffstat (limited to 'ext/mbstring/oniguruma/oniguruma.h')
| -rw-r--r-- | ext/mbstring/oniguruma/oniguruma.h | 924 |
1 files changed, 597 insertions, 327 deletions
diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h index e5236a80a7..fd9e8f1700 100644 --- a/ext/mbstring/oniguruma/oniguruma.h +++ b/ext/mbstring/oniguruma/oniguruma.h @@ -2,7 +2,7 @@ oniguruma.h - Oniguruma (regular expression library) - Copyright (C) 2002-2003 K.Kosako (kosako@sofnec.co.jp) + Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp) **********************************************************************/ #ifndef ONIGURUMA_H @@ -11,23 +11,12 @@ #include "php_compat.h" #define ONIGURUMA -#define ONIGURUMA_VERSION_MAJOR 1 -#define ONIGURUMA_VERSION_MINOR 9 -#define ONIGURUMA_VERSION_TEENY 1 - -/* config parameters */ -#ifndef RE_NREGS -#define RE_NREGS 10 -#endif -#define REG_NREGION RE_NREGS -#define REG_MAX_BACKREF_NUM 1000 -#define REG_MAX_REPEAT_NUM 100000 -#define REG_MAX_MULTI_BYTE_RANGES_NUM 1000 -/* constants */ -#define REG_MAX_ERROR_MESSAGE_LEN 90 +#define ONIGURUMA_VERSION_MAJOR 2 +#define ONIGURUMA_VERSION_MINOR 2 +#define ONIGURUMA_VERSION_TEENY 2 #ifndef P_ -#ifdef __STDC__ +#if defined(__STDC__) || defined(_WIN32) # define P_(args) args #else # define P_(args) () @@ -42,241 +31,543 @@ #endif #endif -#ifndef REG_EXTERN +#ifndef ONIG_EXTERN #if defined(_WIN32) && !defined(__CYGWIN__) #if defined(EXPORT) || defined(RUBY_EXPORT) -#define REG_EXTERN extern __declspec(dllexport) -#elif defined(IMPORT) -#define REG_EXTERN extern __declspec(dllimport) +#define ONIG_EXTERN extern __declspec(dllexport) +#else +#define ONIG_EXTERN extern __declspec(dllimport) #endif #endif #endif -#ifndef REG_EXTERN -#define REG_EXTERN extern +#ifndef ONIG_EXTERN +#define ONIG_EXTERN extern #endif -#define REG_CHAR_TABLE_SIZE 256 +/* PART: character encoding */ + +typedef unsigned char UChar; +typedef unsigned long OnigCodePoint; +typedef unsigned int OnigDistance; + +#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) + +typedef struct { + OnigCodePoint from; + OnigCodePoint to; +} OnigCodePointRange; + +#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE 16 +typedef struct { + int target_num; + int target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE]; + UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE]; +} OnigEncFoldMatchInfo; -#define REGCODE_UNDEF ((RegCharEncoding )0) #if defined(RUBY_PLATFORM) && defined(M17N_H) -#define REG_RUBY_M17N -typedef m17n_encoding* RegCharEncoding; -#define REGCODE_DEFAULT REGCODE_UNDEF + +#define ONIG_RUBY_M17N +typedef m17n_encoding* OnigEncoding; + #else -typedef const char* RegCharEncoding; -#define MBCTYPE_ASCII 0 -#define MBCTYPE_EUC 1 -#define MBCTYPE_SJIS 2 -#define MBCTYPE_UTF8 3 - -#define REGCODE_ASCII REG_MBLEN_TABLE[MBCTYPE_ASCII] -#define REGCODE_UTF8 REG_MBLEN_TABLE[MBCTYPE_UTF8] -#define REGCODE_EUCJP REG_MBLEN_TABLE[MBCTYPE_EUC] -#define REGCODE_SJIS REG_MBLEN_TABLE[MBCTYPE_SJIS] -#define REGCODE_DEFAULT REGCODE_ASCII - -REG_EXTERN const char REG_MBLEN_TABLE[][REG_CHAR_TABLE_SIZE]; + +typedef struct { + const char len_table[256]; + const char* name; + int max_enc_len; + int is_fold_match; + int ctype_support_level; /* sb-only/full */ + int is_continuous_sb_mb; /* code point is continuous from sb to mb */ + OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end); + int (*code_to_mbclen)(OnigCodePoint code); + int (*code_to_mbc)(OnigCodePoint code, UChar *buf); + int (*mbc_to_lower)(UChar* p, UChar* lower); + int (*mbc_is_case_ambig)(UChar* p); + int (*code_is_ctype)(OnigCodePoint code, unsigned int ctype); + int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]); + UChar* (*left_adjust_char_head)(UChar* start, UChar* s); + int (*is_allowed_reverse_match)(UChar* p, UChar* e); + int (*get_all_fold_match_code)(OnigCodePoint** codes); + int (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info); +} OnigEncodingType; + +typedef OnigEncodingType* OnigEncoding; + +ONIG_EXTERN OnigEncodingType OnigEncodingASCII; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF8; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN; +ONIG_EXTERN OnigEncodingType OnigEncodingSJIS; +ONIG_EXTERN OnigEncodingType OnigEncodingKOI8; +ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R; +ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; + +#define ONIG_ENCODING_ASCII (&OnigEncodingASCII) +#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1) +#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2) +#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3) +#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4) +#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5) +#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6) +#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7) +#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8) +#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9) +#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10) +#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11) +#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13) +#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14) +#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15) +#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16) +#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8) +#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP) +#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW) +#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR) +#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN) +#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS) +#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) +#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R) +#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5) + #endif /* else RUBY && M17N */ -REG_EXTERN RegCharEncoding RegDefaultCharEncoding; +#define ONIG_ENCODING_UNDEF ((OnigEncoding )0) + + +/* work size */ +#define ONIGENC_CODE_TO_MBC_MAXLEN 7 +#define ONIGENC_MBC_TO_LOWER_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN + +/* character types */ +#define ONIGENC_CTYPE_ALPHA (1<< 0) +#define ONIGENC_CTYPE_BLANK (1<< 1) +#define ONIGENC_CTYPE_CNTRL (1<< 2) +#define ONIGENC_CTYPE_DIGIT (1<< 3) +#define ONIGENC_CTYPE_GRAPH (1<< 4) +#define ONIGENC_CTYPE_LOWER (1<< 5) +#define ONIGENC_CTYPE_PRINT (1<< 6) +#define ONIGENC_CTYPE_PUNCT (1<< 7) +#define ONIGENC_CTYPE_SPACE (1<< 8) +#define ONIGENC_CTYPE_UPPER (1<< 9) +#define ONIGENC_CTYPE_XDIGIT (1<<10) +#define ONIGENC_CTYPE_WORD (1<<11) +#define ONIGENC_CTYPE_ASCII (1<<12) +#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT) + +/* ctype support level */ +#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB 0 +#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL 1 + + +#define enc_len(enc,byte) ONIGENC_MBC_LEN_BY_HEAD(enc,byte) + +#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) +#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) +#define ONIGENC_IS_MBC_HEAD(enc,byte) (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1) +#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) +#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) +#define ONIGENC_IS_CODE_SB_WORD(enc,code) \ + (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) +#define ONIGENC_IS_MBC_WORD(enc,s,end) \ + ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end)) + + +#ifdef ONIG_RUBY_M17N + +#include <ctype.h> /* for isblank(), isgraph() */ + +#define ONIGENC_MBC_TO_LOWER(enc,p,buf) onigenc_mbc_to_lower(enc,p,buf) +#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) onigenc_mbc_is_case_ambig(enc,p) + +#define ONIGENC_IS_FOLD_MATCH(enc) FALSE +#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) FALSE +#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ONIGENC_CTYPE_SUPPORT_LEVEL_SB +#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ + onigenc_is_allowed_reverse_match(enc, s, end) +#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ + onigenc_get_left_adjust_char_head(enc, start, s) +#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) 0 +#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) ONIG_NO_SUPPORT_CONFIG +#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \ + ONIG_NO_SUPPORT_CONFIG +#define ONIGENC_MBC_LEN_BY_HEAD(enc,b) m17n_mbclen(enc,(int )b) +#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc) +#define ONIGENC_MBC_MAXLEN_DIST(enc) \ + (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \ + : ONIG_INFINITE_DISTANCE) +#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e)) +#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code)) +#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf) + +#if 0 +#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */ +#endif + +#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \ + onigenc_is_code_ctype(enc,code,ctype) -#if defined(RUBY_PLATFORM) && !defined(M17N_H) -#undef ismbchar -#define ismbchar(c) (mbclen((c)) != 1) -#define mbclen(c) RegDefaultCharEncoding[(unsigned char )(c)] +#ifdef isblank +# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )code) +#else +# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t') +#endif +#ifdef isgraph +# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )code) +#else +# define ONIGENC_IS_CODE_GRAPH(enc,code) \ + (isprint((int )code) && !isspace((int )code)) #endif -typedef unsigned int RegOptionType; -typedef unsigned char* RegTransTableType; -typedef unsigned int RegDistance; -typedef unsigned char UChar; +#define ONIGENC_IS_CODE_PRINT(enc,code) m17n_isprint(enc,code) +#define ONIGENC_IS_CODE_ALNUM(enc,code) m17n_isalnum(enc,code) +#define ONIGENC_IS_CODE_ALPHA(enc,code) m17n_isalpha(enc,code) +#define ONIGENC_IS_CODE_LOWER(enc,code) m17n_islower(enc,code) +#define ONIGENC_IS_CODE_UPPER(enc,code) m17n_isupper(enc,code) +#define ONIGENC_IS_CODE_CNTRL(enc,code) m17n_iscntrl(enc,code) +#define ONIGENC_IS_CODE_PUNCT(enc,code) m17n_ispunct(enc,code) +#define ONIGENC_IS_CODE_SPACE(enc,code) m17n_isspace(enc,code) +#define ONIGENC_IS_CODE_DIGIT(enc,code) m17n_isdigit(enc,code) +#define ONIGENC_IS_CODE_XDIGIT(enc,code) m17n_isxdigit(enc,code) +#define ONIGENC_IS_CODE_WORD(enc,code) m17n_iswchar(enc,code) + +ONIG_EXTERN +int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype)); +ONIG_EXTERN +int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +ONIG_EXTERN +int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf)); +ONIG_EXTERN +int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p)); +ONIG_EXTERN +int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end)); + +#else /* ONIG_RUBY_M17N */ + +#define ONIGENC_NAME(enc) ((enc)->name) + +#define ONIGENC_MBC_TO_LOWER(enc,p,buf) (enc)->mbc_to_lower(p,buf) +#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) (enc)->mbc_is_case_ambig(p) + +#define ONIGENC_IS_FOLD_MATCH(enc) ((enc)->is_fold_match) +#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) ((enc)->is_continuous_sb_mb) +#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ((enc)->ctype_support_level) +#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ + (enc)->is_allowed_reverse_match(s,end) +#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ + (enc)->left_adjust_char_head(start, s) +#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \ + (enc)->get_all_fold_match_code(codes) +#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \ + (enc)->get_fold_match_info(p,end,info) +#define ONIGENC_STEP_BACK(enc,start,s,n) \ + onigenc_step_back((enc),(start),(s),(n)) + +#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) ((enc)->len_table[(int )(byte)]) +#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) +#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) +#define ONIGENC_MBC_TO_CODE(enc,p,e) (enc)->mbc_to_code((p),(e)) +#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code) +#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf) + +#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->code_is_ctype(code,ctype) + +#define ONIGENC_IS_CODE_GRAPH(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH) +#define ONIGENC_IS_CODE_PRINT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT) +#define ONIGENC_IS_CODE_ALNUM(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM) +#define ONIGENC_IS_CODE_ALPHA(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA) +#define ONIGENC_IS_CODE_LOWER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER) +#define ONIGENC_IS_CODE_UPPER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER) +#define ONIGENC_IS_CODE_CNTRL(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL) +#define ONIGENC_IS_CODE_PUNCT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT) +#define ONIGENC_IS_CODE_SPACE(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE) +#define ONIGENC_IS_CODE_BLANK(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK) +#define ONIGENC_IS_CODE_DIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT) +#define ONIGENC_IS_CODE_XDIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT) +#define ONIGENC_IS_CODE_WORD(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD) + +#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \ + (enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr) + +ONIG_EXTERN +UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n)); + +#endif /* is not ONIG_RUBY_M17N */ + + +/* encoding API */ +ONIG_EXTERN +int onigenc_init P_(()); +ONIG_EXTERN +int onigenc_set_default_encoding P_((OnigEncoding enc)); +ONIG_EXTERN +OnigEncoding onigenc_get_default_encoding P_(()); +ONIG_EXTERN +void onigenc_set_default_caseconv_table P_((UChar* table)); +ONIG_EXTERN +UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, UChar* start, UChar* s, UChar** prev)); +ONIG_EXTERN +UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, UChar* start, UChar* s)); +ONIG_EXTERN +UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s)); +ONIG_EXTERN +UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s)); + + + +/* PART: regular expression */ + +/* config parameters */ +#define ONIG_NREGION 10 +#define ONIG_MAX_BACKREF_NUM 1000 +#define ONIG_MAX_REPEAT_NUM 100000 +#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 1000 +/* constants */ +#define ONIG_MAX_ERROR_MESSAGE_LEN 90 + +#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N) +ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; +#undef ismbchar +#define ismbchar(c) (mbclen((c)) != 1) +#define mbclen(c) (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)]) +#endif -#define REG_OPTION_DEFAULT REG_OPTION_NONE +typedef unsigned int OnigOptionType; -/* GNU regex options */ -#define RE_OPTION_IGNORECASE (1L) -#define RE_OPTION_EXTENDED (RE_OPTION_IGNORECASE << 1) -#define RE_OPTION_MULTILINE (RE_OPTION_EXTENDED << 1) -#define RE_OPTION_SINGLELINE (RE_OPTION_MULTILINE << 1) -#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE) -#define RE_OPTION_LONGEST (RE_OPTION_SINGLELINE << 1) +#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE /* options */ -#define REG_OPTION_NONE 0 -#define REG_OPTION_SINGLELINE RE_OPTION_SINGLELINE -#define REG_OPTION_MULTILINE RE_OPTION_MULTILINE -#define REG_OPTION_IGNORECASE RE_OPTION_IGNORECASE -#define REG_OPTION_EXTEND RE_OPTION_EXTENDED -#define REG_OPTION_FIND_LONGEST RE_OPTION_LONGEST -#define REG_OPTION_FIND_NOT_EMPTY (REG_OPTION_FIND_LONGEST << 1) -#define REG_OPTION_NEGATE_SINGLELINE (REG_OPTION_FIND_NOT_EMPTY << 1) -#define REG_OPTION_CAPTURE_ONLY_NAMED_GROUP (REG_OPTION_NEGATE_SINGLELINE << 1) +#define ONIG_OPTION_NONE 0 +#define ONIG_OPTION_IGNORECASE 1L +#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) +#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) +#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) +#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1) +#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1) +#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1) +#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1) +#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) /* options (search time) */ -#define REG_OPTION_NOTBOL (REG_OPTION_CAPTURE_ONLY_NAMED_GROUP << 1) -#define REG_OPTION_NOTEOL (REG_OPTION_NOTBOL << 1) -#define REG_OPTION_POSIX_REGION (REG_OPTION_NOTEOL << 1) +#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) +#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) +#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) -#define REG_OPTION_ON(options,regopt) ((options) |= (regopt)) -#define REG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) -#define IS_REG_OPTION_ON(options,option) ((options) & (option)) +#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) +#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) +#define ONIG_IS_OPTION_ON(options,option) ((options) & (option)) /* syntax */ typedef struct { unsigned int op; unsigned int op2; unsigned int behavior; - RegOptionType options; /* default option */ -} RegSyntaxType; - -REG_EXTERN RegSyntaxType RegSyntaxPosixBasic; -REG_EXTERN RegSyntaxType RegSyntaxPosixExtended; -REG_EXTERN RegSyntaxType RegSyntaxEmacs; -REG_EXTERN RegSyntaxType RegSyntaxGrep; -REG_EXTERN RegSyntaxType RegSyntaxGnuRegex; -REG_EXTERN RegSyntaxType RegSyntaxJava; -REG_EXTERN RegSyntaxType RegSyntaxPerl; -REG_EXTERN RegSyntaxType RegSyntaxRuby; - -/* predefined syntaxes (see regcomp.c) */ -#define REG_SYNTAX_POSIX_BASIC (&RegSyntaxPosixBasic) -#define REG_SYNTAX_POSIX_EXTENDED (&RegSyntaxPosixExtended) -#define REG_SYNTAX_EMACS (&RegSyntaxEmacs) -#define REG_SYNTAX_GREP (&RegSyntaxGrep) -#define REG_SYNTAX_GNU_REGEX (&RegSyntaxGnuRegex) -#define REG_SYNTAX_JAVA (&RegSyntaxJava) -#define REG_SYNTAX_PERL (&RegSyntaxPerl) -#define REG_SYNTAX_RUBY (&RegSyntaxRuby) + OnigOptionType options; /* default option */ +} OnigSyntaxType; + +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; +ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; +ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; +ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; + +/* predefined syntaxes (see regparse.c) */ +#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) +#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) +#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) +#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) +#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) +#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) +#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) +#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) /* default syntax */ -#define REG_SYNTAX_DEFAULT RegDefaultSyntax - -REG_EXTERN RegSyntaxType* RegDefaultSyntax; +ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; +#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax /* syntax (operators) */ -#define REG_SYN_OP_ANYCHAR 1 /* . */ -#define REG_SYN_OP_0INF (1<<1) /* * */ -#define REG_SYN_OP_ESC_0INF (1<<2) -#define REG_SYN_OP_1INF (1<<3) /* + */ -#define REG_SYN_OP_ESC_1INF (1<<4) -#define REG_SYN_OP_01 (1<<5) /* ? */ -#define REG_SYN_OP_ESC_01 (1<<6) -#define REG_SYN_OP_INTERVAL (1<<7) /* {lower,upper} */ -#define REG_SYN_OP_ESC_INTERVAL (1<<8) -#define REG_SYN_OP_ALT (1<<9) /* | */ -#define REG_SYN_OP_ESC_ALT (1<<10) -#define REG_SYN_OP_SUBEXP (1<<11) /* (...) */ -#define REG_SYN_OP_ESC_SUBEXP (1<<12) -#define REG_SYN_OP_ESC_BUF_ANCHOR (1<<13) /* \A, \Z, \z */ -#define REG_SYN_OP_ESC_GNU_BUF_ANCHOR (1<<14) /* \`, \' */ -#define REG_SYN_OP_BACK_REF (1<<15) /* \num */ -#define REG_SYN_OP_CC (1<<16) /* [...] */ -#define REG_SYN_OP_ESC_WORD (1<<17) /* \w, \W */ -#define REG_SYN_OP_ESC_WORD_BEGIN_END (1<<18) /* \<. \> */ -#define REG_SYN_OP_ESC_WORD_BOUND (1<<19) /* \b, \B */ -#define REG_SYN_OP_ESC_WHITE_SPACE (1<<20) /* \s, \S */ -#define REG_SYN_OP_ESC_DIGIT (1<<21) /* \d, \D */ -#define REG_SYN_OP_LINE_ANCHOR (1<<22) /* ^, $ */ -#define REG_SYN_OP_POSIX_BRACKET (1<<23) /* [:xxxx:] */ -#define REG_SYN_OP_NON_GREEDY (1<<24) /* ??,*?,+?,{n,m}? */ -#define REG_SYN_OP_ESC_CONTROL_CHAR (1<<25) /* \n,\r,\t,\a ... */ -#define REG_SYN_OP_ESC_C_CONTROL (1<<26) /* \cx */ -#define REG_SYN_OP_ESC_OCTAL3 (1<<27) /* \OOO */ -#define REG_SYN_OP_ESC_X_HEX2 (1<<28) /* \xHH */ -#define REG_SYN_OP_ESC_X_BRACE_HEX8 (1<<29) /* \x{7HHHHHHH} */ -#define REG_SYN_OP_SUBEXP_EFFECT (1<<30) /* (?...) */ -#define REG_SYN_OP_QUOTE (1<<31) /* \Q...\E */ - -#define REG_SYN_OP2_OPTION_PERL (1<<0) /* (?imsx), (?-imsx) */ -#define REG_SYN_OP2_OPTION_RUBY (1<<1) /* (?imx), (?-imx) */ -#define REG_SYN_OP2_POSSESSIVE_REPEAT (1<<2) /* ?+,*+,++ */ -#define REG_SYN_OP2_POSSESSIVE_INTERVAL (1<<3) /* {n,m}+ */ -#define REG_SYN_OP2_CCLASS_SET (1<<4) /* [...&&..[..].] */ -#define REG_SYN_OP2_NAMED_SUBEXP (1<<5) /*(?<name>.),\k<name>*/ -#define REG_SYN_OP2_SUBEXP_CALL (1<<6) /* \g<name> */ -#define REG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<7) /* \C-x */ -#define REG_SYN_OP2_ESC_M_BAR_META (1<<8) /* \M-x */ -#define REG_SYN_OP2_ESC_V_VTAB (1<<9) /* \v as VTAB */ -#define REG_SYN_OP2_ESC_U_HEX4 (1<<10) /* \uHHHH */ +#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1<<0) +#define ONIG_SYN_OP_DOT_ANYCHAR (1<<1) /* . */ +#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1<<2) /* * */ +#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1<<3) +#define ONIG_SYN_OP_PLUS_ONE_INF (1<<4) /* + */ +#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1<<5) +#define ONIG_SYN_OP_QMARK_ZERO_ONE (1<<6) /* ? */ +#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1<<7) +#define ONIG_SYN_OP_BRACE_INTERVAL (1<<8) /* {lower,upper} */ +#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1<<9) /* \{lower,upper\} */ +#define ONIG_SYN_OP_VBAR_ALT (1<<10) /* | */ +#define ONIG_SYN_OP_ESC_VBAR_ALT (1<<11) /* \| */ +#define ONIG_SYN_OP_LPAREN_SUBEXP (1<<12) /* (...) */ +#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1<<13) /* \(...\) */ +#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1<<14) /* \A, \Z, \z */ +#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1<<15) /* \G */ +#define ONIG_SYN_OP_DECIMAL_BACKREF (1<<16) /* \num */ +#define ONIG_SYN_OP_BRACKET_CC (1<<17) /* [...] */ +#define ONIG_SYN_OP_ESC_W_WORD (1<<18) /* \w, \W */ +#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1<<19) /* \<. \> */ +#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1<<20) /* \b, \B */ +#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1<<21) /* \s, \S */ +#define ONIG_SYN_OP_ESC_D_DIGIT (1<<22) /* \d, \D */ +#define ONIG_SYN_OP_LINE_ANCHOR (1<<23) /* ^, $ */ +#define ONIG_SYN_OP_POSIX_BRACKET (1<<24) /* [:xxxx:] */ +#define ONIG_SYN_OP_QMARK_NON_GREEDY (1<<25) /* ??,*?,+?,{n,m}? */ +#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1<<26) /* \n,\r,\t,\a ... */ +#define ONIG_SYN_OP_ESC_C_CONTROL (1<<27) /* \cx */ +#define ONIG_SYN_OP_ESC_OCTAL3 (1<<28) /* \OOO */ +#define ONIG_SYN_OP_ESC_X_HEX2 (1<<29) /* \xHH */ +#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1<<30) /* \x{7HHHHHHH} */ + +#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1<<0) /* \Q...\E */ +#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1<<1) /* (?...) */ +#define ONIG_SYN_OP2_OPTION_PERL (1<<2) /* (?imsx),(?-imsx) */ +#define ONIG_SYN_OP2_OPTION_RUBY (1<<3) /* (?imx), (?-imx) */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1<<4) /* ?+,*+,++ */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1<<5) /* {n,m}+ */ +#define ONIG_SYN_OP2_CCLASS_SET_OP (1<<6) /* [...&&..[..]..] */ +#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1<<7) /* (?<name>...) */ +#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1<<8) /* \k<name> */ +#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1<<9) /* \g<name>, \g<n> */ +#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1<<10) /* (?@..),(?@<x>..) */ +#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<11) /* \C-x */ +#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1<<12) /* \M-x */ +#define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */ +#define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */ +#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */ +#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */ /* syntax (behavior) */ -#define REG_SYN_CONTEXT_INDEP_ANCHORS (1<<0) /* not implemented */ -#define REG_SYN_CONTEXT_INDEP_OPS (1<<1) /* ?, *, +, {n,m} */ -#define REG_SYN_CONTEXT_INVALID_OPS (1<<2) /* error or ignore */ -#define REG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<3) /* ...)... */ -#define REG_SYN_ALLOW_INVALID_INTERVAL (1<<4) /* {??? */ -#define REG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ etc.*/ -#define REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */ - -/* syntax in char class [...] */ -#define REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED (1<<10) /* [,-,] */ -#define REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<11) -#define REG_SYN_ESCAPE_IN_CC (1<<12) /* [...\w..] etc.. */ -#define REG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<13) -#define REG_SYN_ALLOW_RANGE_OP_IN_CC (1<<14) /* [0-9-a] */ - +#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */ +#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1<<0) /* ?, *, +, {n,m} */ +#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1<<1) /* error or ignore */ +#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<2) /* ...)... */ +#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1<<3) /* {??? */ +#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1<<4) /* {,n} => {0,n} */ +#define ONIG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ ..*/ +#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */ +#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */ +#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?<x>)(?<x>) */ + +/* syntax (behavior) in char class [...] */ +#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */ +#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1<<21) /* [..\w..] etc.. */ +#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<22) +#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1<<23) /* [0-9-a]=[0-9\-a] */ +/* syntax (behavior) warning */ +#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1<<24) /* [,-,] */ +#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1<<25) /* (?:a*)+ */ + +/* meta character specifiers (onig_set_meta_char()) */ +#define ONIG_META_CHAR_ESCAPE 0 +#define ONIG_META_CHAR_ANYCHAR 1 +#define ONIG_META_CHAR_ANYTIME 2 +#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3 +#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4 +#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5 + +#define ONIG_INEFFECTIVE_META_CHAR 0 /* error codes */ -#define REG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -300) +#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000) /* normal return */ -#define REG_NORMAL 0 -#define REG_MISMATCH -1 -#define REG_NO_SUPPORT_CONFIG -2 +#define ONIG_NORMAL 0 +#define ONIG_MISMATCH -1 +#define ONIG_NO_SUPPORT_CONFIG -2 /* internal error */ -#define REGERR_MEMORY -5 -#define REGERR_MATCH_STACK_LIMIT_OVER -6 -#define REGERR_TYPE_BUG -10 -#define REGERR_PARSER_BUG -11 -#define REGERR_STACK_BUG -12 -#define REGERR_UNDEFINED_BYTECODE -13 -#define REGERR_UNEXPECTED_BYTECODE -14 -#define REGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 -#define REGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 +#define ONIGERR_PARSER_BUG -11 +#define ONIGERR_STACK_BUG -12 +#define ONIGERR_UNDEFINED_BYTECODE -13 +#define ONIGERR_UNEXPECTED_BYTECODE -14 +#define ONIGERR_MATCH_STACK_LIMIT_OVER -15 +#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 +#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 +/* general error */ +#define ONIGERR_INVALID_ARGUMENT -30 /* syntax error */ -#define REGERR_END_PATTERN_AT_LEFT_BRACE -100 -#define REGERR_END_PATTERN_AT_LEFT_BRACKET -101 -#define REGERR_EMPTY_CHAR_CLASS -102 -#define REGERR_PREMATURE_END_OF_CHAR_CLASS -103 -#define REGERR_END_PATTERN_AT_BACKSLASH -104 -#define REGERR_END_PATTERN_AT_META -105 -#define REGERR_END_PATTERN_AT_CONTROL -106 -#define REGERR_META_CODE_SYNTAX -108 -#define REGERR_CONTROL_CODE_SYNTAX -109 -#define REGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 -#define REGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 -#define REGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 -#define REGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 -#define REGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 -#define REGERR_NESTED_REPEAT_OPERATOR -115 -#define REGERR_UNMATCHED_CLOSE_PARENTHESIS -116 -#define REGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 -#define REGERR_END_PATTERN_IN_GROUP -118 -#define REGERR_UNDEFINED_GROUP_OPTION -119 -#define REGERR_INVALID_POSIX_BRACKET_TYPE -121 -#define REGERR_INVALID_LOOK_BEHIND_PATTERN -122 -#define REGERR_INVALID_REPEAT_RANGE_PATTERN -123 +#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100 +#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101 +#define ONIGERR_EMPTY_CHAR_CLASS -102 +#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103 +#define ONIGERR_END_PATTERN_AT_BACKSLASH -104 +#define ONIGERR_END_PATTERN_AT_META -105 +#define ONIGERR_END_PATTERN_AT_CONTROL -106 +#define ONIGERR_META_CODE_SYNTAX -108 +#define ONIGERR_CONTROL_CODE_SYNTAX -109 +#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 +#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 +#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 +#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 +#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 +#define ONIGERR_NESTED_REPEAT_OPERATOR -115 +#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116 +#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 +#define ONIGERR_END_PATTERN_IN_GROUP -118 +#define ONIGERR_UNDEFINED_GROUP_OPTION -119 +#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121 +#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122 +#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123 /* values error (syntax error) */ -#define REGERR_TOO_BIG_NUMBER -200 -#define REGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 -#define REGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 -#define REGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 -#define REGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 -#define REGERR_TOO_MANY_MULTI_BYTE_RANGES -205 -#define REGERR_TOO_SHORT_MULTI_BYTE_STRING -206 -#define REGERR_TOO_BIG_BACKREF_NUMBER -207 -#define REGERR_INVALID_BACKREF -208 -#define REGERR_TOO_BIG_WIDE_CHAR_VALUE -209 -#define REGERR_TOO_LONG_WIDE_CHAR_VALUE -210 -#define REGERR_INVALID_WIDE_CHAR_VALUE -211 -#define REGERR_INVALID_SUBEXP_NAME -212 -#define REGERR_UNDEFINED_NAME_REFERENCE -213 -#define REGERR_UNDEFINED_GROUP_REFERENCE -214 -#define REGERR_MULTIPLEX_DEFINITION_NAME_CALL -215 -#define REGERR_NEVER_ENDING_RECURSION -216 +#define ONIGERR_TOO_BIG_NUMBER -200 +#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 +#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 +#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 +#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 +#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205 +#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206 +#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207 +#define ONIGERR_INVALID_BACKREF -208 +#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209 +#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212 +#define ONIGERR_EMPTY_GROUP_NAME -214 +#define ONIGERR_INVALID_GROUP_NAME -215 +#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216 +#define ONIGERR_UNDEFINED_NAME_REFERENCE -217 +#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218 +#define ONIGERR_MULTIPLEX_DEFINED_NAME -219 +#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220 +#define ONIGERR_NEVER_ENDING_RECURSION -221 +#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 +#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 /* errors related to thread */ -#define REGERR_OVER_THREAD_PASS_LIMIT_COUNT -301 +#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 + +/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ +#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31 +#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \ + ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i]) /* match result region type */ struct re_registers { @@ -284,164 +575,143 @@ struct re_registers { int num_regs; int* beg; int* end; + /* extended */ + struct re_registers** list; /* capture history. list[1]-list[31] */ }; -#define REG_REGION_NOTPOS -1 +#define ONIG_REGION_NOTPOS -1 -typedef struct re_registers RegRegion; +typedef struct re_registers OnigRegion; typedef struct { UChar* par; UChar* par_end; -} RegErrorInfo; +} OnigErrorInfo; typedef struct { int lower; int upper; -} RegRepeatRange; +} OnigRepeatRange; + +typedef void (*OnigWarnFunc) P_((char* s)); +extern void onig_null_warn P_((char* s)); +#define ONIG_NULL_WARN onig_null_warn + +#define ONIG_CHAR_TABLE_SIZE 256 /* regex_t state */ -#define REG_STATE_NORMAL 0 -#define REG_STATE_SEARCHING 1 -#define REG_STATE_COMPILING -1 -#define REG_STATE_MODIFY -2 +#define ONIG_STATE_NORMAL 0 +#define ONIG_STATE_SEARCHING 1 +#define ONIG_STATE_COMPILING -1 +#define ONIG_STATE_MODIFY -2 -#define REG_STATE(regex) \ - ((regex)->state > 0 ? REG_STATE_SEARCHING : (regex)->state) +#define ONIG_STATE(reg) \ + ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state) typedef struct re_pattern_buffer { - /* common members in BBuf(bytes-buffer) type */ + /* common members of BBuf(bytes-buffer) */ unsigned char* p; /* compiled pattern */ unsigned int used; /* used space for p */ unsigned int alloc; /* allocated space for p */ - int state; /* normal, searching, compiling */ - int num_mem; /* used memory(...) num counted from 1 */ - int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ - int num_null_check; /* OP_NULL_CHECK_START/END id counter */ - int num_call; /* number of subexp call */ - unsigned int backtrack_mem; + int state; /* normal, searching, compiling */ + int num_mem; /* used memory(...) num counted from 1 */ + int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ + int num_null_check; /* OP_NULL_CHECK_START/END id counter */ + int num_call; /* number of subexp call */ + unsigned int capture_history; /* (?@...) flag (1-31) */ + unsigned int bt_mem_start; /* need backtrack flag */ + unsigned int bt_mem_end; /* need backtrack flag */ int stack_pop_level; int repeat_range_alloc; - RegRepeatRange* repeat_range; + OnigRepeatRange* repeat_range; - RegCharEncoding enc; - RegOptionType options; - RegSyntaxType* syntax; + OnigEncoding enc; + OnigOptionType options; + OnigSyntaxType* syntax; void* name_table; - /* optimize info (string search and char-map and anchor) */ + /* optimization info (string search, char-map and anchors) */ int optimize; /* optimize flag */ int threshold_len; /* search str-length for apply optimize */ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ - RegDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ - RegDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ + OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ + OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ int sub_anchor; /* start-anchor for exact or map */ unsigned char *exact; unsigned char *exact_end; - unsigned char map[REG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ + unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ int *int_map; /* BM skip for exact_len > 255 */ int *int_map_backward; /* BM skip for backward search */ - RegDistance dmin; /* min-distance of exact or map */ - RegDistance dmax; /* max-distance of exact or map */ + OnigDistance dmin; /* min-distance of exact or map */ + OnigDistance dmax; /* max-distance of exact or map */ /* regex_t link chain */ - struct re_pattern_buffer* chain; /* escape compile-conflict on multi-thread */ + struct re_pattern_buffer* chain; /* escape compile-conflict */ } regex_t; -#ifdef RUBY_PLATFORM -#define re_mbcinit ruby_re_mbcinit -#define re_compile_pattern ruby_re_compile_pattern -#define re_recompile_pattern ruby_re_recompile_pattern -#define re_free_pattern ruby_re_free_pattern -#define re_adjust_startpos ruby_re_adjust_startpos -#define re_search ruby_re_search -#define re_match ruby_re_match -#define re_set_casetable ruby_re_set_casetable -#define re_copy_registers ruby_re_copy_registers -#define re_free_registers ruby_re_free_registers -#define register_info_type ruby_register_info_type -#define re_error_code_to_str ruby_error_code_to_str - -#define ruby_error_code_to_str regex_error_code_to_str -#define ruby_re_copy_registers regex_region_copy -#else -#define re_error_code_to_str regex_error_code_to_str -#define re_copy_registers regex_region_copy -#endif /* Oniguruma Native API */ -REG_EXTERN -int regex_init P_((void)); -REG_EXTERN -int regex_error_code_to_str PV_((UChar* s, int err_code, ...)); -REG_EXTERN -int regex_new P_((regex_t**, UChar* pattern, UChar* pattern_end, RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, RegErrorInfo* einfo)); -REG_EXTERN -void regex_free P_((regex_t*)); -REG_EXTERN -int regex_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, RegOptionType option, RegCharEncoding code, RegSyntaxType* syntax, RegErrorInfo* einfo)); -REG_EXTERN -int regex_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, RegRegion* region, RegOptionType option)); -REG_EXTERN -int regex_match P_((regex_t*, UChar* str, UChar* end, UChar* at, RegRegion* region, RegOptionType option)); -REG_EXTERN -RegRegion* regex_region_new P_((void)); -REG_EXTERN -void regex_region_free P_((RegRegion* region, int free_self)); -REG_EXTERN -void regex_region_copy P_((RegRegion* to, RegRegion* from)); -REG_EXTERN -void regex_region_clear P_((RegRegion* region)); -REG_EXTERN -int regex_region_resize P_((RegRegion* region, int n)); -REG_EXTERN -int regex_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end, +ONIG_EXTERN +int onig_init P_((void)); +ONIG_EXTERN +int onig_error_code_to_str PV_((UChar* s, int err_code, ...)); +ONIG_EXTERN +void onig_set_warn_func P_((OnigWarnFunc f)); +ONIG_EXTERN +void onig_set_verb_warn_func P_((OnigWarnFunc f)); +ONIG_EXTERN +int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +ONIG_EXTERN +void onig_free P_((regex_t*)); +ONIG_EXTERN +int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +ONIG_EXTERN +int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option)); +ONIG_EXTERN +int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option)); +ONIG_EXTERN +OnigRegion* onig_region_new P_((void)); +ONIG_EXTERN +void onig_region_free P_((OnigRegion* region, int free_self)); +ONIG_EXTERN +void onig_region_copy P_((OnigRegion* to, OnigRegion* from)); +ONIG_EXTERN +void onig_region_clear P_((OnigRegion* region)); +ONIG_EXTERN +int onig_region_resize P_((OnigRegion* region, int n)); +ONIG_EXTERN +int onig_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end, int** nums)); -REG_EXTERN -int regex_foreach_name P_((regex_t* reg, int (*func)(UChar*,int,int*,void*), void* arg)); -REG_EXTERN -UChar* regex_get_prev_char_head P_((RegCharEncoding code, UChar* start, UChar* s)); -REG_EXTERN -UChar* regex_get_left_adjust_char_head P_((RegCharEncoding code, UChar* start, UChar* s)); -REG_EXTERN -UChar* regex_get_right_adjust_char_head P_((RegCharEncoding code, UChar* start, UChar* s)); -REG_EXTERN -void regex_set_default_trans_table P_((UChar* table)); -REG_EXTERN -int regex_set_default_syntax P_((RegSyntaxType* syntax)); -REG_EXTERN -int regex_end P_((void)); -REG_EXTERN -const char* regex_version P_((void)); - - -/* GNU regex API */ -#ifdef REG_RUBY_M17N -REG_EXTERN -void re_mbcinit P_((RegCharEncoding)); -#else -REG_EXTERN -void re_mbcinit P_((int)); -#endif - -REG_EXTERN -int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); -REG_EXTERN -int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); -REG_EXTERN -void re_free_pattern P_((struct re_pattern_buffer*)); -REG_EXTERN -int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int)); -REG_EXTERN -int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*)); -REG_EXTERN -int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*)); -REG_EXTERN -void re_set_casetable P_((const char*)); -REG_EXTERN -void re_free_registers P_((struct re_registers*)); -REG_EXTERN -int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */ +ONIG_EXTERN +int onig_name_to_backref_number P_((regex_t* reg, UChar* name, UChar* name_end, OnigRegion *region)); +ONIG_EXTERN +int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), void* arg)); +ONIG_EXTERN +int onig_number_of_names P_((regex_t* reg)); +ONIG_EXTERN +OnigEncoding onig_get_encoding P_((regex_t* reg)); +ONIG_EXTERN +OnigOptionType onig_get_options P_((regex_t* reg)); +ONIG_EXTERN +OnigSyntaxType* onig_get_syntax P_((regex_t* reg)); +ONIG_EXTERN +int onig_set_default_syntax P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); +ONIG_EXTERN +void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op)); +ONIG_EXTERN +void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2)); +ONIG_EXTERN +void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)); +ONIG_EXTERN +void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options)); +ONIG_EXTERN +int onig_set_meta_char P_((unsigned int what, unsigned int c)); +ONIG_EXTERN +int onig_end P_((void)); +ONIG_EXTERN +const char* onig_version P_((void)); #endif /* ONIGURUMA_H */ |
