diff options
author | SVN Migration <svn@php.net> | 2003-02-27 17:43:39 +0000 |
---|---|---|
committer | SVN Migration <svn@php.net> | 2003-02-27 17:43:39 +0000 |
commit | 078bcec0997ad0e07b720c43cc9e6d0e046a75ab (patch) | |
tree | 36cb0f6be2ef078fe3374de8c087b93ecf82f812 /ext/mbstring/php_mbregex.c | |
parent | fd61f69077f6156ca71dde60ecfd9ed9765a02db (diff) | |
download | php-git-PHP-5.tar.gz |
This commit was manufactured by cvs2svn to create branch 'PHP_5'.PHP-5
Diffstat (limited to 'ext/mbstring/php_mbregex.c')
-rw-r--r-- | ext/mbstring/php_mbregex.c | 1163 |
1 files changed, 0 insertions, 1163 deletions
diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c deleted file mode 100644 index 29528bac9f..0000000000 --- a/ext/mbstring/php_mbregex.c +++ /dev/null @@ -1,1163 +0,0 @@ -/* - +----------------------------------------------------------------------+ - | PHP Version 4 | - +----------------------------------------------------------------------+ - | Copyright (c) 1997-2003 The PHP Group | - +----------------------------------------------------------------------+ - | This source file is subject to version 2.02 of the PHP license, | - | that is bundled with this package in the file LICENSE, and is | - | available at through the world-wide-web at | - | http://www.php.net/license/2_02.txt. | - | If you did not receive a copy of the PHP license and are unable to | - | obtain it through the world-wide-web, please send a note to | - | license@php.net so we can mail you a copy immediately. | - +----------------------------------------------------------------------+ - | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> | - +----------------------------------------------------------------------+ - */ - -/* $Id$ */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "php.h" -#include "php_ini.h" -#include "php_mbregex.h" -#include "mbregex.h" -#include "mbstring.h" - -#if HAVE_MBREGEX - -ZEND_EXTERN_MODULE_GLOBALS(mbstring) - -#ifdef ZTS -MUTEX_T mbregex_locale_mutex = NULL; -#endif - -/* - * string buffer - */ -struct strbuf { - unsigned char* buffer; - int length; - int pos; - int allocsz; -}; - -static void -_php_mb_regex_strbuf_init(struct strbuf *pd) -{ - if (pd) { - pd->buffer = (unsigned char*)0; - pd->length = 0; - pd->pos = 0; - pd->allocsz = 64; - } -} - -static int -_php_mb_regex_strbuf_ncat(struct strbuf *pd, const unsigned char *psrc, int len) -{ - if (pd == NULL || psrc == NULL) { - return -1; - } - - if ((pd->pos + len) >= pd->length) { - /* reallocate buffer */ - int newlen = pd->length + pd->allocsz + len; - unsigned char *tmp = (unsigned char*)erealloc((void*)pd->buffer, newlen); - if (tmp == NULL) { - return -1; - } - pd->length = newlen; - pd->buffer = tmp; - } - - while (len > 0) { - pd->buffer[pd->pos++] = *psrc++; - len--; - } - - return len; -} - -/* {{{ static void php_mb_regex_free_cache() */ -static void php_mb_regex_free_cache(mb_regex_t *pre) -{ - mbre_free_pattern(pre); -} -/* }}} */ - -/* {{{ _php_mb_regex_globals_ctor */ -void _php_mb_regex_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC) -{ - MBSTRG(default_mbctype) = MBCTYPE_EUC; - MBSTRG(current_mbctype) = MBCTYPE_EUC; - zend_hash_init(&(MBSTRG(ht_rc)), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1); - MBSTRG(search_str) = (zval**)0; - MBSTRG(search_str_val) = (zval*)0; - MBSTRG(search_re) = (mb_regex_t*)0; - MBSTRG(search_pos) = 0; - MBSTRG(search_regs) = (struct mbre_registers*)0; -} -/* }}} */ - -/* {{{ _php_mb_regex_globals_dtor */ -void _php_mb_regex_globals_dtor(zend_mbstring_globals *pglobals TSRMLS_DC) -{ - zend_hash_destroy(&MBSTRG(ht_rc)); -} -/* }}} */ - -/* {{{ PHP_MINIT_FUNCTION(mb_regex) */ -PHP_MINIT_FUNCTION(mb_regex) -{ -# ifdef ZTS - mbregex_locale_mutex = tsrm_mutex_alloc(); -# endif - return SUCCESS; -} -/* }}} */ - -/* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */ -PHP_MSHUTDOWN_FUNCTION(mb_regex) -{ -#ifdef ZTS - if (mbregex_locale_mutex != NULL) { - tsrm_mutex_free(mbregex_locale_mutex); - } -#endif - return SUCCESS; -} -/* }}} */ - -/* {{{ PHP_RINIT_FUNCTION(mb_regex) */ -PHP_RINIT_FUNCTION(mb_regex) -{ - MBSTRG(regex_default_options) = MBRE_OPTION_POSIXLINE; - - return SUCCESS; -} -/* }}} */ - -/* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */ -PHP_RSHUTDOWN_FUNCTION(mb_regex) -{ - MBSTRG(current_mbctype) = MBSTRG(default_mbctype); - if (MBSTRG(search_str)) { - if (ZVAL_REFCOUNT(*MBSTRG(search_str)) > 1) { - ZVAL_DELREF(*MBSTRG(search_str)); - } else { - zval_dtor(*MBSTRG(search_str)); - FREE_ZVAL(*MBSTRG(search_str)); - } - MBSTRG(search_str) = (zval **)0; - MBSTRG(search_str_val) = (zval *)0; - } - MBSTRG(search_pos) = 0; - if (MBSTRG(search_re)) { - efree(MBSTRG(search_re)); - MBSTRG(search_re) = (mb_regex_t *)0; - } - if (MBSTRG(search_regs)) { - mbre_free_registers(MBSTRG(search_regs)); - efree(MBSTRG(search_regs)); - MBSTRG(search_regs) = (struct mbre_registers*)0; - } - zend_hash_clean(&MBSTRG(ht_rc)); - - return SUCCESS; -} -/* }}} */ - -/* - * encoding name resolver - */ -static int _php_mb_regex_name2mbctype(const char *pname) -{ - int mbctype; - - mbctype = -1; - if (pname != NULL) { - if (strcasecmp("EUC-JP", pname) == 0 - || strcasecmp("X-EUC-JP", pname) == 0 - || strcasecmp("UJIS", pname) == 0 - || strcasecmp("EUCJP", pname) == 0 - || strcasecmp("EUC_JP", pname) == 0) { - mbctype = MBCTYPE_EUC; - } else if (strcasecmp("UTF-8", pname) == 0 - || strcasecmp("UTF8", pname) == 0) { - mbctype = MBCTYPE_UTF8; - } else if (strcasecmp("SJIS", pname) == 0 - || strcasecmp("CP932", pname) == 0 - || strcasecmp("MS932", pname) == 0 - || strcasecmp("SHIFT_JIS", pname) == 0 ) { - mbctype = MBCTYPE_SJIS; - } else if (strcasecmp("ASCII", pname) == 0) { - mbctype = MBCTYPE_ASCII; - mbctype = MBCTYPE_EUC; - } - } - - return mbctype; -} - -static const char* -php_mbregex_mbctype2name(int mbctype) -{ - const char *p = NULL; - - if (mbctype == MBCTYPE_EUC) { - p = "EUC-JP"; - } else if(mbctype == MBCTYPE_UTF8) { - p = "UTF-8"; - } else if(mbctype == MBCTYPE_SJIS) { - p = "SJIS"; - } else if(mbctype == MBCTYPE_ASCII) { - p = "ascii"; - } - - return p; -} - - -/* - * regex cache - */ -static int -php_mbregex_compile_pattern(mb_regex_t *pre, const char *pattern, int patlen, int options, int mbctype TSRMLS_DC) -{ - int res = 0; - const char *err_str = NULL; - mb_regex_t *rc = NULL; - - if(zend_hash_find(&MBSTRG(ht_rc), (char *)pattern, patlen+1, (void **) &rc) == FAILURE || - rc->options != options || rc->mbctype != mbctype) { - memset(pre, 0, sizeof(*pre)); - pre->fastmap = (char*)emalloc((1 << MBRE_BYTEWIDTH)*sizeof(char)); - pre->options = options; - pre->mbctype = mbctype; - err_str = mbre_compile_pattern(pattern, patlen, pre); - if (!err_str) { - zend_hash_update(&MBSTRG(ht_rc), (char *) pattern, patlen+1, (void *) pre, sizeof(*pre), NULL); - } else { - efree(pre->fastmap); - pre->fastmap = (char*)0; - php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str); - res = 1; - } - } else { - memcpy(pre, rc, sizeof(*pre)); - } - - return res; -} - -static size_t -_php_mb_regex_get_option_string(char *str, size_t len, int option) -{ - size_t len_left = len; - size_t len_req = 0; - char *p = str; - - - if ((option & MBRE_OPTION_IGNORECASE) != 0) { - if (len_left > 0) { - --len_left; - *(p++) = 'i'; - } - ++len_req; - } - - if ((option & MBRE_OPTION_EXTENDED) != 0) { - if (len_left > 0) { - --len_left; - *(p++) = 'x'; - } - ++len_req; - } - - if ((option & MBRE_OPTION_POSIXLINE) == MBRE_OPTION_POSIXLINE) { - if (len_left > 0) { - --len_left; - *(p++) = 'p'; - } - ++len_req; - } else { - if ((option & MBRE_OPTION_MULTILINE) != 0) { - if (len_left > 0) { - --len_left; - *(p++) = 'm'; - } - ++len_req; - } - - if ((option & MBRE_OPTION_SINGLELINE) != 0) { - if (len_left > 0) { - --len_left; - *(p++) = 's'; - } - ++len_req; - } - } - if ((option & MBRE_OPTION_LONGEST) != 0) { - if (len_left > 0) { - --len_left; - *(p++) = 'l'; - } - ++len_req; - } - if (len_left > 0) { - --len_left; - *(p++) = '\0'; - } - - ++len_req; - if (len < len_req) { - return len_req; - } - - return 0; -} - -static void -_php_mb_regex_init_options(const char *parg, int narg, int *option, int *eval) -{ - int n; - char c; - int optm = 0; - - if (parg != NULL) { - n = 0; - while(n < narg) { - c = parg[n++]; - switch (c) { - case 'i': - optm |= MBRE_OPTION_IGNORECASE; - break; - case 'x': - optm |= MBRE_OPTION_EXTENDED; - break; - case 'm': - optm |= MBRE_OPTION_MULTILINE; - break; - case 's': - optm |= MBRE_OPTION_SINGLELINE; - break; - case 'p': - optm |= MBRE_OPTION_POSIXLINE; - break; - case 'l': - optm |= MBRE_OPTION_LONGEST; - break; - case 'e': - if (eval != NULL) *eval = 1; - break; - default: - break; - } - } - if (option != NULL) *option|=optm; - } -} - - -/* - * php funcions - */ - -/* {{{ proto string mb_regex_encoding([string encoding]) - Returns the current encoding for regex as a string. */ -PHP_FUNCTION(mb_regex_encoding) -{ - zval **arg1; - int mbctype; - - if (ZEND_NUM_ARGS() == 0) { - const char *retval = php_mbregex_mbctype2name(MBSTRG(current_mbctype)); - if ( retval != NULL ) { - RETVAL_STRING((char *)retval, 1); - } else { - RETVAL_FALSE; - } - } else if (ZEND_NUM_ARGS() == 1 && - zend_get_parameters_ex(1, &arg1) != FAILURE) { - convert_to_string_ex(arg1); - mbctype = _php_mb_regex_name2mbctype(Z_STRVAL_PP(arg1)); - if (mbctype < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", Z_STRVAL_PP(arg1)); - RETVAL_FALSE; - } else { - MBSTRG(current_mbctype) = mbctype; - RETVAL_TRUE; - } - } else { - WRONG_PARAM_COUNT; - } -} -/* }}} */ - - -/* {{{ _php_mb_regex_ereg_exec */ -static void -_php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) -{ - zval *arg_pattern, *array; - char *string; - int string_len; - mb_regex_t re; - struct mbre_registers regs = {0, 0, 0, 0}; - int i, err, match_len, option, beg, end; - char *str; - - array = NULL; - - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) { - RETURN_FALSE; - } - - option = MBSTRG(regex_default_options); - if (icase) { - option |= MBRE_OPTION_IGNORECASE; - } - - /* compile the regular expression from the supplied regex */ - if (Z_TYPE_P(arg_pattern) != IS_STRING) { - /* we convert numbers to integers and treat them as a string */ - if (Z_TYPE_P(arg_pattern) == IS_DOUBLE) { - convert_to_long_ex(&arg_pattern); /* get rid of decimal places */ - } - convert_to_string_ex(&arg_pattern); - /* don't bother doing an extended regex with just a number */ - } - err = php_mbregex_compile_pattern( - &re, - Z_STRVAL_P(arg_pattern), - Z_STRLEN_P(arg_pattern), - option, MBSTRG(current_mbctype) TSRMLS_CC); - if (err) { - RETURN_FALSE; - } - - /* actually execute the regular expression */ - err = mbre_search( - &re, - string, - string_len, - 0, string_len, - ®s); - if (err < 0) { - mbre_free_registers(®s); - RETURN_FALSE; - } - - match_len = 1; - str = string; - if (array) { - match_len = regs.end[0] - regs.beg[0]; - zval_dtor(array); /* start with clean array */ - array_init(array); - for (i = 0; i < regs.num_regs; i++) { - beg = regs.beg[i]; - end = regs.end[i]; - if (beg >= 0 && beg < end && end <= string_len) { - add_index_stringl(array, i, (char *)&str[beg], end - beg, 1); - } else { - add_index_bool(array, i, 0); - } - } - } - - mbre_free_registers(®s); - if (match_len == 0) { - match_len = 1; - } - RETVAL_LONG(match_len); -} - -/* {{{ proto int mb_ereg(string pattern, string string [, array registers]) - Regular expression match for multibyte string */ -PHP_FUNCTION(mb_ereg) -{ - _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); -} -/* }}} */ - -/* {{{ proto int mb_eregi(string pattern, string string [, array registers]) - Case-insensitive regular expression match for multibyte string */ -PHP_FUNCTION(mb_eregi) -{ - _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); -} -/* }}} */ - - - -/* {{{ _php_mb_regex_ereg_replace_exec */ -static void -_php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int option) -{ - zval *arg_pattern_zval; - - char *arg_pattern; - int arg_pattern_len; - - char *replace; - int replace_len; - - char *string; - int string_len; - - char *p; - mb_regex_t re; - struct mbre_registers regs = {0, 0, 0, 0}; - struct strbuf outdev, evaldev, *pdevice; - int i, n, err, pos, eval; - char *description = NULL; - char pat_buf[2]; - - const mbfl_encoding *enc; - - { - const char *current_enc_name; - current_enc_name = php_mbregex_mbctype2name(MBSTRG(current_mbctype)); - if (current_enc_name == NULL || - (enc = mbfl_name2encoding(current_enc_name)) == NULL) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error"); - RETURN_FALSE; - } - } - eval = 0; - { - char *option_str = NULL; - int option_str_len = 0; - - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zss|s", - &arg_pattern_zval, - &replace, &replace_len, - &string, &string_len, - &option_str, &option_str_len) == FAILURE) { - RETURN_FALSE; - } - - if (option_str != NULL) { - _php_mb_regex_init_options(option_str, option_str_len, &option, &eval); - } else { - option |= MBSTRG(regex_default_options); - } - } - if (Z_TYPE_P(arg_pattern_zval) == IS_STRING) { - arg_pattern = Z_STRVAL_P(arg_pattern_zval); - arg_pattern_len = Z_STRLEN_P(arg_pattern_zval); - } else { - /* FIXME: this code is not multibyte aware! */ - convert_to_long_ex(&arg_pattern_zval); - pat_buf[0] = (char)Z_LVAL_P(arg_pattern_zval); - pat_buf[1] = '\0'; - - arg_pattern = pat_buf; - arg_pattern_len = 1; - } - /* create regex pattern buffer */ - err = php_mbregex_compile_pattern( - &re, - arg_pattern, - arg_pattern_len, - option, MBSTRG(current_mbctype) TSRMLS_CC); - if (err) { - RETURN_FALSE; - } - - /* initialize string buffer (auto reallocate buffer) */ - _php_mb_regex_strbuf_init(&outdev); - _php_mb_regex_strbuf_init(&evaldev); - outdev.allocsz = (string_len >> 2) + 8; - - if (eval) { - pdevice = &evaldev; - description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC); - } else { - pdevice = &outdev; - description = NULL; - } - - /* do the actual work */ - err = 0; - pos = 0; - while (err >= 0) { - err = mbre_search(&re, string, string_len, pos, string_len - pos, ®s); - if (err <= -2) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec()"); - break; - } - if (err >= 0) { -#if moriyoshi_0 - if ( regs.beg[0] == regs.end[0] ) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression"); - break; - } -#endif - /* copy the part of the string before the match */ - _php_mb_regex_strbuf_ncat(&outdev, (const unsigned char *)&string[pos], regs.beg[0] - pos); - /* copy replacement and backrefs */ - i = 0; - p = replace; - while (i < replace_len) { - int fwd = (int) php_mb_mbchar_bytes_ex(p, enc); - n = -1; - if ((replace_len - i) >= 2 && fwd == 1 && - p[0] == '\\' && p[1] >= '0' && p[1] <= '9') { - n = p[1] - '0'; - } - if (n >= 0 && n < regs.num_regs) { - if (regs.beg[n] >= 0 && regs.beg[n] < regs.end[n] && regs.end[n] <= string_len) { - _php_mb_regex_strbuf_ncat(pdevice, (const unsigned char *)&string[regs.beg[n]], regs.end[n] - regs.beg[n]); - } - p += 2; - i += 2; - } else { - _php_mb_regex_strbuf_ncat(pdevice, (const unsigned char *)p, fwd); - p += fwd; - i += fwd; - } - } - if (eval) { - zval v; - /* null terminate buffer */ - _php_mb_regex_strbuf_ncat(&evaldev, (const unsigned char *)"\0", 1); - /* do eval */ - zend_eval_string((char *)evaldev.buffer, &v, description TSRMLS_CC); - /* result of eval */ - convert_to_string(&v); - _php_mb_regex_strbuf_ncat(&outdev, Z_STRVAL(v), Z_STRLEN(v)); - /* Clean up */ - evaldev.pos = 0; - zval_dtor(&v); - } - n = regs.end[0]; - if (pos < n) { - pos = n; - } else { - _php_mb_regex_strbuf_ncat(&outdev, (const unsigned char *)&string[pos], 1 ); - pos++; - } - } else { /* nomatch */ - /* stick that last bit of string on our output */ - _php_mb_regex_strbuf_ncat(&outdev, (const unsigned char *)&string[pos], string_len - pos); - } - } - - if (description) { - efree(description); - } - mbre_free_registers(®s); - if (evaldev.buffer) { - efree((void*)evaldev.buffer); - } - n = outdev.pos; - _php_mb_regex_strbuf_ncat(&outdev, (const unsigned char *)"\0", 1); - if (err <= -2) { - if (outdev.buffer) { - efree((void*)outdev.buffer); - } - RETVAL_FALSE; - } else { - RETVAL_STRINGL((char *)outdev.buffer, n, 0); - } -} -/* }}} */ -/* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option]) - Replace regular expression for multibyte string */ -PHP_FUNCTION(mb_ereg_replace) -{ - _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); -} -/* }}} */ - -/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string) - Case insensitive replace regular expression for multibyte string */ -PHP_FUNCTION(mb_eregi_replace) -{ - _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, MBRE_OPTION_IGNORECASE); -} -/* }}} */ - - -/* {{{ proto array mb_split(string pattern, string string [, int limit]) - split multibyte string into array by regular expression */ -PHP_FUNCTION(mb_split) -{ - zval *arg_pat; - mb_regex_t re; - struct mbre_registers regs = {0, 0, 0, 0}; - char *string; - int n, err, count, string_len, pos; - - count = -1; - - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zs|l", &arg_pat, - &string, &string_len, &count) == FAILURE) { - RETURN_FALSE; - } - - if (count == 0) count = 1; - - /* create regex pattern buffer */ - err = php_mbregex_compile_pattern( - &re, - Z_STRVAL_P(arg_pat), - Z_STRLEN_P(arg_pat), - MBSTRG(regex_default_options), MBSTRG(current_mbctype) TSRMLS_CC); - if (err) { - RETURN_FALSE; - } - - array_init(return_value); - - pos = 0; - err = 0; - /* churn through str, generating array entries as we go */ - while ((--count != 0) && - (err = mbre_search(&re, string, string_len, pos, string_len - pos, ®s)) >= 0) { - if ( regs.beg[0] == regs.end[0] ) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression"); - break; - } - - - /* add it to the array */ - if ( regs.beg[0] < string_len && regs.beg[0] >= pos) { - add_next_index_stringl(return_value, (char *)&string[pos], regs.beg[0]-pos, 1); - } else { - err = -2; - break; - } - /* point at our new starting point */ - n = regs.end[0]; - if (pos < n) { - pos = n; - } - if (count < 0) { - count = 0; - } - } - - mbre_free_registers(®s); - - /* see if we encountered an error */ - if (err <= -2) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit()"); - zval_dtor(return_value); - RETURN_FALSE; - } - - /* otherwise we just have one last element to add to the array */ - n = string_len - pos; - if (n > 0) { - add_next_index_stringl(return_value, (char *)&string[pos], n, 1); - } else { - add_next_index_stringl(return_value, (char *)empty_string, 0, 1); - } -} -/* }}} */ - - -/* {{{ proto bool mb_ereg_match(string pattern, string string [,string option]) - Regular expression match for multibyte string */ -PHP_FUNCTION(mb_ereg_match) -{ - zval *arg_pattern; - - char *string; - int string_len; - - mb_regex_t re; - int option, err; - - option = 0; - { - char *option_str = NULL; - int option_str_len = 0; - - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zs|s", - &arg_pattern, &string, &string_len, - &option_str, &option_str_len)==FAILURE) { - RETURN_FALSE; - } - - if (option_str != NULL) { - _php_mb_regex_init_options(option_str, option_str_len, &option, NULL); - } else { - option |= MBSTRG(regex_default_options); - } - } - convert_to_string_ex(&arg_pattern); - - err = php_mbregex_compile_pattern( - &re, - Z_STRVAL_P(arg_pattern), - Z_STRLEN_P(arg_pattern), - option, MBSTRG(current_mbctype) TSRMLS_CC); - - if (err) { - RETURN_FALSE; - } - - /* match */ - err = mbre_match(&re, string, string_len, 0, NULL); - if (err >= 0) { - RETVAL_TRUE; - } else { - RETVAL_FALSE; - } -} -/* }}} */ - - -/* regex search */ -static void -_php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) -{ - zval **arg_pattern, **arg_options; - int n, i, err, pos, len, beg, end, option; - unsigned char *str; - - option = MBSTRG(regex_default_options); - switch (ZEND_NUM_ARGS()) { - case 0: - break; - case 1: - if (zend_get_parameters_ex(1, &arg_pattern) == FAILURE) { - WRONG_PARAM_COUNT; - } - break; - case 2: - if (zend_get_parameters_ex(2, &arg_pattern, &arg_options) == FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_string_ex(arg_options); - option = 0; - _php_mb_regex_init_options(Z_STRVAL_PP(arg_options), Z_STRLEN_PP(arg_options), &option, NULL); - break; - default: - WRONG_PARAM_COUNT; - break; - } - if (ZEND_NUM_ARGS() > 0) { - /* create regex pattern buffer */ - convert_to_string_ex(arg_pattern); - if (!MBSTRG(search_re)) { - MBSTRG(search_re) = (mb_regex_t*)ecalloc(1, sizeof(mb_regex_t)); - } - err = php_mbregex_compile_pattern( - MBSTRG(search_re), - Z_STRVAL_PP(arg_pattern), - Z_STRLEN_PP(arg_pattern), - option, MBSTRG(current_mbctype) TSRMLS_CC); - if (err) { - efree(MBSTRG(search_re)); - MBSTRG(search_re) = (mb_regex_t*)0; - RETURN_FALSE; - } - } - - pos = MBSTRG(search_pos); - str = NULL; - len = 0; - if (MBSTRG(search_str) != NULL && Z_TYPE_PP(MBSTRG(search_str)) == IS_STRING){ - str = (unsigned char *)Z_STRVAL_PP(MBSTRG(search_str)); - len = Z_STRLEN_PP(MBSTRG(search_str)); - } - - if (!MBSTRG(search_re)) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given"); - RETURN_FALSE; - } - if (!str) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given"); - RETURN_FALSE; - } - if (MBSTRG(search_regs)) { - mbre_free_registers(MBSTRG(search_regs)); - memset(MBSTRG(search_regs), 0, sizeof(struct mbre_registers)); - } else { - MBSTRG(search_regs) = (struct mbre_registers*)ecalloc(1, sizeof(struct mbre_registers)); - } - - err = mbre_search(MBSTRG(search_re), (const char *)str, len, pos, len - pos, MBSTRG(search_regs)); - if (err <= -2) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search()"); - RETVAL_FALSE; - } else if (err < 0) { - MBSTRG(search_pos) = len; - RETVAL_FALSE; - } else { - if (MBSTRG(search_regs)->beg[0] == MBSTRG(search_regs)->end[0]) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression"); - } - switch (mode) { - case 1: - array_init(return_value); - beg = MBSTRG(search_regs)->beg[0]; - end = MBSTRG(search_regs)->end[0]; - add_next_index_long(return_value, beg); - add_next_index_long(return_value, end - beg); - break; - case 2: - array_init(return_value); - n = MBSTRG(search_regs)->num_regs; - for (i = 0; i < n; i++) { - beg = MBSTRG(search_regs)->beg[i]; - end = MBSTRG(search_regs)->end[i]; - if (beg >= 0 && beg <= end && end <= len) { - add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1); - } else { - add_index_bool(return_value, i, 0); - } - } - break; - default: - RETVAL_TRUE; - break; - } - end = MBSTRG(search_regs)->end[0]; - if (pos < end) { - MBSTRG(search_pos) = end; - } else { - MBSTRG(search_pos) = pos + 1; - } - } - - if (err < 0) { - mbre_free_registers(MBSTRG(search_regs)); - efree(MBSTRG(search_regs)); - MBSTRG(search_regs) = (struct mbre_registers*)0; - } -} -/* }}} */ - - -/* {{{ proto bool mb_ereg_search([string pattern[, string option]]) - Regular expression search for multibyte string */ -PHP_FUNCTION(mb_ereg_search) -{ - _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); -} -/* }}} */ - - -/* {{{ proto array mb_ereg_search_pos([string pattern[, string option]]) - Regular expression search for multibyte string */ -PHP_FUNCTION(mb_ereg_search_pos) -{ - _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); -} -/* }}} */ - - -/* {{{ proto array mb_ereg_search_regs([string pattern[, string option]]) - Regular expression search for multibyte string */ -PHP_FUNCTION(mb_ereg_search_regs) -{ - _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2); -} -/* }}} */ - - -/* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]]) - Initialize string and regular expression for search. */ -PHP_FUNCTION(mb_ereg_search_init) -{ - zval **arg_str, **arg_pattern, **arg_options; - int err, option; - - option = MBSTRG(regex_default_options); - switch (ZEND_NUM_ARGS()) { - case 1: - if (zend_get_parameters_ex(1, &arg_str) == FAILURE) { - WRONG_PARAM_COUNT; - } - break; - case 2: - if (zend_get_parameters_ex(2, &arg_str, &arg_pattern) == FAILURE) { - WRONG_PARAM_COUNT; - } - break; - case 3: - if (zend_get_parameters_ex(3, &arg_str, &arg_pattern, &arg_options) == FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_string_ex(arg_options); - option = 0; - _php_mb_regex_init_options(Z_STRVAL_PP(arg_options), Z_STRLEN_PP(arg_options), &option, NULL); - break; - default: - WRONG_PARAM_COUNT; - break; - } - if (ZEND_NUM_ARGS() > 1) { - /* create regex pattern buffer */ - convert_to_string_ex(arg_pattern); - if (!MBSTRG(search_re)) { - MBSTRG(search_re) = (mb_regex_t*)ecalloc(1, sizeof(mb_regex_t)); - } - err = php_mbregex_compile_pattern( - MBSTRG(search_re), - Z_STRVAL_PP(arg_pattern), - Z_STRLEN_PP(arg_pattern), - option, MBSTRG(current_mbctype) TSRMLS_CC); - if (err) { - efree(MBSTRG(search_re)); - MBSTRG(search_re) = (mb_regex_t*)0; - RETURN_FALSE; - } - } - - if (MBSTRG(search_str)) { - if (ZVAL_REFCOUNT(*MBSTRG(search_str)) > 1) { - ZVAL_DELREF(*MBSTRG(search_str)); - } else { - zval_dtor(*MBSTRG(search_str)); - FREE_ZVAL(*MBSTRG(search_str)); - } - MBSTRG(search_str) = (zval **)0; - MBSTRG(search_str_val) = (zval *)0; - } - - if (PZVAL_IS_REF(*arg_str)) { - ZVAL_ADDREF(*arg_str); - MBSTRG(search_str_val) = *arg_str; - MBSTRG(search_str) = &MBSTRG(search_str_val); - } else { - MAKE_STD_ZVAL(MBSTRG(search_str_val)); - *MBSTRG(search_str_val) = **arg_str; - zval_copy_ctor(MBSTRG(search_str_val)); - MBSTRG(search_str_val)->refcount = 1; - MBSTRG(search_str_val)->is_ref = 0; - MBSTRG(search_str) = &MBSTRG(search_str_val); - convert_to_string_ex(MBSTRG(search_str)); - } - - MBSTRG(search_pos) = 0; - - if (MBSTRG(search_regs)) { - mbre_free_registers(MBSTRG(search_regs)); - efree(MBSTRG(search_regs)); - MBSTRG(search_regs) = (struct mbre_registers*)0; - } - - RETURN_TRUE; -} -/* }}} */ - - -/* {{{ proto array mb_ereg_search_getregs(void) - Get matched substring of the last time */ -PHP_FUNCTION(mb_ereg_search_getregs) -{ - int n, i, len, beg, end; - unsigned char *str; - - if (MBSTRG(search_regs) && Z_TYPE_PP(MBSTRG(search_str)) == IS_STRING && - Z_STRVAL_PP(MBSTRG(search_str)) && array_init(return_value) != FAILURE) { - str = (unsigned char *)Z_STRVAL_PP(MBSTRG(search_str)); - len = Z_STRLEN_PP(MBSTRG(search_str)); - n = MBSTRG(search_regs)->num_regs; - for (i = 0; i < n; i++) { - beg = MBSTRG(search_regs)->beg[i]; - end = MBSTRG(search_regs)->end[i]; - if (beg >= 0 && beg <= end && end <= len) { - add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1); - } else { - add_index_bool(return_value, i, 0); - } - } - } else { - RETVAL_FALSE; - } -} -/* }}} */ - - -/* {{{ proto int mb_ereg_search_getpos(void) - Get search start position */ -PHP_FUNCTION(mb_ereg_search_getpos) -{ - RETVAL_LONG(MBSTRG(search_pos)); -} -/* }}} */ - - -/* {{{ proto bool mb_ereg_search_setpos(int position) - Set search start position */ -PHP_FUNCTION(mb_ereg_search_setpos) -{ - zval **arg_pos; - int n; - - if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg_pos) == FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_long_ex(arg_pos); - n = Z_LVAL_PP(arg_pos); - if (n < 0 - || ( MBSTRG(search_str) != NULL && *MBSTRG(search_str) != NULL && - Z_TYPE_PP(MBSTRG(search_str)) == IS_STRING && - n >= Z_STRLEN_PP(MBSTRG(search_str)) ) ) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range"); - MBSTRG(search_pos) = 0; - RETVAL_FALSE; - } else { - MBSTRG(search_pos) = n; - RETVAL_TRUE; - } -} -/* }}} */ - -/* {{{ php_mb_regex_set_options */ -int php_mb_regex_set_options( int options TSRMLS_DC) -{ - int prev_opt = MBSTRG(regex_default_options); - MBSTRG(regex_default_options) = options; - return prev_opt; -} -/* }}} */ - -/* {{{ php_mb_regex_set_options_by_string */ -int php_mb_regex_set_options_by_string( const char *opt_str, int len TSRMLS_DC) -{ - int new_opt = 0; - _php_mb_regex_init_options( opt_str, len, &new_opt, NULL); - return php_mb_regex_set_options( new_opt TSRMLS_CC); -} -/* }}} */ - -/* {{{ proto string mb_regex_set_options([string options]) - Set or get the default options for mbregex functions */ -PHP_FUNCTION(mb_regex_set_options) -{ - int opt; - char *string = NULL; - int string_len; - char buf[16]; - - if ( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "|s", - &string, &string_len ) == FAILURE ) { - RETURN_FALSE; - } - if (string != NULL) { - opt = php_mb_regex_set_options_by_string( (const char*)string, - string_len TSRMLS_CC ); - } else { - opt = MBSTRG(regex_default_options); - } - _php_mb_regex_get_option_string(buf, sizeof(buf), opt); - - RETVAL_STRING(buf, 1); -} -/* }}} */ - -#endif /* HAVE_MBREGEX */ |