diff options
Diffstat (limited to 'ext/mbstring/php_mbregex.c')
-rw-r--r-- | ext/mbstring/php_mbregex.c | 983 |
1 files changed, 983 insertions, 0 deletions
diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c new file mode 100644 index 0000000000..bd3069b98e --- /dev/null +++ b/ext/mbstring/php_mbregex.c @@ -0,0 +1,983 @@ +/* + * PHP3 Internationalization support program. + * + * Copyright (c) 1999,2000 by the PHP3 internationalization team. + * All rights reserved. + * + * This program is free software. You can use, redistribute and/or modify + * without fee under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY including implied or express warranty of + * marchantability or fitness for a particular purpose. + * + * Currently, the "PHP3 internationalization team" has no relationship with + * the "PHP Development Team". But we hope these code will be integrated + * into the PHP3, and it will be distributed as a part of PHP3. + * + * See README_i18n for more detail. + * + * Authors: + * Hironori Sato <satoh@jpnnet.com> + * Shigeru Kanemoto <sgk@happysize.co.jp> + * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> + */ + +/* + * PHP4 multibyte regular expression module + * Authors: + * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> + */ + +/* $Id$ */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php.h" +#include "php_ini.h" +#include "mbregex.h" +#include "mbstring.h" + +#if HAVE_MBREGEX + +extern ZEND_DECLARE_MODULE_GLOBALS(mbstring) + +/* + * string buffer + */ +struct strbuf { + unsigned char* buffer; + int length; + int pos; + int allocsz; +}; + +static void +php_mbregex_strbuf_init(struct strbuf *pd) +{ + if (pd) { + pd->buffer = (char*)0; + pd->length = 0; + pd->pos = 0; + pd->allocsz = 64; + } +} + +static int +php_mbregex_strbuf_ncat(struct strbuf *pd, const unsigned char *psrc, int len) +{ + if (pd == NULL || psrc == NULL) { + return -1; + } + + if ((pd->pos + len) >= pd->length) { + /* reallocate buffer */ + int newlen = pd->length + pd->allocsz + len; + unsigned char *tmp = (unsigned char*)erealloc((void*)pd->buffer, newlen); + if (tmp == NULL) { + return -1; + } + pd->length = newlen; + pd->buffer = tmp; + } + + while (len > 0) { + pd->buffer[pd->pos++] = *psrc++; + len--; + } + + return len; +} + + +/* + * encoding name resolver + */ +int +php_mbregex_name2mbctype(const char *pname) +{ + int mbctype; + + mbctype = -1; + if (pname != NULL) { + if (strcasecmp("EUC-JP", pname) == 0) { + mbctype = MBCTYPE_EUC; + } else if (strcasecmp("UTF-8", pname) == 0) { + mbctype = MBCTYPE_UTF8; + } else if (strcasecmp("SJIS", pname) == 0) { + mbctype = MBCTYPE_SJIS; + } else if (strcasecmp("ascii", pname) == 0) { + mbctype = MBCTYPE_ASCII; + } else if (strcasecmp("euc", pname) == 0) { + mbctype = MBCTYPE_EUC; + } else if (strcasecmp("eucJP", pname) == 0) { + mbctype = MBCTYPE_EUC; + } else if (strcasecmp("EUC_JP", pname) == 0) { + mbctype = MBCTYPE_EUC; + } else if (strcasecmp("Shift_JIS", pname) == 0) { + mbctype = MBCTYPE_SJIS; + } + } + + return mbctype; +} + +static const char* +php_mbregex_mbctype2name(int mbctype) +{ + const char *p; + + if (mbctype == MBCTYPE_EUC) { + p = "EUC-JP"; + } else if(mbctype == MBCTYPE_UTF8) { + p = "UTF-8"; + } else if(mbctype == MBCTYPE_SJIS) { + p = "SJIS"; + } else if(mbctype == MBCTYPE_ASCII) { + p = "ascii"; + } else { + p = "unknown"; + } + + return p; +} + + +/* + * regex cache + */ +static int +php_mbregex_compile_pattern(mb_regex_t *pre, const char *pattern, int patlen, int options, int mbctype TSRMLS_DC) +{ + int res = 0; + const char *err_str = NULL; + mb_regex_t *rc = NULL; + + if(zend_hash_find(&MBSTRG(ht_rc), (char *)pattern, patlen+1, (void **) &rc) == FAILURE || + rc->options != options || rc->mbctype != mbctype) { + memset(pre, 0, sizeof(*pre)); + pre->fastmap = (char*)emalloc((1 << MBRE_BYTEWIDTH)*sizeof(char)); + if (pre->fastmap) { + pre->options = options; + pre->mbctype = mbctype; + err_str = mbre_compile_pattern(pattern, patlen, pre); + if (!err_str) { + zend_hash_update(&MBSTRG(ht_rc), (char *) pattern, patlen+1, (void *) pre, sizeof(*pre), NULL); + } else { + efree(pre->fastmap); + pre->fastmap = (char*)0; + php_error(E_WARNING, "mbregex compile err: %s", err_str); + res = 1; + } + } else { + php_error(E_WARNING, "Unable to allocate memory in mbregex_compile_pattern"); + res = 1; + } + } else { + memcpy(pre, rc, sizeof(*pre)); + } + + return res; +} + +static void +php_mbregex_init_option(const char *parg, int narg, int *option, int *eval) +{ + int n; + char c; + + if (parg) { + n = 0; + while(n < narg) { + c = parg[n++]; + if (option) { + switch (c) { + case 'i': + *option |= MBRE_OPTION_IGNORECASE; + break; + case 'x': + *option |= MBRE_OPTION_EXTENDED; + break; + case 'm': + *option |= MBRE_OPTION_MULTILINE; + break; + case 's': + *option |= MBRE_OPTION_SINGLELINE; + break; + case 'p': + *option |= MBRE_OPTION_POSIXLINE; + break; + case 'l': + *option |= MBRE_OPTION_LONGEST; + break; + default: + break; + } + } + if (eval && (c == 'e')) { + *eval = 1; + } + } + } +} + + +/* + * php funcions + */ + +/* {{{ proto string mb_regex_encoding([string encoding]) + Returns the current encoding as a string. */ +PHP_FUNCTION(mb_regex_encoding) +{ + zval **arg1; + int mbctype; + + if (ZEND_NUM_ARGS() == 0) { + RETVAL_STRING((char*)php_mbregex_mbctype2name(MBSTRG(current_mbctype)), 1); + } else if (ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg1) != FAILURE) { + convert_to_string_ex(arg1); + mbctype = php_mbregex_name2mbctype(Z_STRVAL_PP(arg1)); + if (mbctype < 0) { + php_error(E_WARNING, "unknown encoding \"%s\"", Z_STRVAL_PP(arg1)); + RETVAL_FALSE; + } else { + MBSTRG(current_mbctype) = mbctype; + RETVAL_TRUE; + } + } else { + WRONG_PARAM_COUNT; + } +} +/* }}} */ + + +/* regex match */ +static void +php_mbereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) +{ + zval **arg_pattern, **arg_string, **array = NULL; + mb_regex_t re; + struct mbre_registers regs = {0, 0, 0, 0}; + int i, err, match_len, string_len, option, beg, end; + char *str; + + switch(ZEND_NUM_ARGS()) { + case 2: + if (zend_get_parameters_ex(2, &arg_pattern, &arg_string) == FAILURE) { + WRONG_PARAM_COUNT; + } + break; + + case 3: + if (zend_get_parameters_ex(3, &arg_pattern, &arg_string, &array) == FAILURE) { + WRONG_PARAM_COUNT; + } + break; + + default: + WRONG_PARAM_COUNT; + break; + } + + option = 0; + if (icase) { + option |= MBRE_OPTION_IGNORECASE; + } + + /* compile the regular expression from the supplied regex */ + if (Z_TYPE_PP(arg_pattern) == IS_STRING) { + option |= MBRE_OPTION_EXTENDED; + } else { + /* we convert numbers to integers and treat them as a string */ + if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) { + convert_to_long_ex(arg_pattern); /* get rid of decimal places */ + } + convert_to_string_ex(arg_pattern); + /* don't bother doing an extended regex with just a number */ + } + err = php_mbregex_compile_pattern( + &re, + Z_STRVAL_PP(arg_pattern), + Z_STRLEN_PP(arg_pattern), + option, MBSTRG(current_mbctype) TSRMLS_CC); + if (err) { + RETURN_FALSE; + } + + /* actually execute the regular expression */ + convert_to_string_ex(arg_string); + err = mbre_search( + &re, + Z_STRVAL_PP(arg_string), + Z_STRLEN_PP(arg_string), + 0, Z_STRLEN_PP(arg_string), + ®s); + if (err < 0) { + mbre_free_registers(®s); + RETURN_FALSE; + } + + match_len = 1; + str = Z_STRVAL_PP(arg_string); + if (array) { + match_len = regs.end[0] - regs.beg[0]; + string_len = Z_STRLEN_PP(arg_string); + zval_dtor(*array); /* start with clean array */ + array_init(*array); + for (i = 0; i < regs.num_regs; i++) { + beg = regs.beg[i]; + end = regs.end[i]; + if (beg >= 0 && beg < end && end <= string_len) { + add_index_stringl(*array, i, &str[beg], end - beg, 1); + } else { + add_index_bool(*array, i, 0); + } + } + } + + mbre_free_registers(®s); + if (match_len == 0) { + match_len = 1; + } + RETVAL_LONG(match_len); +} + +/* {{{ proto int mb_ereg(string pattern, string string [, array registers]) + Regular expression match for multibyte string */ +PHP_FUNCTION(mb_ereg) +{ + php_mbereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + +/* {{{ proto int mb_eregi(string pattern, string string [, array registers]) + Case-insensitive regular expression match for multibyte string */ +PHP_FUNCTION(mb_eregi) +{ + php_mbereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); +} +/* }}} */ + + + +/* regex replacement */ +static void +php_mbereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int option) +{ + zval **arg_pattern, **arg_replace, **arg_string, **arg_option; + char *p, *string, *replace; + mb_regex_t re; + struct mbre_registers regs = {0, 0, 0, 0}; + struct strbuf outdev, evaldev, *pdevice; + int i, n, err, pos, replace_len, string_len, eval; + char *description = NULL; + zval retval; + + eval = 0; + switch(ZEND_NUM_ARGS()) { + case 3: + if (zend_get_parameters_ex(3, &arg_pattern, &arg_replace, &arg_string) == FAILURE) { + WRONG_PARAM_COUNT; + } + break; + + case 4: + if (zend_get_parameters_ex(4, &arg_pattern, &arg_replace, &arg_string, &arg_option) == FAILURE) { + WRONG_PARAM_COUNT; + } + convert_to_string_ex(arg_option); + option = 0; + php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, &eval); + break; + + default: + WRONG_PARAM_COUNT; + break; + } + + convert_to_string_ex(arg_pattern); + /* create regex pattern buffer */ + err = php_mbregex_compile_pattern( + &re, + Z_STRVAL_PP(arg_pattern), + Z_STRLEN_PP(arg_pattern), + option, MBSTRG(current_mbctype) TSRMLS_CC); + if (err) { + RETURN_FALSE; + } + + convert_to_string_ex(arg_replace); + replace = Z_STRVAL_PP(arg_replace); + replace_len = Z_STRLEN_PP(arg_replace); + + convert_to_string_ex(arg_string); + string = Z_STRVAL_PP(arg_string); + string_len = Z_STRLEN_PP(arg_string); + + /* initialize string buffer (auto reallocate buffer) */ + php_mbregex_strbuf_init(&outdev); + php_mbregex_strbuf_init(&evaldev); + outdev.allocsz = (string_len >> 2) + 8; + + if (eval) { + pdevice = &evaldev; + description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC); + } else { + pdevice = &outdev; + description = NULL; + } + + /* do the actual work */ + err = 0; + pos = 0; + while (err >= 0) { + err = mbre_search(&re, string, string_len, pos, string_len - pos, ®s); + if (err <= -2) { + php_error(E_WARNING, "mbregex search failure in php_mbereg_replace_exec()"); + break; + } + if (err >= 0) { + /* copy the part of the string before the match */ + php_mbregex_strbuf_ncat(&outdev, &string[pos], regs.beg[0] - pos); + /* copy replacement and backrefs */ + i = 0; + p = replace; + while (i < replace_len) { + n = -1; + if (p[0] == '\\' && p[1] >= '0' && p[1] <= '9') { + n = p[1] - '0'; + } + if (n >= 0 && n < regs.num_regs) { + if (regs.beg[n] >= 0 && regs.beg[n] < regs.end[n] && regs.end[n] <= string_len) { + php_mbregex_strbuf_ncat(pdevice, &string[regs.beg[n]], regs.end[n] - regs.beg[n]); + } + p += 2; + i += 2; + } else { + php_mbregex_strbuf_ncat(pdevice, p, 1); + p++; + i++; + } + } + if (eval) { + /* null terminate buffer */ + php_mbregex_strbuf_ncat(&evaldev, "\0", 1); + /* do eval */ + zend_eval_string(evaldev.buffer, &retval, description TSRMLS_CC); + /* result of eval */ + convert_to_string(&retval); + php_mbregex_strbuf_ncat(&outdev, retval.value.str.val, retval.value.str.len); + /* Clean up */ + evaldev.pos = 0; + zval_dtor(&retval); + } + n = regs.end[0]; + if (pos < n) { + pos = n; + } else { + pos++; + } + } else { /* nomatch */ + /* stick that last bit of string on our output */ + php_mbregex_strbuf_ncat(&outdev, &string[pos], string_len - pos); + } + } + + if (description) { + efree(description); + } + mbre_free_registers(®s); + if (evaldev.buffer) { + efree((void*)evaldev.buffer); + } + n = outdev.pos; + php_mbregex_strbuf_ncat(&outdev, "\0", 1); + if (err <= -2) { + if (outdev.buffer) { + efree((void*)outdev.buffer); + } + RETVAL_FALSE; + } else { + RETVAL_STRINGL(outdev.buffer, n, 0); + } +} + +/* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option]) + Replace regular expression for multibyte string */ +PHP_FUNCTION(mb_ereg_replace) +{ + php_mbereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, MBRE_OPTION_EXTENDED); +} +/* }}} */ + +/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string) + Case insensitive replace regular expression for multibyte string */ +PHP_FUNCTION(mb_eregi_replace) +{ + php_mbereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, MBRE_OPTION_EXTENDED | MBRE_OPTION_IGNORECASE); +} +/* }}} */ + + +/* {{{ proto array mb_split(string pattern, string string [, int limit]) + split multibyte string into array by regular expression */ +PHP_FUNCTION(mb_split) +{ + zval **arg_pat, **arg_str, **arg_count = NULL; + mb_regex_t re; + struct mbre_registers regs = {0, 0, 0, 0}; + char *string; + int n, err, count, string_len, pos; + + count = -1; + switch (ZEND_NUM_ARGS()) { + case 2: + if (zend_get_parameters_ex(2, &arg_pat, &arg_str) == FAILURE) { + WRONG_PARAM_COUNT; + } + break; + + case 3: + if (zend_get_parameters_ex(3, &arg_pat, &arg_str, &arg_count) == FAILURE) { + WRONG_PARAM_COUNT; + } + convert_to_long_ex(arg_count); + count = Z_LVAL_PP(arg_count); + break; + + default: + WRONG_PARAM_COUNT; + break; + } + + if (array_init(return_value) == FAILURE) { + RETURN_FALSE; + } + + convert_to_string_ex(arg_pat); + convert_to_string_ex(arg_str); + + /* create regex pattern buffer */ + err = php_mbregex_compile_pattern( + &re, + Z_STRVAL_PP(arg_pat), + Z_STRLEN_PP(arg_pat), + MBRE_OPTION_EXTENDED, MBSTRG(current_mbctype) TSRMLS_CC); + if (err) { + RETURN_FALSE; + } + + string = Z_STRVAL_PP(arg_str); + string_len = Z_STRLEN_PP(arg_str); + pos = 0; + err = 0; + /* churn through str, generating array entries as we go */ + while ((count != 0) && + (err = mbre_search(&re, string, string_len, pos, string_len - pos, ®s)) >= 0) { + n = regs.beg[0]; + if (n == pos) { + /* match is at start of string, return empty string */ + add_next_index_stringl(return_value, empty_string, 0, 1); + } else { + /* On a real match */ + /* add it to the array */ + if (n < string_len) { + n -= pos; + add_next_index_stringl(return_value, &string[pos], n, 1); + } else { + err = -2; + break; + } + } + /* point at our new starting point */ + n = regs.end[0]; + if (pos < n) { + pos = n; + } else { + pos++; + } + /* if we're only looking for a certain number of points, + stop looking once we hit it */ + if (count > 0) { + count--; + } + } + + mbre_free_registers(®s); + + /* see if we encountered an error */ + if (err <= -2) { + php_error(E_WARNING, "mbregex search failure in mbsplit()"); + zval_dtor(return_value); + RETURN_FALSE; + } + + /* otherwise we just have one last element to add to the array */ + n = string_len - pos; + if (n > 0) { + add_next_index_stringl(return_value, &string[pos], n, 1); + } else { + add_next_index_stringl(return_value, empty_string, 0, 1); + } +} +/* }}} */ + + +/* {{{ proto bool mb_ereg_match(string pattern, string string [,string option]) + Regular expression match for multibyte string */ +PHP_FUNCTION(mb_ereg_match) +{ + zval **arg_pattern, **arg_str, **arg_option; + mb_regex_t re; + int option, err; + + option = MBRE_OPTION_EXTENDED; + switch (ZEND_NUM_ARGS()) { + case 2: + if (zend_get_parameters_ex(2, &arg_pattern, &arg_str) == FAILURE) { + WRONG_PARAM_COUNT; + } + break; + case 3: + if (zend_get_parameters_ex(3, &arg_pattern, &arg_str, &arg_option) == FAILURE) { + WRONG_PARAM_COUNT; + } + convert_to_string_ex(arg_option); + option = 0; + php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, NULL); + break; + default: + WRONG_PARAM_COUNT; + break; + } + + /* create regex pattern buffer */ + convert_to_string_ex(arg_pattern); + err = php_mbregex_compile_pattern( + &re, + Z_STRVAL_PP(arg_pattern), + Z_STRLEN_PP(arg_pattern), + option, MBSTRG(current_mbctype) TSRMLS_CC); + if (err) { + RETURN_FALSE; + } + + /* match */ + convert_to_string_ex(arg_str); + err = mbre_match(&re, Z_STRVAL_PP(arg_str), Z_STRLEN_PP(arg_str), 0, NULL); + if (err >= 0) { + RETVAL_TRUE; + } else { + RETVAL_FALSE; + } +} +/* }}} */ + + +/* regex search */ +static void +php_mbereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) +{ + zval **arg_pattern, **arg_option; + int n, i, err, pos, len, beg, end, option; + unsigned char *str; + + option = MBRE_OPTION_EXTENDED; + switch (ZEND_NUM_ARGS()) { + case 0: + break; + case 1: + if (zend_get_parameters_ex(1, &arg_pattern) == FAILURE) { + WRONG_PARAM_COUNT; + } + break; + case 2: + if (zend_get_parameters_ex(2, &arg_pattern, &arg_option) == FAILURE) { + WRONG_PARAM_COUNT; + } + convert_to_string_ex(arg_option); + option = 0; + php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, NULL); + break; + default: + WRONG_PARAM_COUNT; + break; + } + if (ZEND_NUM_ARGS() > 0) { + /* create regex pattern buffer */ + convert_to_string_ex(arg_pattern); + if (!MBSTRG(search_re)) { + MBSTRG(search_re) = (mb_regex_t*)ecalloc(1, sizeof(mb_regex_t)); + } + err = php_mbregex_compile_pattern( + MBSTRG(search_re), + Z_STRVAL_PP(arg_pattern), + Z_STRLEN_PP(arg_pattern), + option, MBSTRG(current_mbctype) TSRMLS_CC); + if (err) { + efree(MBSTRG(search_re)); + MBSTRG(search_re) = (mb_regex_t*)0; + RETURN_FALSE; + } + } + + pos = MBSTRG(search_pos); + str = NULL; + len = 0; + if (Z_TYPE_PP(MBSTRG(search_str)) == IS_STRING){ + str = Z_STRVAL_PP(MBSTRG(search_str)); + len = Z_STRLEN_PP(MBSTRG(search_str)); + } + + if (!MBSTRG(search_re)) { + php_error(E_WARNING, "no regex for search"); + RETURN_FALSE; + } + if (!str) { + php_error(E_WARNING, "no string for search"); + RETURN_FALSE; + } + if (MBSTRG(search_regs)) { + mbre_free_registers(MBSTRG(search_regs)); + memset(MBSTRG(search_regs), 0, sizeof(struct mbre_registers)); + } else { + MBSTRG(search_regs) = (struct mbre_registers*)ecalloc(1, sizeof(struct mbre_registers)); + } + + err = mbre_search(MBSTRG(search_re), str, len, pos, len - pos, MBSTRG(search_regs)); + + if (err <= -2) { + php_error(E_WARNING, "mbregex search failure in mbregex_search()"); + RETVAL_FALSE; + } else if (err < 0) { + MBSTRG(search_pos) = len; + RETVAL_FALSE; + } else { + switch (mode) { + case 1: + if (array_init(return_value) != FAILURE) { + beg = MBSTRG(search_regs)->beg[0]; + end = MBSTRG(search_regs)->end[0]; + add_next_index_long(return_value, beg); + add_next_index_long(return_value, end - beg); + } else { + RETVAL_FALSE; + } + break; + case 2: + if (array_init(return_value) != FAILURE) { + n = MBSTRG(search_regs)->num_regs; + for (i = 0; i < n; i++) { + beg = MBSTRG(search_regs)->beg[i]; + end = MBSTRG(search_regs)->end[i]; + if (beg >= 0 && beg <= end && end <= len) { + add_index_stringl(return_value, i, &str[beg], end - beg, 1); + } else { + add_index_bool(return_value, i, 0); + } + } + } else { + RETVAL_FALSE; + } + break; + default: + RETVAL_TRUE; + break; + } + end = MBSTRG(search_regs)->end[0]; + if (pos < end) { + MBSTRG(search_pos) = end; + } else { + MBSTRG(search_pos) = pos + 1; + } + } + + if (err < 0) { + mbre_free_registers(MBSTRG(search_regs)); + efree(MBSTRG(search_regs)); + MBSTRG(search_regs) = (struct mbre_registers*)0; + } +} +/* }}} */ + + +/* {{{ proto bool mb_ereg_search([string pattern[, string option]]) + Regular expression search for multibyte string */ +PHP_FUNCTION(mb_ereg_search) +{ + php_mbereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + + +/* {{{ proto array mb_ereg_search_pos([string pattern[, string option]]) + Regular expression search for multibyte string */ +PHP_FUNCTION(mb_ereg_search_pos) +{ + php_mbereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); +} +/* }}} */ + + +/* {{{ proto array mb_ereg_search_regs([string pattern[, string option]]) + Regular expression search for multibyte string */ +PHP_FUNCTION(mb_ereg_search_regs) +{ + php_mbereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2); +} +/* }}} */ + + +/* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]]) + Initialize string and regular expression for search. */ +PHP_FUNCTION(mb_ereg_search_init) +{ + zval **arg_str, **arg_pattern, **arg_option; + int err, option; + + option = MBRE_OPTION_EXTENDED; + switch (ZEND_NUM_ARGS()) { + case 1: + if (zend_get_parameters_ex(1, &arg_str) == FAILURE) { + WRONG_PARAM_COUNT; + } + break; + case 2: + if (zend_get_parameters_ex(2, &arg_str, &arg_pattern) == FAILURE) { + WRONG_PARAM_COUNT; + } + break; + case 3: + if (zend_get_parameters_ex(3, &arg_str, &arg_pattern, &arg_option) == FAILURE) { + WRONG_PARAM_COUNT; + } + convert_to_string_ex(arg_option); + option = 0; + php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, NULL); + break; + default: + WRONG_PARAM_COUNT; + break; + } + if (ZEND_NUM_ARGS() > 1) { + /* create regex pattern buffer */ + convert_to_string_ex(arg_pattern); + if (!MBSTRG(search_re)) { + MBSTRG(search_re) = (mb_regex_t*)ecalloc(1, sizeof(mb_regex_t)); + } + err = php_mbregex_compile_pattern( + MBSTRG(search_re), + Z_STRVAL_PP(arg_pattern), + Z_STRLEN_PP(arg_pattern), + option, MBSTRG(current_mbctype) TSRMLS_CC); + if (err) { + efree(MBSTRG(search_re)); + MBSTRG(search_re) = (mb_regex_t*)0; + RETURN_FALSE; + } + } + + if (MBSTRG(search_str)) { + if (ZVAL_REFCOUNT(*MBSTRG(search_str)) > 1) { + ZVAL_DELREF(*MBSTRG(search_str)); + } else { + zval_dtor(*MBSTRG(search_str)); + FREE_ZVAL(*MBSTRG(search_str)); + } + MBSTRG(search_str) = (zval **)0; + MBSTRG(search_str_val) = (zval *)0; + } + + if (PZVAL_IS_REF(*arg_str)) { + ZVAL_ADDREF(*arg_str); + MBSTRG(search_str_val) = *arg_str; + MBSTRG(search_str) = &MBSTRG(search_str_val); + } else { + MAKE_STD_ZVAL(MBSTRG(search_str_val)); + *MBSTRG(search_str_val) = **arg_str; + zval_copy_ctor(MBSTRG(search_str_val)); + MBSTRG(search_str_val)->refcount = 1; + MBSTRG(search_str_val)->is_ref = 0; + MBSTRG(search_str) = &MBSTRG(search_str_val); + convert_to_string_ex(MBSTRG(search_str)); + } + + MBSTRG(search_pos) = 0; + + if (MBSTRG(search_regs)) { + mbre_free_registers(MBSTRG(search_regs)); + efree(MBSTRG(search_regs)); + MBSTRG(search_regs) = (struct mbre_registers*)0; + } + + RETURN_TRUE; +} +/* }}} */ + + +/* {{{ proto array mb_ereg_search_getregs(void) + Get matched substring of the last time */ +PHP_FUNCTION(mb_ereg_search_getregs) +{ + int n, i, len, beg, end; + unsigned char *str; + + if (MBSTRG(search_regs) && Z_TYPE_PP(MBSTRG(search_str)) == IS_STRING && + Z_STRVAL_PP(MBSTRG(search_str)) && array_init(return_value) != FAILURE) { + str = Z_STRVAL_PP(MBSTRG(search_str)); + len = Z_STRLEN_PP(MBSTRG(search_str)); + n = MBSTRG(search_regs)->num_regs; + for (i = 0; i < n; i++) { + beg = MBSTRG(search_regs)->beg[i]; + end = MBSTRG(search_regs)->end[i]; + if (beg >= 0 && beg <= end && end <= len) { + add_index_stringl(return_value, i, &str[beg], end - beg, 1); + } else { + add_index_bool(return_value, i, 0); + } + } + } else { + RETVAL_FALSE; + } +} +/* }}} */ + + +/* {{{ proto int mb_ereg_search_getpos(void) + Get search start position */ +PHP_FUNCTION(mb_ereg_search_getpos) +{ + RETVAL_LONG(MBSTRG(search_pos)); +} +/* }}} */ + + +/* {{{ proto bool mb_ereg_search_setpos(int position) + Set search start position */ +PHP_FUNCTION(mb_ereg_search_setpos) +{ + zval **arg_pos; + int n; + + if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg_pos) == FAILURE) { + WRONG_PARAM_COUNT; + } + convert_to_long_ex(arg_pos); + n = Z_LVAL_PP(arg_pos); + if (n < 0) { + php_error(E_WARNING, "position is minus value"); + MBSTRG(search_pos) = 0; + RETVAL_FALSE; + } else { + MBSTRG(search_pos) = n; + RETVAL_TRUE; + } +} +/* }}} */ + +#endif /* HAVE_MBREGEX */ |