diff options
| author | Rui Hirokawa <hirokawa@php.net> | 2002-01-11 13:21:09 +0000 | 
|---|---|---|
| committer | Rui Hirokawa <hirokawa@php.net> | 2002-01-11 13:21:09 +0000 | 
| commit | c79f749ff8fb324de6cec5d8018e4a1e3cf8ac21 (patch) | |
| tree | 8eb80c20a232c06250b0053b288a442f0e889759 /ext/mbstring/php_mbregex.c | |
| parent | 1e2b854bbacf1f6174e3b3a55b8aa4b9810e8eb1 (diff) | |
| download | php-git-c79f749ff8fb324de6cec5d8018e4a1e3cf8ac21.tar.gz | |
@Added multi-byte enabled regex functions. (Rui)
Diffstat (limited to 'ext/mbstring/php_mbregex.c')
| -rw-r--r-- | ext/mbstring/php_mbregex.c | 983 | 
1 files changed, 983 insertions, 0 deletions
| diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c new file mode 100644 index 0000000000..bd3069b98e --- /dev/null +++ b/ext/mbstring/php_mbregex.c @@ -0,0 +1,983 @@ +/*  + * PHP3 Internationalization support program. + * + * Copyright (c) 1999,2000 by the PHP3 internationalization team. + * All rights reserved. + * + * This program is free software. You can use, redistribute and/or modify + * without fee under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY including implied or express warranty of + * marchantability or fitness for a particular purpose. + * + * Currently, the "PHP3 internationalization team" has no relationship with + * the "PHP Development Team". But we hope these code will be integrated + * into the PHP3, and it will be distributed as a part of PHP3. + * + * See README_i18n for more detail. + * + * Authors: + *    Hironori Sato <satoh@jpnnet.com> + *    Shigeru Kanemoto <sgk@happysize.co.jp> + *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> + */ + +/*  + * PHP4 multibyte regular expression module + * Authors: + *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> + */ + +/* $Id$ */ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php.h" +#include "php_ini.h" +#include "mbregex.h" +#include "mbstring.h" + +#if HAVE_MBREGEX + +extern ZEND_DECLARE_MODULE_GLOBALS(mbstring) + +/* + * string buffer + */ +struct strbuf { +	unsigned char* buffer; +	int length; +	int pos; +	int allocsz; +}; + +static void +php_mbregex_strbuf_init(struct strbuf *pd) +{ +	if (pd) { +		pd->buffer = (char*)0; +		pd->length = 0; +		pd->pos = 0; +		pd->allocsz = 64; +	} +} + +static int +php_mbregex_strbuf_ncat(struct strbuf *pd, const unsigned char *psrc, int len) +{ +	if (pd == NULL || psrc == NULL) { +		return -1; +	} + +	if ((pd->pos + len) >= pd->length) { +		/* reallocate buffer */ +		int newlen = pd->length + pd->allocsz + len; +		unsigned char *tmp = (unsigned char*)erealloc((void*)pd->buffer, newlen); +		if (tmp == NULL) { +			return -1; +		} +		pd->length = newlen; +		pd->buffer = tmp; +	} + +	while (len > 0) { +		pd->buffer[pd->pos++] = *psrc++; +		len--; +	} + +	return len; +} + + +/* + * encoding name resolver + */ +int +php_mbregex_name2mbctype(const char *pname) +{ +	int mbctype; + +	mbctype = -1; +	if (pname != NULL) { +		if (strcasecmp("EUC-JP", pname) == 0) { +			mbctype = MBCTYPE_EUC; +		} else if (strcasecmp("UTF-8", pname) == 0) { +			mbctype = MBCTYPE_UTF8; +		} else if (strcasecmp("SJIS", pname) == 0) { +			mbctype = MBCTYPE_SJIS; +		} else if (strcasecmp("ascii", pname) == 0) { +			mbctype = MBCTYPE_ASCII; +		} else if (strcasecmp("euc", pname) == 0) { +			mbctype = MBCTYPE_EUC; +		} else if (strcasecmp("eucJP", pname) == 0) { +			mbctype = MBCTYPE_EUC; +		} else if (strcasecmp("EUC_JP", pname) == 0) { +			mbctype = MBCTYPE_EUC; +		} else if (strcasecmp("Shift_JIS", pname) == 0) { +			mbctype = MBCTYPE_SJIS; +		} +	} + +	return mbctype; +} + +static const char* +php_mbregex_mbctype2name(int mbctype) +{ +	const char *p; + +	if (mbctype == MBCTYPE_EUC) { +		p = "EUC-JP"; +	} else if(mbctype == MBCTYPE_UTF8) { +		p = "UTF-8"; +	} else if(mbctype == MBCTYPE_SJIS) { +		p = "SJIS"; +	} else if(mbctype == MBCTYPE_ASCII) { +		p = "ascii"; +	} else { +		p = "unknown"; +	} + +	return p; +} + + +/* + * regex cache + */ +static int +php_mbregex_compile_pattern(mb_regex_t *pre, const char *pattern, int patlen, int options, int mbctype TSRMLS_DC) +{ +	int res = 0; +	const char *err_str = NULL; +	mb_regex_t *rc = NULL; + +	if(zend_hash_find(&MBSTRG(ht_rc), (char *)pattern, patlen+1, (void **) &rc) == FAILURE || +			rc->options != options || rc->mbctype != mbctype) { +		memset(pre, 0, sizeof(*pre)); +		pre->fastmap = (char*)emalloc((1 << MBRE_BYTEWIDTH)*sizeof(char)); +		if (pre->fastmap) { +			pre->options = options; +			pre->mbctype = mbctype; +			err_str = mbre_compile_pattern(pattern, patlen, pre); +			if (!err_str) { +				zend_hash_update(&MBSTRG(ht_rc), (char *) pattern, patlen+1, (void *) pre, sizeof(*pre), NULL); +			} else { +				efree(pre->fastmap); +				pre->fastmap = (char*)0; +				php_error(E_WARNING, "mbregex compile err: %s", err_str); +				res = 1; +			} +		} else { +			php_error(E_WARNING, "Unable to allocate memory in mbregex_compile_pattern"); +			res = 1; +		} +	} else { +		memcpy(pre, rc, sizeof(*pre)); +	} + +	return res; +} + +static void +php_mbregex_init_option(const char *parg, int narg, int *option, int *eval)  +{ +	int n; +	char c; + +	if (parg) { +		n = 0; +		while(n < narg) { +			c = parg[n++]; +			if (option) { +				switch (c) { +				case 'i': +					*option |= MBRE_OPTION_IGNORECASE; +					break; +				case 'x': +					*option |= MBRE_OPTION_EXTENDED; +					break; +				case 'm': +					*option |= MBRE_OPTION_MULTILINE; +					break; +				case 's': +					*option |= MBRE_OPTION_SINGLELINE; +					break; +				case 'p': +					*option |= MBRE_OPTION_POSIXLINE; +					break; +				case 'l': +					*option |= MBRE_OPTION_LONGEST; +					break; +				default: +					break; +				} +			} +			if (eval && (c == 'e')) { +				*eval = 1; +			} +		} +	} +} + + +/* + * php funcions + */ + +/* {{{ proto string mb_regex_encoding([string encoding]) +   Returns the current encoding as a string. */ +PHP_FUNCTION(mb_regex_encoding) +{ +	zval **arg1; +	int mbctype; + +	if (ZEND_NUM_ARGS() == 0) { +		RETVAL_STRING((char*)php_mbregex_mbctype2name(MBSTRG(current_mbctype)), 1); +	} else if (ZEND_NUM_ARGS() == 1 && zend_get_parameters_ex(1, &arg1) != FAILURE) { +		convert_to_string_ex(arg1); +		mbctype = php_mbregex_name2mbctype(Z_STRVAL_PP(arg1)); +		if (mbctype < 0) { +			php_error(E_WARNING, "unknown encoding \"%s\"", Z_STRVAL_PP(arg1)); +			RETVAL_FALSE; +		} else { +			MBSTRG(current_mbctype) = mbctype; +			RETVAL_TRUE; +		} +	} else { +		WRONG_PARAM_COUNT; +	} +} +/* }}} */ + + +/* regex match */ +static void +php_mbereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) +{ +	zval **arg_pattern, **arg_string, **array = NULL; +	mb_regex_t re; +	struct mbre_registers regs = {0, 0, 0, 0}; +	int i, err, match_len, string_len, option, beg, end; +	char *str; + +	switch(ZEND_NUM_ARGS()) { +	case 2: +		if (zend_get_parameters_ex(2, &arg_pattern, &arg_string) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		break; + +	case 3: +		if (zend_get_parameters_ex(3, &arg_pattern, &arg_string, &array) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		break; + +	default: +		WRONG_PARAM_COUNT; +		break; +	} + +	option = 0; +	if (icase) { +		option |= MBRE_OPTION_IGNORECASE; +	} + +	/* compile the regular expression from the supplied regex */ +	if (Z_TYPE_PP(arg_pattern) == IS_STRING) { +		option |= MBRE_OPTION_EXTENDED; +	} else { +		/* we convert numbers to integers and treat them as a string */ +		if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) { +			convert_to_long_ex(arg_pattern);	/* get rid of decimal places */ +		} +		convert_to_string_ex(arg_pattern); +		/* don't bother doing an extended regex with just a number */ +	} +	err = php_mbregex_compile_pattern( +	     &re, +	     Z_STRVAL_PP(arg_pattern), +	     Z_STRLEN_PP(arg_pattern), +	     option, MBSTRG(current_mbctype) TSRMLS_CC); +	if (err) { +		RETURN_FALSE; +	} + +	/* actually execute the regular expression */ +	convert_to_string_ex(arg_string); +	err = mbre_search( +	     &re, +	     Z_STRVAL_PP(arg_string), +	     Z_STRLEN_PP(arg_string), +	      0, Z_STRLEN_PP(arg_string), +	     ®s); +	if (err < 0) { +		mbre_free_registers(®s); +		RETURN_FALSE; +	} + +	match_len = 1; +	str = Z_STRVAL_PP(arg_string); +	if (array) { +		match_len = regs.end[0] - regs.beg[0]; +		string_len = Z_STRLEN_PP(arg_string); +		zval_dtor(*array);	/* start with clean array */ +		array_init(*array); +		for (i = 0; i < regs.num_regs; i++) { +			beg = regs.beg[i]; +			end = regs.end[i]; +			if (beg >= 0 && beg < end && end <= string_len) { +				add_index_stringl(*array, i, &str[beg], end - beg, 1); +			} else { +				add_index_bool(*array, i, 0); +			} +		} +	} + +	mbre_free_registers(®s); +	if (match_len == 0) { +		match_len = 1; +	} +	RETVAL_LONG(match_len); +} + +/* {{{ proto int mb_ereg(string pattern, string string [, array registers]) +   Regular expression match for multibyte string */ +PHP_FUNCTION(mb_ereg) +{ +	php_mbereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + +/* {{{ proto int mb_eregi(string pattern, string string [, array registers]) +   Case-insensitive regular expression match for multibyte string */ +PHP_FUNCTION(mb_eregi) +{ +	php_mbereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); +} +/* }}} */ + + + +/* regex replacement */ +static void +php_mbereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, int option) +{ +	zval **arg_pattern, **arg_replace, **arg_string, **arg_option; +	char *p, *string, *replace; +	mb_regex_t re; +	struct mbre_registers regs = {0, 0, 0, 0}; +	struct strbuf outdev, evaldev, *pdevice; +	int i, n, err, pos, replace_len, string_len, eval; +	char *description = NULL; +	zval retval; + +	eval = 0; +	switch(ZEND_NUM_ARGS()) { +	case 3: +		if (zend_get_parameters_ex(3, &arg_pattern, &arg_replace, &arg_string) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		break; + +	case 4: +		if (zend_get_parameters_ex(4, &arg_pattern, &arg_replace, &arg_string, &arg_option) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		convert_to_string_ex(arg_option); +		option = 0; +		php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, &eval); +		break; + +	default: +		WRONG_PARAM_COUNT; +		break; +	} + +	convert_to_string_ex(arg_pattern); +	/* create regex pattern buffer */ +	err = php_mbregex_compile_pattern( +	    &re, +	    Z_STRVAL_PP(arg_pattern), +	    Z_STRLEN_PP(arg_pattern), +	    option, MBSTRG(current_mbctype) TSRMLS_CC); +	if (err) { +		RETURN_FALSE; +	} + +	convert_to_string_ex(arg_replace); +	replace = Z_STRVAL_PP(arg_replace); +	replace_len = Z_STRLEN_PP(arg_replace); + +	convert_to_string_ex(arg_string); +	string = Z_STRVAL_PP(arg_string); +	string_len = Z_STRLEN_PP(arg_string); + +	/* initialize string buffer (auto reallocate buffer) */ +	php_mbregex_strbuf_init(&outdev); +	php_mbregex_strbuf_init(&evaldev); +	outdev.allocsz = (string_len >> 2) + 8; + +	if (eval) { +		pdevice = &evaldev; +		description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC); +	} else { +		pdevice = &outdev; +		description = NULL; +	} + +	/* do the actual work */ +	err = 0; +	pos = 0; +	while (err >= 0) { +		err = mbre_search(&re, string, string_len, pos, string_len - pos, ®s); +		if (err <= -2) { +			php_error(E_WARNING, "mbregex search failure in php_mbereg_replace_exec()"); +			break; +		} +		if (err >= 0) { +			/* copy the part of the string before the match */ +			php_mbregex_strbuf_ncat(&outdev, &string[pos], regs.beg[0] - pos); +			/* copy replacement and backrefs */ +			i = 0; +			p = replace; +			while (i < replace_len) { +				n = -1; +				if (p[0] == '\\' && p[1] >= '0' && p[1] <= '9') { +					n = p[1] - '0'; +				} +				if (n >= 0 && n < regs.num_regs) { +					if (regs.beg[n] >= 0 && regs.beg[n] < regs.end[n] && regs.end[n] <= string_len) { +						php_mbregex_strbuf_ncat(pdevice, &string[regs.beg[n]], regs.end[n] - regs.beg[n]); +					} +					p += 2; +					i += 2; +				} else { +					php_mbregex_strbuf_ncat(pdevice, p, 1); +					p++; +					i++; +				} +			} +			if (eval) { +				/* null terminate buffer */ +				php_mbregex_strbuf_ncat(&evaldev, "\0", 1); +				/* do eval */ +				zend_eval_string(evaldev.buffer, &retval, description TSRMLS_CC); +				/* result of eval */ +				convert_to_string(&retval); +				php_mbregex_strbuf_ncat(&outdev, retval.value.str.val, retval.value.str.len); +				/* Clean up */ +				evaldev.pos = 0; +				zval_dtor(&retval); +			} +			n = regs.end[0]; +			if (pos < n) { +				pos = n; +			} else { +				pos++; +			} +		} else { /* nomatch */ +			/* stick that last bit of string on our output */ +			php_mbregex_strbuf_ncat(&outdev, &string[pos], string_len - pos); +		} +	} + +	if (description) { +		efree(description); +	} +	mbre_free_registers(®s); +	if (evaldev.buffer) { +		efree((void*)evaldev.buffer); +	} +	n = outdev.pos; +	php_mbregex_strbuf_ncat(&outdev, "\0", 1); +	if (err <= -2) { +		if (outdev.buffer) { +			efree((void*)outdev.buffer); +		} +		RETVAL_FALSE; +	} else { +		RETVAL_STRINGL(outdev.buffer, n, 0); +	} +} + +/* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option]) +   Replace regular expression for multibyte string */ +PHP_FUNCTION(mb_ereg_replace) +{ +	php_mbereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, MBRE_OPTION_EXTENDED); +} +/* }}} */ + +/* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string) +   Case insensitive replace regular expression for multibyte string */ +PHP_FUNCTION(mb_eregi_replace) +{ +	php_mbereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, MBRE_OPTION_EXTENDED | MBRE_OPTION_IGNORECASE); +} +/* }}} */ + + +/* {{{ proto array mb_split(string pattern, string string [, int limit]) +   split multibyte string into array by regular expression */ +PHP_FUNCTION(mb_split) +{ +	zval **arg_pat, **arg_str, **arg_count = NULL; +	mb_regex_t re; +	struct mbre_registers regs = {0, 0, 0, 0}; +	char *string; +	int n, err, count, string_len, pos; + +	count = -1; +	switch (ZEND_NUM_ARGS()) { +	case 2: +		if (zend_get_parameters_ex(2, &arg_pat, &arg_str) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		break; + +	case 3: +		if (zend_get_parameters_ex(3, &arg_pat, &arg_str, &arg_count) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		convert_to_long_ex(arg_count); +		count = Z_LVAL_PP(arg_count); +		break; + +	default: +		WRONG_PARAM_COUNT; +		break; +	} + +	if (array_init(return_value) == FAILURE) { +		RETURN_FALSE; +	} + +	convert_to_string_ex(arg_pat); +	convert_to_string_ex(arg_str); + +	/* create regex pattern buffer */ +	err = php_mbregex_compile_pattern( +	     &re, +	     Z_STRVAL_PP(arg_pat), +	     Z_STRLEN_PP(arg_pat), +	     MBRE_OPTION_EXTENDED, MBSTRG(current_mbctype) TSRMLS_CC); +	if (err) { +		RETURN_FALSE; +	} + +	string = Z_STRVAL_PP(arg_str); +	string_len = Z_STRLEN_PP(arg_str); +	pos = 0; +	err = 0; +	/* churn through str, generating array entries as we go */ +	while ((count != 0) && +		   (err = mbre_search(&re, string, string_len, pos, string_len - pos, ®s)) >= 0) { +		n = regs.beg[0]; +		if (n == pos) { +			/* match is at start of string, return empty string */ +			add_next_index_stringl(return_value, empty_string, 0, 1); +		} else { +			/* On a real match */ +			/* add it to the array */ +			if (n < string_len) { +				n -= pos; +				add_next_index_stringl(return_value, &string[pos], n, 1); +			} else { +				err = -2; +				break; +			} +		} +		/* point at our new starting point */ +		n = regs.end[0]; +		if (pos < n) { +			pos = n; +		} else { +			pos++; +		} +		/* if we're only looking for a certain number of points, +		   stop looking once we hit it */ +		if (count > 0) { +			count--; +		} +	} + +	mbre_free_registers(®s); + +	/* see if we encountered an error */ +	if (err <= -2) { +		php_error(E_WARNING, "mbregex search failure in mbsplit()"); +		zval_dtor(return_value); +		RETURN_FALSE; +	} + +	/* otherwise we just have one last element to add to the array */ +	n = string_len - pos; +	if (n > 0) { +		add_next_index_stringl(return_value, &string[pos], n, 1); +	} else { +		add_next_index_stringl(return_value, empty_string, 0, 1); +	} +} +/* }}} */ + + +/* {{{ proto bool mb_ereg_match(string pattern, string string [,string option]) +   Regular expression match for multibyte string */ +PHP_FUNCTION(mb_ereg_match) +{ +	zval **arg_pattern, **arg_str, **arg_option; +	mb_regex_t re; +	int option, err; + +	option = MBRE_OPTION_EXTENDED; +	switch (ZEND_NUM_ARGS()) { +	case 2: +		if (zend_get_parameters_ex(2, &arg_pattern, &arg_str) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		break; +	case 3: +		if (zend_get_parameters_ex(3, &arg_pattern, &arg_str, &arg_option) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		convert_to_string_ex(arg_option); +		option = 0; +		php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, NULL); +		break; +	default: +		WRONG_PARAM_COUNT; +		break; +	} + +	/* create regex pattern buffer */ +	convert_to_string_ex(arg_pattern); +	err = php_mbregex_compile_pattern( +	    &re, +	    Z_STRVAL_PP(arg_pattern), +	    Z_STRLEN_PP(arg_pattern), +	    option, MBSTRG(current_mbctype) TSRMLS_CC); +	if (err) { +		RETURN_FALSE; +	} + +	/* match */ +	convert_to_string_ex(arg_str); +	err = mbre_match(&re, Z_STRVAL_PP(arg_str), Z_STRLEN_PP(arg_str), 0, NULL); +	if (err >= 0) { +		RETVAL_TRUE; +	} else { +		RETVAL_FALSE; +	} +} +/* }}} */ + + +/* regex search */ +static void +php_mbereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) +{ +	zval **arg_pattern, **arg_option; +	int n, i, err, pos, len, beg, end, option; +	unsigned char *str; + +	option = MBRE_OPTION_EXTENDED; +	switch (ZEND_NUM_ARGS()) { +	case 0: +		break; +	case 1: +		if (zend_get_parameters_ex(1, &arg_pattern) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		break; +	case 2: +		if (zend_get_parameters_ex(2, &arg_pattern, &arg_option) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		convert_to_string_ex(arg_option); +		option = 0; +		php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, NULL); +		break; +	default: +		WRONG_PARAM_COUNT; +		break; +	} +	if (ZEND_NUM_ARGS() > 0) { +		/* create regex pattern buffer */ +		convert_to_string_ex(arg_pattern); +		if (!MBSTRG(search_re)) { +			MBSTRG(search_re) = (mb_regex_t*)ecalloc(1, sizeof(mb_regex_t)); +		} +		err = php_mbregex_compile_pattern( +		    MBSTRG(search_re), +		    Z_STRVAL_PP(arg_pattern), +		    Z_STRLEN_PP(arg_pattern), +		    option, MBSTRG(current_mbctype) TSRMLS_CC); +		if (err) { +			efree(MBSTRG(search_re)); +			MBSTRG(search_re) = (mb_regex_t*)0; +			RETURN_FALSE; +		} +	} + +	pos = MBSTRG(search_pos); +	str = NULL; +	len = 0; +	if (Z_TYPE_PP(MBSTRG(search_str)) == IS_STRING){ +		str = Z_STRVAL_PP(MBSTRG(search_str)); +		len = Z_STRLEN_PP(MBSTRG(search_str)); +	} + +	if (!MBSTRG(search_re)) { +		php_error(E_WARNING, "no regex for search"); +		RETURN_FALSE; +	} +	if (!str) { +		php_error(E_WARNING, "no string for search"); +		RETURN_FALSE; +	} +	if (MBSTRG(search_regs)) { +		mbre_free_registers(MBSTRG(search_regs)); +		memset(MBSTRG(search_regs), 0, sizeof(struct mbre_registers)); +	} else { +		MBSTRG(search_regs) = (struct mbre_registers*)ecalloc(1, sizeof(struct mbre_registers)); +	} + +	err = mbre_search(MBSTRG(search_re), str, len, pos, len - pos, MBSTRG(search_regs)); + +	if (err <= -2) { +		php_error(E_WARNING, "mbregex search failure in mbregex_search()"); +		RETVAL_FALSE; +	} else if (err < 0) { +		MBSTRG(search_pos) = len; +		RETVAL_FALSE; +	} else { +		switch (mode) { +		case 1: +			if (array_init(return_value) != FAILURE) { +				beg = MBSTRG(search_regs)->beg[0]; +				end = MBSTRG(search_regs)->end[0]; +				add_next_index_long(return_value, beg); +				add_next_index_long(return_value, end - beg); +			} else { +				RETVAL_FALSE; +			} +			break; +		case 2: +			if (array_init(return_value) != FAILURE) { +				n = MBSTRG(search_regs)->num_regs; +				for (i = 0; i < n; i++) { +					beg = MBSTRG(search_regs)->beg[i]; +					end = MBSTRG(search_regs)->end[i]; +					if (beg >= 0 && beg <= end && end <= len) { +						add_index_stringl(return_value, i, &str[beg], end - beg, 1); +					} else { +						add_index_bool(return_value, i, 0); +					} +				} +			} else { +				RETVAL_FALSE; +			} +			break; +		default: +			RETVAL_TRUE; +			break; +		} +		end = MBSTRG(search_regs)->end[0]; +		if (pos < end) { +			MBSTRG(search_pos) = end; +		} else { +			MBSTRG(search_pos) = pos + 1; +		} +	} + +	if (err < 0) { +		mbre_free_registers(MBSTRG(search_regs)); +		efree(MBSTRG(search_regs)); +		MBSTRG(search_regs) = (struct mbre_registers*)0; +	} +} +/* }}} */ + + +/* {{{ proto bool mb_ereg_search([string pattern[, string option]]) +   Regular expression search for multibyte string */ +PHP_FUNCTION(mb_ereg_search) +{ +	php_mbereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); +} +/* }}} */ + + +/* {{{ proto array mb_ereg_search_pos([string pattern[, string option]]) +   Regular expression search for multibyte string */ +PHP_FUNCTION(mb_ereg_search_pos) +{ +	php_mbereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); +} +/* }}} */ + + +/* {{{ proto array mb_ereg_search_regs([string pattern[, string option]]) +   Regular expression search for multibyte string */ +PHP_FUNCTION(mb_ereg_search_regs) +{ +	php_mbereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2); +} +/* }}} */ + + +/* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]]) +   Initialize string and regular expression for search. */ +PHP_FUNCTION(mb_ereg_search_init) +{ +	zval **arg_str, **arg_pattern, **arg_option; +	int err, option; + +	option = MBRE_OPTION_EXTENDED; +	switch (ZEND_NUM_ARGS()) { +	case 1: +		if (zend_get_parameters_ex(1, &arg_str) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		break; +	case 2: +		if (zend_get_parameters_ex(2, &arg_str, &arg_pattern) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		break; +	case 3: +		if (zend_get_parameters_ex(3, &arg_str, &arg_pattern, &arg_option) == FAILURE) { +			WRONG_PARAM_COUNT; +		} +		convert_to_string_ex(arg_option); +		option = 0; +		php_mbregex_init_option(Z_STRVAL_PP(arg_option), Z_STRLEN_PP(arg_option), &option, NULL); +		break; +	default: +		WRONG_PARAM_COUNT; +		break; +	} +	if (ZEND_NUM_ARGS() > 1) { +		/* create regex pattern buffer */ +		convert_to_string_ex(arg_pattern); +		if (!MBSTRG(search_re)) { +			MBSTRG(search_re) = (mb_regex_t*)ecalloc(1, sizeof(mb_regex_t)); +		} +		err = php_mbregex_compile_pattern( +		    MBSTRG(search_re), +		    Z_STRVAL_PP(arg_pattern), +		    Z_STRLEN_PP(arg_pattern), +		    option, MBSTRG(current_mbctype) TSRMLS_CC); +		if (err) { +			efree(MBSTRG(search_re)); +			MBSTRG(search_re) = (mb_regex_t*)0; +			RETURN_FALSE; +		} +	} + +	if (MBSTRG(search_str)) { +		if (ZVAL_REFCOUNT(*MBSTRG(search_str)) > 1) { +			ZVAL_DELREF(*MBSTRG(search_str)); +		} else { +			zval_dtor(*MBSTRG(search_str)); +			FREE_ZVAL(*MBSTRG(search_str)); +		} +		MBSTRG(search_str) = (zval **)0; +		MBSTRG(search_str_val) = (zval *)0; +	} + +	if (PZVAL_IS_REF(*arg_str)) { +		ZVAL_ADDREF(*arg_str); +		MBSTRG(search_str_val) = *arg_str; +		MBSTRG(search_str) = &MBSTRG(search_str_val); +	} else { +		MAKE_STD_ZVAL(MBSTRG(search_str_val)); +		*MBSTRG(search_str_val) = **arg_str; +		zval_copy_ctor(MBSTRG(search_str_val)); +		MBSTRG(search_str_val)->refcount = 1; +		MBSTRG(search_str_val)->is_ref = 0; +		MBSTRG(search_str) = &MBSTRG(search_str_val); +		convert_to_string_ex(MBSTRG(search_str)); +	} + +	MBSTRG(search_pos) = 0; + +	if (MBSTRG(search_regs)) { +		mbre_free_registers(MBSTRG(search_regs)); +		efree(MBSTRG(search_regs)); +		MBSTRG(search_regs) = (struct mbre_registers*)0; +	} + +	RETURN_TRUE; +} +/* }}} */ + + +/* {{{ proto array mb_ereg_search_getregs(void) +   Get matched substring of the last time */ +PHP_FUNCTION(mb_ereg_search_getregs) +{ +	int n, i, len, beg, end; +	unsigned char *str; + +	if (MBSTRG(search_regs) && Z_TYPE_PP(MBSTRG(search_str)) == IS_STRING && +	    Z_STRVAL_PP(MBSTRG(search_str)) && array_init(return_value) != FAILURE) { +		str = Z_STRVAL_PP(MBSTRG(search_str)); +		len = Z_STRLEN_PP(MBSTRG(search_str)); +		n = MBSTRG(search_regs)->num_regs; +		for (i = 0; i < n; i++) { +			beg = MBSTRG(search_regs)->beg[i]; +			end = MBSTRG(search_regs)->end[i]; +			if (beg >= 0 && beg <= end && end <= len) { +				add_index_stringl(return_value, i, &str[beg], end - beg, 1); +			} else { +				add_index_bool(return_value, i, 0); +			} +		} +	} else { +		RETVAL_FALSE; +	} +} +/* }}} */ + + +/* {{{ proto int mb_ereg_search_getpos(void) +   Get search start position */ +PHP_FUNCTION(mb_ereg_search_getpos) +{ +	RETVAL_LONG(MBSTRG(search_pos)); +} +/* }}} */ + + +/* {{{ proto bool mb_ereg_search_setpos(int position) +   Set search start position */ +PHP_FUNCTION(mb_ereg_search_setpos) +{ +	zval **arg_pos; +	int n; + +	if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg_pos) == FAILURE) { +		WRONG_PARAM_COUNT; +	} +	convert_to_long_ex(arg_pos); +	n = Z_LVAL_PP(arg_pos); +	if (n < 0) { +		php_error(E_WARNING, "position is minus value"); +		MBSTRG(search_pos) = 0; +		RETVAL_FALSE; +	} else { +		MBSTRG(search_pos) = n; +		RETVAL_TRUE; +	} +} +/* }}} */ + +#endif	/* HAVE_MBREGEX */ | 
