diff options
| author | Andrei Zmievski <andrei@php.net> | 2000-11-23 17:25:13 +0000 | 
|---|---|---|
| committer | Andrei Zmievski <andrei@php.net> | 2000-11-23 17:25:13 +0000 | 
| commit | 0af5c36d9402be4fa2952f5e137abd68f26bc503 (patch) | |
| tree | c8aae309774bd48b1fff78b1f1a986af812a9700 /ext/pcre/php_pcre.c | |
| parent | f03b949134ee2179963fbf88ddca312134642b4f (diff) | |
| download | php-git-0af5c36d9402be4fa2952f5e137abd68f26bc503.tar.gz | |
This submission introduces two features.
The first one is support for Perl-style matching regexp delimiters, i.e.
using <[{( and )}]> to delimit the regular expressions.
The second one is a new 'F' modifier that allows you to specify a function name
in the replacement argument to preg_replace(). This function will be called
when the replacement needs to be made. It is passed an array of full matched
pattern and captured subpatterns and it is expected to return a string that
will be used for replacement. 'e' and 'F' modifiers cannot be used together.
@- Implemented support for Perl-style matching regexp delimiters in PCRE.
@  You can use <{[( and )]}> to delimit your expressions now. (Andrei)
@- Introduced new 'F' modifier in PCRE that lets you specify a function
@  name in the replacement argument to preg_replace() that will be called
@  at run-time to provide the replacement string. It is passed an array of
@  matched pattern and subpatterns. (Andrei)
Diffstat (limited to 'ext/pcre/php_pcre.c')
| -rw-r--r-- | ext/pcre/php_pcre.c | 121 | 
1 files changed, 101 insertions, 20 deletions
| diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index d4a022bbdc..53cf7da06d 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -41,6 +41,7 @@  #define	PREG_SPLIT_NO_EMPTY	(1<<0)  #define PREG_REPLACE_EVAL	(1<<0) +#define PREG_REPLACE_FUNC	(1<<1)  #ifdef ZTS  int pcre_globals_id; @@ -147,6 +148,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o  	const char	 		*error;  	int			 	 	 erroffset;  	char		 	 	 delimiter; +	char				 start_delimiter; +	char				 end_delimiter;  	char 				*p, *pp;  	char				*pattern;  	int				 	 regex_len; @@ -192,20 +195,47 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o  		zend_error(E_WARNING, "Delimiter must not be alphanumeric or backslash");  		return NULL;  	} -	 -	/* We need to iterate through the pattern, searching for the ending delimiter, -	   but skipping the backslashed delimiters.  If the ending delimiter is not -	   found, display a warning. */ -	pp = p; -	while (*pp != 0) { -		if (*pp == '\\' && pp[1] != 0) pp++; -		else if (*pp == delimiter) -			break; -		pp++; -	} -	if (*pp == 0) { -		zend_error(E_WARNING, "No ending delimiter found"); -		return NULL; + +	start_delimiter = delimiter; +	if ((pp = strchr("([{< )]}> )]}>", delimiter))) +		delimiter = pp[5]; +	end_delimiter = delimiter; + +	if (start_delimiter == end_delimiter) { +		/* We need to iterate through the pattern, searching for the ending delimiter, +		   but skipping the backslashed delimiters.  If the ending delimiter is not +		   found, display a warning. */ +		pp = p; +		while (*pp != 0) { +			if (*pp == '\\' && pp[1] != 0) pp++; +			else if (*pp == delimiter) +				break; +			pp++; +		} +		if (*pp == 0) { +			zend_error(E_WARNING, "No ending delimiter '%c' found", delimiter); +			return NULL; +		} +	} else { +		/* We iterate through the pattern, searching for the matching ending +		 * delimiter. For each matching starting delimiter, we increment nesting +		 * level, and decrement it for each matching ending delimiter. If we +		 * reach the end of the pattern without matching, display a warning. +		 */ +		int brackets = 1; 	/* brackets nesting level */ +		pp = p; +		while (*pp != 0) { +			if (*pp == '\\' && pp[1] != 0) pp++; +			else if (*pp == end_delimiter && --brackets <= 0) +				break; +			else if (*pp == start_delimiter) +				brackets++; +			pp++; +		} +		if (*pp == 0) { +			zend_error(E_WARNING, "No ending matching delimiter '%c' found", end_delimiter); +			return NULL; +		}  	}  	/* Make a copy of the actual pattern. */ @@ -235,7 +265,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o  			case 'X':	coptions |= PCRE_EXTRA;			break;  			/* Custom preg options */ -			case 'e':	poptions |= PREG_REPLACE_EVAL; break; +			case 'e':	poptions |= PREG_REPLACE_EVAL;	break; +			case 'F':	poptions |= PREG_REPLACE_FUNC;	break;  			case ' ':  			case '\n': @@ -247,6 +278,12 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o  				return NULL;  		}  	} + +	if ((poptions & PREG_REPLACE_EVAL) && (poptions & PREG_REPLACE_FUNC)) { +		zend_error(E_WARNING, "'e' and 'F' modifiers cannot be used together"); +		efree(pattern); +		return NULL; +	}  #if HAVE_SETLOCALE  	if (strcmp(locale, "C")) @@ -526,6 +563,40 @@ static inline int preg_get_backref(const char *walk, int *backref)  	return 1;	  } +static int preg_do_repl_func(char *function_name, char *subject, int *offsets, int count, char **result) +{ +	zval		*retval_ptr;		/* Function return value */ +	zval		 function;			/* Function to call */ +	zval		*function_ptr = &function;		/* Pointer to function to call */ +	zval	   **args[0];			/* Argument to pass to function */ +	zval		*subpats;			/* Captured subpatterns */  +	int			 result_len;		/* Return value length */ +	int			 i; +	CLS_FETCH(); + +	ZVAL_STRING(function_ptr, function_name, 0); + +	MAKE_STD_ZVAL(subpats); +	array_init(subpats); +	for (i = 0; i < count; i++) +		add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); +	args[0] = &subpats; + +	if (call_user_function_ex(CG(function_table), NULL, function_ptr, &retval_ptr, 1, args, 0, NULL) == SUCCESS && retval_ptr) { +		convert_to_string_ex(&retval_ptr); +		*result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr)); +		result_len = Z_STRLEN_P(retval_ptr); +		zval_ptr_dtor(&retval_ptr); +	} else { +		php_error(E_WARNING, "Unable to call custom replacement function %s()", function_name); +		*result = empty_string; +		result_len = 0; +	} +	zval_dtor(subpats); +	FREE_ZVAL(subpats); + +	return result_len; +}  static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,  						int *offsets, int count, char **result) @@ -630,10 +701,13 @@ char *php_pcre_replace(char *regex,   int regex_len,  	int			 	 size_offsets;		/* Size of the offsets array */  	int				 new_len;			/* Length of needed storage */  	int				 alloc_len;			/* Actual allocated length */ -	int				 eval_result_len=0;	/* Length of the eval'ed string */ +	int				 eval_result_len=0;	/* Length of the eval'ed or +										   function-returned string */  	int				 match_len;			/* Length of the current match */  	int				 backref;			/* Backreference number */  	int				 eval;				/* If the replacement string should be eval'ed */ +	int				 use_func;			/* If the matches should be run through +										   a function to get the replacement string */  	int				 start_offset;		/* Where the new search starts */  	int				 g_notempty = 0;	/* If the match should not be empty */  	char			*result,			/* Result of replacement */ @@ -643,7 +717,7 @@ char *php_pcre_replace(char *regex,   int regex_len,  					*match,				/* The current match */  					*piece,				/* The current piece of subject */  					*replace_end,		/* End of replacement string */ -					*eval_result,		/* Result of eval */ +					*eval_result,		/* Result of eval or custom function */  					 walk_last;			/* Last walked character */  	/* Compile regex or get it from cache. */ @@ -670,6 +744,7 @@ char *php_pcre_replace(char *regex,   int regex_len,  	start_offset = 0;  	replace_end = replace + replace_len;  	eval = preg_options & PREG_REPLACE_EVAL; +	use_func = preg_options & PREG_REPLACE_FUNC;  	while (1) {  		/* Execute the regular expression. */ @@ -695,6 +770,11 @@ char *php_pcre_replace(char *regex,   int regex_len,  				eval_result_len = preg_do_eval(replace, replace_len, subject,  											   offsets, count, &eval_result);  				new_len += eval_result_len; +			} else if (use_func) { +				/* Use custom function to get replacement string and its length. */ +				eval_result_len = preg_do_repl_func(replace, subject, offsets, +													count, &eval_result); +				new_len += eval_result_len;  			} else { /* do regular substitution */  				walk = replace;  				walk_last = 0; @@ -726,11 +806,12 @@ char *php_pcre_replace(char *regex,   int regex_len,  			/* copy replacement and backrefs */  			walkbuf = result + *result_len; -			/* If evaluating, copy result to the buffer and clean up */ -			if (eval) { +			/* If evaluating or using custom function, copy result to the buffer +			 * and clean up. */ +			if (eval || use_func) {  				memcpy(walkbuf, eval_result, eval_result_len);  				*result_len += eval_result_len; -				efree(eval_result); +				STR_FREE(eval_result);  			} else { /* do regular backreference copying */  				walk = replace;  				walk_last = 0; | 
