diff options
| author | Christoph M. Becker <cmbecker69@gmx.de> | 2015-06-05 14:40:03 +0200 | 
|---|---|---|
| committer | Christoph M. Becker <cmb@php.net> | 2015-06-23 19:28:09 +0200 | 
| commit | 23e25f3319db021298310fb97cf537bcef4095ad (patch) | |
| tree | e595b94026eabd022be487fdd7d44b1fbfd78d98 /ext/pcre/php_pcre.c | |
| parent | eef6b10b7cea10f8e0e4037b6287c4abd7aa1033 (diff) | |
| download | php-git-23e25f3319db021298310fb97cf537bcef4095ad.tar.gz | |
Fixed Bug #53823 (preg_replace: * qualifier on unicode replace garbles the string)
When advancing past an empty match, both php_pcre_match_impl() and
php_pcre_replace_impl() must advance to the next code point, not merely to the
next byte, whenever the u (PCRE_UTF8) modifier is given; advancing by a single byte splits a multi-byte UTF-8 sequence and garbles the result.
Diffstat (limited to 'ext/pcre/php_pcre.c')
| -rw-r--r-- | ext/pcre/php_pcre.c | 31 | 
1 files changed, 27 insertions, 4 deletions
| diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index e7274b841d..7cc16ca6e6 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -225,6 +225,25 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_D  }  /* }}} */ +/* {{{ static calculate_unit_length */ +/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */ +static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start) +{ +	int unit_len; + +	if (pce->compile_options & PCRE_UTF8) { +		char *end = start; + +		/* skip continuation bytes */ +		while ((*++end & 0xC0) == 0x80); +		unit_len = end - start; +	} else { +		unit_len = 1; +	} +	return unit_len; +} +/* }}} */ +  /* {{{ pcre_get_compiled_regex_cache   */  PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC) @@ -758,8 +777,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec  			   the start offset, and continue. Fudge the offset values  			   to achieve this, unless we're already at the end of the string. */  			if (g_notempty != 0 && start_offset < subject_len) { +				int unit_len = calculate_unit_length(pce, subject + start_offset); +				  				offsets[0] = start_offset; -				offsets[1] = start_offset + 1; +				offsets[1] = start_offset + unit_len;  			} else  				break;  		} else { @@ -1206,10 +1227,12 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub  			   the start offset, and continue. Fudge the offset values  			   to achieve this, unless we're already at the end of the string. 
*/  			if (g_notempty != 0 && start_offset < subject_len) { +				int unit_len = calculate_unit_length(pce, piece); +  				offsets[0] = start_offset; -				offsets[1] = start_offset + 1; -				memcpy(&result[*result_len], piece, 1); -				(*result_len)++; +				offsets[1] = start_offset + unit_len; +				memcpy(&result[*result_len], piece, unit_len); +				*result_len += unit_len;  			} else {  				new_len = *result_len + subject_len - start_offset;  				if (new_len + 1 > alloc_len) { | 
