diff options
author | Felipe Pena <felipe@php.net> | 2010-10-03 16:01:38 +0000 |
---|---|---|
committer | Felipe Pena <felipe@php.net> | 2010-10-03 16:01:38 +0000 |
commit | 090a9b33316a448f1af9ba865484fa5dafeda4a7 (patch) | |
tree | c9a293b2fe14068c177e60781222929ffd0e98fe /ext | |
parent | 4b0927b042a3b30f1dfad9ed89fe7e132ac8a040 (diff) | |
download | php-git-090a9b33316a448f1af9ba865484fa5dafeda4a7.tar.gz |
- Fixed bug #52971 (PCRE-Meta-Characters not working with utf-8)
# In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
# characters, even in UTF-8 mode. However, this can be changed by setting
# the PCRE_UCP option.
Diffstat (limited to 'ext')
-rw-r--r-- | ext/pcre/php_pcre.c | 9 | ||||
-rw-r--r-- | ext/pcre/tests/bug52971.phpt | 43 |
2 files changed, 51 insertions, 1 deletion
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index dcbc98fb85..ccb0a51c0e 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -350,7 +350,14 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le
 			case 'S': do_study = 1; break;
 			case 'U': coptions |= PCRE_UNGREEDY; break;
 			case 'X': coptions |= PCRE_EXTRA; break;
-			case 'u': coptions |= PCRE_UTF8; break;
+			case 'u': coptions |= PCRE_UTF8;
+	/* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
+	   characters, even in UTF-8 mode. However, this can be changed by setting
+	   the PCRE_UCP option. */
+#ifdef PCRE_UCP
+				coptions |= PCRE_UCP;
+#endif
+				break;
 
 			/* Custom preg options */
 			case 'e': poptions |= PREG_REPLACE_EVAL; break;
diff --git a/ext/pcre/tests/bug52971.phpt b/ext/pcre/tests/bug52971.phpt
new file mode 100644
index 0000000000..5949cb220c
--- /dev/null
+++ b/ext/pcre/tests/bug52971.phpt
@@ -0,0 +1,43 @@
+--TEST--
+Bug #52971 (PCRE-Meta-Characters not working with utf-8)
+--SKIPIF--
+<?php if ((double)PCRE_VERSION < 8.1) die('skip PCRE_VERSION >= 8.1 is required!'); ?>
+--FILE--
+<?php
+
+$message = 'Der ist ein Süßwasserpool Süsswasserpool ... verschiedene Wassersportmöglichkeiten bei ...';
+
+$pattern = '/\bwasser/iu';
+preg_match_all($pattern, $message, $match, PREG_OFFSET_CAPTURE);
+var_dump($match);
+
+$pattern = '/[^\w]wasser/iu';
+preg_match_all($pattern, $message, $match, PREG_OFFSET_CAPTURE);
+var_dump($match);
+
+?>
+--EXPECTF--
+array(1) {
+  [0]=>
+  array(1) {
+    [0]=>
+    array(2) {
+      [0]=>
+      string(6) "Wasser"
+      [1]=>
+      int(61)
+    }
+  }
+}
+array(1) {
+  [0]=>
+  array(1) {
+    [0]=>
+    array(2) {
+      [0]=>
+      string(7) " Wasser"
+      [1]=>
+      int(60)
+    }
+  }
+}