diff options
| author | Christoph M. Becker <cmb@php.net> | 2016-07-21 18:36:12 +0200 |
|---|---|---|
| committer | Christoph M. Becker <cmb@php.net> | 2016-07-21 18:37:24 +0200 |
| commit | f2c2a4be9e466f14677089efe33e20ca0b146809 (patch) | |
| tree | 49ec75acf924c245fe4573b9542af245b9caceaa /ext | |
| parent | f813f438552bf52d7d7d6213e12ad67d9e5bbeec (diff) | |
| download | php-git-f2c2a4be9e466f14677089efe33e20ca0b146809.tar.gz | |
Fix #72330: CSV fields incorrectly split if escape char followed by UTF chars
We must not forget to properly reset the state for multibyte characters
following an escape character.
Diffstat (limited to 'ext')
| -rw-r--r-- | ext/standard/file.c | 1 |
| -rw-r--r-- | ext/standard/tests/file/bug72330.phpt | 26 |
2 files changed, 27 insertions, 0 deletions
```diff
diff --git a/ext/standard/file.c b/ext/standard/file.c
index f8c4e0450b..d8471fff1c 100644
--- a/ext/standard/file.c
+++ b/ext/standard/file.c
@@ -2219,6 +2219,7 @@ PHPAPI void php_fgetcsv(php_stream *stream, char delimiter, char enclosure, char
 		memcpy(tptr, hunk_begin, bptr - hunk_begin);
 		tptr += (bptr - hunk_begin);
 		hunk_begin = bptr;
+		state = 0;
 		break;
 	default:
 		bptr += inc_len;
diff --git a/ext/standard/tests/file/bug72330.phpt b/ext/standard/tests/file/bug72330.phpt
new file mode 100644
index 0000000000..843032ae2d
--- /dev/null
+++ b/ext/standard/tests/file/bug72330.phpt
@@ -0,0 +1,26 @@
+--TEST--
+Bug #72330 (CSV fields incorrectly split if escape char followed by UTF chars)
+--SKIPIF--
+<?php
+if (setlocale(LC_ALL, "en_US.utf8", "en_AU.utf8", "ko_KR.utf8", "zh_CN.utf8", "de_DE.utf8", "es_EC.utf8", "fr_FR.utf8", "ja_JP.utf8", "el_GR.utf8", "nl_NL.utf8") === false) {
+	die('skip available locales not usable');
+}
+?>
+--FILE--
+<?php
+setlocale(LC_ALL, "en_US.utf8", "en_AU.utf8", "ko_KR.utf8", "zh_CN.utf8", "de_DE.utf8", "es_EC.utf8", "fr_FR.utf8", "ja_JP.utf8", "el_GR.utf8", "nl_NL.utf8");
+
+$utf_1 = chr(0xD1) . chr(0x81); // U+0440;
+$utf_2 = chr(0xD8) . chr(0x80); // U+0600
+
+$string = '"first #' . $utf_1 . $utf_2 . '";"second"';
+$fields = str_getcsv($string, ';', '"', "#");
+var_dump($fields);
+?>
+--EXPECT--
+array(2) {
+  [0]=>
+  string(11) "first #с؀"
+  [1]=>
+  string(6) "second"
+}
```
