From cbd108abf19d9fb9ae1d4ccd153215f56a2763e8 Mon Sep 17 00:00:00 2001 From: Yasuo Ohgaki Date: Thu, 13 Feb 2014 11:54:52 +0900 Subject: Implement RFC https://wiki.php.net/rfc/default_encoding --- ext/mbstring/mbstring.c | 85 ++++++++++++++++++++----------------------------- 1 file changed, 34 insertions(+), 51 deletions(-) (limited to 'ext/mbstring/mbstring.c') diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index a3adbc357b..1ae43602d1 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -1236,6 +1236,11 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input) if (MBSTRG(http_input_list)) { pefree(MBSTRG(http_input_list), 1); } + if (SUCCESS == php_mb_parse_encoding_list(PG(input_encoding), strlen(PG(input_encoding))+1, &list, &size, 1 TSRMLS_CC)) { + MBSTRG(http_input_list) = list; + MBSTRG(http_input_list_size) = 0; + return SUCCESS; + } MBSTRG(http_input_list) = NULL; MBSTRG(http_input_list_size) = 0; return SUCCESS; @@ -1261,18 +1266,20 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output) const mbfl_encoding *encoding; if (new_value == NULL || new_value_length == 0) { - MBSTRG(http_output_encoding) = &mbfl_encoding_pass; - MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; - return SUCCESS; - } - - encoding = mbfl_name2encoding(new_value); - if (!encoding) { - MBSTRG(http_output_encoding) = &mbfl_encoding_pass; - MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; - return FAILURE; + encoding = mbfl_name2encoding(PG(output_encoding)); + if (!encoding) { + MBSTRG(http_output_encoding) = &mbfl_encoding_pass; + MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; + return SUCCESS; + } + } else { + encoding = mbfl_name2encoding(new_value); + if (!encoding) { + MBSTRG(http_output_encoding) = &mbfl_encoding_pass; + MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; + return FAILURE; + } } - MBSTRG(http_output_encoding) = encoding; MBSTRG(current_http_output_encoding) = encoding; return SUCCESS; @@ -1285,47 +1292,17 @@ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_v const mbfl_encoding *encoding; if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) { - switch (MBSTRG(language)) { - case mbfl_no_language_uni: - encoding = mbfl_no2encoding(mbfl_no_encoding_utf8); - break; - case mbfl_no_language_japanese: - encoding = mbfl_no2encoding(mbfl_no_encoding_euc_jp); - break; - case mbfl_no_language_korean: - encoding = mbfl_no2encoding(mbfl_no_encoding_euc_kr); - break; - case mbfl_no_language_simplified_chinese: - encoding = mbfl_no2encoding(mbfl_no_encoding_euc_cn); - break; - case mbfl_no_language_traditional_chinese: - encoding = mbfl_no2encoding(mbfl_no_encoding_euc_tw); - break; - case mbfl_no_language_russian: - encoding = mbfl_no2encoding(mbfl_no_encoding_koi8r); - break; - case mbfl_no_language_german: - encoding = mbfl_no2encoding(mbfl_no_encoding_8859_15); - break; - case mbfl_no_language_armenian: - encoding = mbfl_no2encoding(mbfl_no_encoding_armscii8); - break; - case mbfl_no_language_turkish: - encoding = mbfl_no2encoding(mbfl_no_encoding_8859_9); - break; - default: - encoding = mbfl_no2encoding(mbfl_no_encoding_8859_1); - break; - } - } + /* falls back to UTF-8 if an unkown encoding name is given */ + encoding = mbfl_no2encoding(mbfl_no_encoding_utf8); + } MBSTRG(internal_encoding) = encoding; MBSTRG(current_internal_encoding) = encoding; #if HAVE_MBREGEX { const char *enc_name = new_value; if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) { - /* falls back to EUC-JP if an unknown encoding name is given */ - enc_name = "EUC-JP"; + /* falls back to UTF-8 if an unknown encoding name is given */ + enc_name = "UTF-8"; php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC); } php_mb_regex_set_mbctype(new_value TSRMLS_CC); @@ -1343,7 +1320,11 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) } if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN || stage == PHP_INI_STAGE_RUNTIME) { - return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC); + if (new_value_length) { + return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC); + } else { + return _php_mb_ini_mbstring_internal_encoding_set(PG(internal_encoding), strlen(PG(internal_encoding))+1 TSRMLS_CC); + } } else { /* the corresponding mbstring globals needs to be set according to the * ini value in the later stage because it never falls back to the @@ -1450,8 +1431,8 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) PHP_INI_BEGIN() PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language) PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order) - PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input) - PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output) + PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input) + PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output) STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals) PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character) STD_PHP_INI_ENTRY("mbstring.func_overload", "0", @@ -2162,8 +2143,10 @@ PHP_FUNCTION(mb_output_handler) /* feed the string */ mbfl_string_init(&string); - string.no_language = MBSTRG(language); - string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; + /* these are not needed. convd has encoding info. + string.no_language = MBSTRG(language); + string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; + */ string.val = (unsigned char *)arg_string; string.len = arg_string_len; mbfl_buffer_converter_feed(MBSTRG(outconv), &string); -- cgit v1.2.1 From a7ec000f3526076fce684557bee0324a3f8b70ad Mon Sep 17 00:00:00 2001 From: Veres Lajos Date: Fri, 14 Feb 2014 17:16:17 +0200 Subject: a few typofixes --- ext/mbstring/mbstring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ext/mbstring/mbstring.c') diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 1ae43602d1..e7f08a3256 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -1292,7 +1292,7 @@ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_v const mbfl_encoding *encoding; if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) { - /* falls back to UTF-8 if an unkown encoding name is given */ + /* falls back to UTF-8 if an unknown encoding name is given */ encoding = mbfl_no2encoding(mbfl_no_encoding_utf8); } MBSTRG(internal_encoding) = encoding; -- cgit v1.2.1 From ca927bb47ea7702532922b648d7a6feba034fca0 Mon Sep 17 00:00:00 2001 From: Yasuo Ohgaki Date: Wed, 12 Mar 2014 17:54:22 +0900 Subject: Add missing E_DEPRECATED error for https://wiki.php.net/rfc/default_encoding --- ext/mbstring/mbstring.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'ext/mbstring/mbstring.c') diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index e7f08a3256..001f40a5f9 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -1256,6 +1256,10 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input) MBSTRG(http_input_list) = list; MBSTRG(http_input_list_size) = size; + if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) { + php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_input is deprecated"); + } + return SUCCESS; } /* }}} */ @@ -1282,6 +1286,11 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output) } MBSTRG(http_output_encoding) = encoding; MBSTRG(current_http_output_encoding) = encoding; + + if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) { + php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_output is deprecated"); + } + return SUCCESS; } /* }}} */ @@ -1315,11 +1324,15 @@ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_v /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) { + if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) { + php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated"); + } + if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) { return FAILURE; } - if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN - || stage == PHP_INI_STAGE_RUNTIME) { + + if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) { if (new_value_length) { return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC); } else { -- cgit v1.2.1 From a84e5dc37dc0ff8c313164d9db141d3d9f2b2730 Mon Sep 17 00:00:00 2001 From: Yasuo Ohgaki Date: Mon, 24 Mar 2014 11:04:35 +0900 Subject: Remove unneeded string copy. Allow to set ''(empty string values) internal/input/output_encoding for better compatibility. i.e. Runtime INI value changes. More compliance to the RFC. Improve/add encoding handling tests. i.e. Rather than set encoding automagic way, detect it. --- ext/mbstring/mbstring.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'ext/mbstring/mbstring.c') diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 001f40a5f9..7d4eacf14d 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -601,6 +601,34 @@ static sapi_post_entry php_post_entries[] = { ZEND_GET_MODULE(mbstring) #endif +static char *get_internal_encoding(TSRMLS_D) { + if (PG(internal_encoding) && PG(internal_encoding)[0]) { + return PG(internal_encoding); + } else if (SG(default_charset)) { + return SG(default_charset); + } + return ""; +} + +static char *get_input_encoding(TSRMLS_D) { + if (PG(input_encoding) && PG(input_encoding)[0]) { + return PG(input_encoding); + } else if (SG(default_charset)) { + return SG(default_charset); + } + return ""; +} + +static char *get_output_encoding(TSRMLS_D) { + if (PG(output_encoding) && PG(output_encoding)[0]) { + return PG(output_encoding); + } else if (SG(default_charset)) { + return SG(default_charset); + } + return ""; +} + + /* {{{ allocators */ static void *_php_mb_allocators_malloc(unsigned int sz) { @@ -1236,9 +1264,9 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input) if (MBSTRG(http_input_list)) { pefree(MBSTRG(http_input_list), 1); } - if (SUCCESS == php_mb_parse_encoding_list(PG(input_encoding), strlen(PG(input_encoding))+1, &list, &size, 1 TSRMLS_CC)) { + if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(TSRMLS_C), strlen(get_input_encoding(TSRMLS_C))+1, &list, &size, 1 TSRMLS_CC)) { MBSTRG(http_input_list) = list; - MBSTRG(http_input_list_size) = 0; + MBSTRG(http_input_list_size) = size; return SUCCESS; } MBSTRG(http_input_list) = NULL; @@ -1270,7 +1298,7 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output) const mbfl_encoding *encoding; if (new_value == NULL || new_value_length == 0) { - encoding = mbfl_name2encoding(PG(output_encoding)); + encoding = mbfl_name2encoding(get_output_encoding(TSRMLS_C)); if (!encoding) { MBSTRG(http_output_encoding) = &mbfl_encoding_pass; MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; @@ -1336,7 +1364,7 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) if (new_value_length) { return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC); } else { - return _php_mb_ini_mbstring_internal_encoding_set(PG(internal_encoding), strlen(PG(internal_encoding))+1 TSRMLS_CC); + return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(TSRMLS_C), strlen(get_internal_encoding(TSRMLS_C))+1 TSRMLS_CC); } } else { /* the corresponding mbstring globals needs to be set according to the -- cgit v1.2.1