summaryrefslogtreecommitdiff
path: root/ext/mbstring/mbstring.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r--ext/mbstring/mbstring.c1435
1 files changed, 646 insertions, 789 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 3d55074acc..76654edbf8 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -62,6 +62,7 @@
#include "ext/standard/info.h"
#include "libmbfl/mbfl/mbfl_allocators.h"
+#include "libmbfl/mbfl/mbfilter_pass.h"
#include "php_variables.h"
#include "php_globals.h"
@@ -77,9 +78,7 @@
#include "php_mbregex.h"
#endif
-#ifdef ZEND_MULTIBYTE
#include "zend_multibyte.h"
-#endif /* ZEND_MULTIBYTE */
#if HAVE_ONIG
#include "php_onig_compat.h"
@@ -98,19 +97,21 @@ ZEND_DECLARE_MODULE_GLOBALS(mbstring)
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);
-#ifdef ZEND_MULTIBYTE
-static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
-static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
-static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC);
-static int php_mb_set_zend_encoding(TSRMLS_D);
-#endif
+static void php_mb_populate_current_detect_order_list(TSRMLS_D);
+
+static int php_mb_encoding_translation(TSRMLS_D);
+
+static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC);
+
+static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC);
+
/* }}} */
/* {{{ php_mb_default_identify_list */
typedef struct _php_mb_nls_ident_list {
enum mbfl_no_language lang;
- const enum mbfl_no_encoding* list;
- int list_size;
+ const enum mbfl_no_encoding *list;
+ size_t list_size;
} php_mb_nls_ident_list;
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
@@ -411,6 +412,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
ZEND_ARG_INFO(0, string)
ZEND_ARG_INFO(0, convmap)
ZEND_ARG_INFO(0, encoding)
+ ZEND_ARG_INFO(0, is_hex)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
@@ -465,6 +467,13 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
ZEND_ARG_INFO(0, string)
ZEND_END_ARG_INFO()
+ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
+ ZEND_ARG_INFO(0, pattern)
+ ZEND_ARG_INFO(0, callback)
+ ZEND_ARG_INFO(0, string)
+ ZEND_ARG_INFO(0, option)
+ZEND_END_ARG_INFO()
+
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
ZEND_ARG_INFO(0, pattern)
ZEND_ARG_INFO(0, string)
@@ -563,7 +572,7 @@ const zend_function_entry mbstring_functions[] = {
/* {{{ zend_module_entry mbstring_module_entry */
zend_module_entry mbstring_module_entry = {
- STANDARD_MODULE_HEADER,
+ STANDARD_MODULE_HEADER,
"mbstring",
mbstring_functions,
PHP_MINIT(mbstring),
@@ -571,11 +580,11 @@ zend_module_entry mbstring_module_entry = {
PHP_RINIT(mbstring),
PHP_RSHUTDOWN(mbstring),
PHP_MINFO(mbstring),
- NO_VERSION_YET,
- PHP_MODULE_GLOBALS(mbstring),
- PHP_GINIT(mbstring),
- PHP_GSHUTDOWN(mbstring),
- NULL,
+ NO_VERSION_YET,
+ PHP_MODULE_GLOBALS(mbstring),
+ PHP_GINIT(mbstring),
+ PHP_GSHUTDOWN(mbstring),
+ NULL,
STANDARD_MODULE_PROPERTIES_EX
};
/* }}} */
@@ -653,12 +662,12 @@ static sapi_post_entry mbstr_post_entries[] = {
* of parsed encodings.
*/
static int
-php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
+php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
{
- int n, l, size, bauto, ret = 1;
+ int size, bauto, ret = SUCCESS;
+ size_t n;
char *p, *p1, *p2, *endp, *tmpstr;
- enum mbfl_no_encoding no_encoding;
- enum mbfl_no_encoding *src, *entry, *list;
+ const mbfl_encoding **entry, **list;
list = NULL;
if (value == NULL || value_length <= 0) {
@@ -668,14 +677,8 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc
if (return_size) {
*return_size = 0;
}
- return 0;
+ return FAILURE;
} else {
- enum mbfl_no_encoding *identify_list;
- int identify_list_size;
-
- identify_list = MBSTRG(default_detect_order_list);
- identify_list_size = MBSTRG(default_detect_order_list_size);
-
/* copy the value string for work */
if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
tmpstr = (char *)estrndup(value+1, value_length-2);
@@ -684,7 +687,7 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc
else
tmpstr = (char *)estrndup(value, value_length);
if (tmpstr == NULL) {
- return 0;
+ return FAILURE;
}
/* count the number of listed encoding names */
endp = tmpstr + value_length;
@@ -694,9 +697,9 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc
p1 = p2 + 1;
n++;
}
- size = n + identify_list_size;
+ size = n + MBSTRG(default_detect_order_list_size);
/* make list */
- list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
+ list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
if (list != NULL) {
entry = list;
n = 0;
@@ -720,19 +723,19 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc
/* convert to the encoding number and check encoding */
if (strcasecmp(p1, "auto") == 0) {
if (!bauto) {
+ const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
+ const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
+ size_t i;
bauto = 1;
- l = identify_list_size;
- src = identify_list;
- while (l > 0) {
- *entry++ = *src++;
- l--;
+ for (i = 0; i < identify_list_size; i++) {
+ *entry++ = mbfl_no2encoding(*src++);
n++;
}
}
} else {
- no_encoding = mbfl_name2no_encoding(p1);
- if (no_encoding != mbfl_no_encoding_invalid) {
- *entry++ = no_encoding;
+ const mbfl_encoding *encoding = mbfl_name2encoding(p1);
+ if (encoding) {
+ *entry++ = encoding;
n++;
} else {
ret = 0;
@@ -772,39 +775,26 @@ php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_enc
}
/* }}} */
-/* {{{ MBSTRING_API php_mb_check_encoding_list */
-MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
- return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
-}
-/* }}} */
-
/* {{{ static int php_mb_parse_encoding_array()
* Return 0 if input contains any illegal encoding, otherwise 1.
* Even if any illegal encoding is detected the result may contain a list
* of parsed encodings.
*/
static int
-php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
+php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
{
zval **hash_entry;
HashTable *target_hash;
- int i, n, l, size, bauto,ret = 1;
- enum mbfl_no_encoding no_encoding;
- enum mbfl_no_encoding *src, *list, *entry;
+ int i, n, size, bauto, ret = SUCCESS;
+ const mbfl_encoding **list, **entry;
list = NULL;
if (Z_TYPE_P(array) == IS_ARRAY) {
- enum mbfl_no_encoding *identify_list;
- int identify_list_size;
-
- identify_list = MBSTRG(default_detect_order_list);
- identify_list_size = MBSTRG(default_detect_order_list_size);
-
target_hash = Z_ARRVAL_P(array);
zend_hash_internal_pointer_reset(target_hash);
i = zend_hash_num_elements(target_hash);
- size = i + identify_list_size;
- list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
+ size = i + MBSTRG(default_detect_order_list_size);
+ list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
if (list != NULL) {
entry = list;
bauto = 0;
@@ -816,22 +806,23 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in
convert_to_string_ex(hash_entry);
if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
if (!bauto) {
+ const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
+ const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
+ size_t j;
+
bauto = 1;
- l = identify_list_size;
- src = identify_list;
- while (l > 0) {
- *entry++ = *src++;
- l--;
+ for (j = 0; j < identify_list_size; j++) {
+ *entry++ = mbfl_no2encoding(*src++);
n++;
}
}
} else {
- no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
- if (no_encoding != mbfl_no_encoding_invalid) {
- *entry++ = no_encoding;
+ const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry));
+ if (encoding) {
+ *entry++ = encoding;
n++;
} else {
- ret = 0;
+ ret = FAILURE;
}
}
zend_hash_move_forward(target_hash);
@@ -848,7 +839,7 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in
if (return_list) {
*return_list = NULL;
}
- ret = 0;
+ ret = FAILURE;
}
if (return_size) {
*return_size = n;
@@ -860,7 +851,7 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in
if (return_size) {
*return_size = 0;
}
- ret = 0;
+ ret = FAILURE;
}
}
@@ -868,6 +859,118 @@ php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, in
}
/* }}} */
+/* {{{ zend_multibyte interface */
+static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC)
+{
+ return (const zend_encoding*)mbfl_name2encoding(encoding_name);
+}
+
+static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
+{
+ return ((const mbfl_encoding *)encoding)->name;
+}
+
+static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
+{
+ const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
+ if (encoding->flag & MBFL_ENCTYPE_SBCS) {
+ return 1;
+ }
+ if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
+ return 1;
+ }
+ return 0;
+}
+
+static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC)
+{
+ mbfl_string string;
+
+ if (!list) {
+ list = (const zend_encoding **)MBSTRG(current_detect_order_list);
+ list_size = MBSTRG(current_detect_order_list_size);
+ }
+
+ mbfl_string_init(&string);
+ string.no_language = MBSTRG(language);
+ string.val = (unsigned char *)arg_string;
+ string.len = arg_length;
+ return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
+}
+
+static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
+{
+ mbfl_string string, result;
+ mbfl_buffer_converter *convd;
+ int status, loc;
+
+ /* new encoding */
+ /* initialize string */
+ mbfl_string_init(&string);
+ mbfl_string_init(&result);
+ string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
+ string.no_language = MBSTRG(language);
+ string.val = (unsigned char*)from;
+ string.len = from_length;
+
+ /* initialize converter */
+ convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
+ if (convd == NULL) {
+ return -1;
+ }
+ mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
+ mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+
+ /* do it */
+ status = mbfl_buffer_converter_feed2(convd, &string, &loc);
+ if (status) {
+ mbfl_buffer_converter_delete(convd);
+ return (size_t)-1;
+ }
+
+ mbfl_buffer_converter_flush(convd);
+ if (!mbfl_buffer_converter_result(convd, &result)) {
+ mbfl_buffer_converter_delete(convd);
+ return (size_t)-1;
+ }
+
+ *to = result.val;
+ *to_length = result.len;
+
+ mbfl_buffer_converter_delete(convd);
+
+ return loc;
+}
+
+static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
+{
+ return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent TSRMLS_CC);
+}
+
+static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D)
+{
+ return (const zend_encoding *)MBSTRG(internal_encoding);
+}
+
+static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
+{
+ MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
+ return SUCCESS;
+}
+
+static zend_multibyte_functions php_mb_zend_multibyte_functions = {
+ "mbstring",
+ php_mb_zend_encoding_fetcher,
+ php_mb_zend_encoding_name_getter,
+ php_mb_zend_encoding_lexer_compatibility_checker,
+ php_mb_zend_encoding_detector,
+ php_mb_zend_encoding_converter,
+ php_mb_zend_encoding_list_parser,
+ php_mb_zend_internal_encoding_getter,
+ php_mb_zend_internal_encoding_setter
+};
+/* }}} */
+
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
static void _php_mb_free_regex(void *opaque);
@@ -942,7 +1045,7 @@ static void _php_mb_free_regex(void *opaque)
#endif
/* {{{ php_mb_nls_get_default_detect_order_list */
-static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
+static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
{
size_t i;
@@ -960,6 +1063,124 @@ static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,
}
/* }}} */
+static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC)
+{
+ char *result = emalloc(len + 2);
+ char *resp = result;
+ int i;
+
+ for (i = 0; i < len && start[i] != quote; ++i) {
+ if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
+ *resp++ = start[++i];
+ } else {
+ size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
+
+ while (j-- > 0 && i < len) {
+ *resp++ = start[i++];
+ }
+ --i;
+ }
+ }
+
+ *resp = '\0';
+ return result;
+}
+
+static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */
+{
+ char *pos = *line, quote;
+ char *res;
+
+ while (*pos && *pos != stop) {
+ if ((quote = *pos) == '"' || quote == '\'') {
+ ++pos;
+ while (*pos && *pos != quote) {
+ if (*pos == '\\' && pos[1] && pos[1] == quote) {
+ pos += 2;
+ } else {
+ ++pos;
+ }
+ }
+ if (*pos) {
+ ++pos;
+ }
+ } else {
+ pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
+
+ }
+ }
+ if (*pos == '\0') {
+ res = estrdup(*line);
+ *line += strlen(*line);
+ return res;
+ }
+
+ res = estrndup(*line, pos - *line);
+
+ while (*pos == stop) {
+ pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
+ }
+
+ *line = pos;
+ return res;
+}
+/* }}} */
+
+static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */
+{
+ while (*str && isspace(*(unsigned char *)str)) {
+ ++str;
+ }
+
+ if (!*str) {
+ return estrdup("");
+ }
+
+ if (*str == '"' || *str == '\'') {
+ char quote = *str;
+
+ str++;
+ return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote TSRMLS_CC);
+ } else {
+ char *strend = str;
+
+ while (*strend && !isspace(*(unsigned char *)strend)) {
+ ++strend;
+ }
+ return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0 TSRMLS_CC);
+ }
+}
+/* }}} */
+
+static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */
+{
+ char *s, *s2;
+ const size_t filename_len = strlen(filename);
+
+ /* The \ check should technically be needed for win32 systems only where
+ * it is a valid path separator. However, IE in all it's wisdom always sends
+ * the full path of the file on the user's filesystem, which means that unless
+ * the user does basename() they get a bogus file name. Until IE's user base drops
+ * to nill or problem is fixed this code must remain enabled for all systems. */
+ s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
+ s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
+
+ if (s && s2) {
+ if (s > s2) {
+ return ++s;
+ } else {
+ return ++s2;
+ }
+ } else if (s) {
+ return ++s;
+ } else if (s2) {
+ return ++s2;
+ } else {
+ return filename;
+ }
+}
+/* }}} */
+
/* {{{ php.ini directive handler */
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
static PHP_INI_MH(OnUpdate_mbstring_language)
@@ -980,23 +1201,27 @@ static PHP_INI_MH(OnUpdate_mbstring_language)
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
static PHP_INI_MH(OnUpdate_mbstring_detect_order)
{
- enum mbfl_no_encoding *list;
- int size;
+ const mbfl_encoding **list;
+ size_t size;
- if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
- if (MBSTRG(detect_order_list)) {
- free(MBSTRG(detect_order_list));
- }
- MBSTRG(detect_order_list) = list;
- MBSTRG(detect_order_list_size) = size;
- } else {
+ if (!new_value) {
if (MBSTRG(detect_order_list)) {
- free(MBSTRG(detect_order_list));
- MBSTRG(detect_order_list) = NULL;
+ pefree(MBSTRG(detect_order_list), 1);
}
+ MBSTRG(detect_order_list) = NULL;
+ MBSTRG(detect_order_list_size) = 0;
+ return SUCCESS;
+ }
+
+ if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
return FAILURE;
}
+ if (MBSTRG(detect_order_list)) {
+ pefree(MBSTRG(detect_order_list), 1);
+ }
+ MBSTRG(detect_order_list) = list;
+ MBSTRG(detect_order_list_size) = size;
return SUCCESS;
}
/* }}} */
@@ -1004,24 +1229,28 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order)
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
static PHP_INI_MH(OnUpdate_mbstring_http_input)
{
- enum mbfl_no_encoding *list;
- int size;
+ const mbfl_encoding **list;
+ size_t size;
- if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
- if (MBSTRG(http_input_list)) {
- free(MBSTRG(http_input_list));
- }
- MBSTRG(http_input_list) = list;
- MBSTRG(http_input_list_size) = size;
- } else {
+ if (!new_value) {
if (MBSTRG(http_input_list)) {
- free(MBSTRG(http_input_list));
- MBSTRG(http_input_list) = NULL;
+ pefree(MBSTRG(http_input_list), 1);
}
+ MBSTRG(http_input_list) = NULL;
MBSTRG(http_input_list_size) = 0;
+ return SUCCESS;
+ }
+
+ if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
return FAILURE;
}
+ if (MBSTRG(http_input_list)) {
+ pefree(MBSTRG(http_input_list), 1);
+ }
+ MBSTRG(http_input_list) = list;
+ MBSTRG(http_input_list_size) = size;
+
return SUCCESS;
}
/* }}} */
@@ -1029,20 +1258,23 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input)
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
static PHP_INI_MH(OnUpdate_mbstring_http_output)
{
- enum mbfl_no_encoding no_encoding;
+ const mbfl_encoding *encoding;
- no_encoding = mbfl_name2no_encoding(new_value);
- if (no_encoding != mbfl_no_encoding_invalid) {
- MBSTRG(http_output_encoding) = no_encoding;
- MBSTRG(current_http_output_encoding) = no_encoding;
- } else {
- MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
- MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
- if (new_value != NULL && new_value_length > 0) {
- return FAILURE;
- }
+ if (new_value == NULL || new_value_length == 0) {
+ MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
+ MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
+ return SUCCESS;
+ }
+
+ encoding = mbfl_name2encoding(new_value);
+ if (!encoding) {
+ MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
+ MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
+ return FAILURE;
}
+ MBSTRG(http_output_encoding) = encoding;
+ MBSTRG(current_http_output_encoding) = encoding;
return SUCCESS;
}
/* }}} */
@@ -1050,62 +1282,44 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output)
/* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
{
- enum mbfl_no_encoding no_encoding;
- const char *enc_name = NULL;
- uint enc_name_len = 0;
-
- no_encoding = new_value ? mbfl_name2no_encoding(new_value):
- mbfl_no_encoding_invalid;
- if (no_encoding != mbfl_no_encoding_invalid) {
- enc_name = new_value;
- enc_name_len = new_value_length;
- } else {
+ const mbfl_encoding *encoding;
+
+ if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) {
switch (MBSTRG(language)) {
case mbfl_no_language_uni:
- enc_name = "UTF-8";
- enc_name_len = sizeof("UTF-8") - 1;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
break;
case mbfl_no_language_japanese:
- enc_name = "EUC-JP";
- enc_name_len = sizeof("EUC-JP") - 1;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_euc_jp);
break;
case mbfl_no_language_korean:
- enc_name = "EUC-KR";
- enc_name_len = sizeof("EUC-KR") - 1;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_euc_kr);
break;
case mbfl_no_language_simplified_chinese:
- enc_name = "EUC-CN";
- enc_name_len = sizeof("EUC-CN") - 1;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_euc_cn);
break;
case mbfl_no_language_traditional_chinese:
- enc_name = "EUC-TW";
- enc_name_len = sizeof("EUC-TW") - 1;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_euc_tw);
break;
case mbfl_no_language_russian:
- enc_name = "KOI8-R";
- enc_name_len = sizeof("KOI8-R") - 1;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_koi8r);
break;
case mbfl_no_language_german:
- enc_name = "ISO-8859-15";
- enc_name_len = sizeof("ISO-8859-15") - 1;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_8859_15);
break;
case mbfl_no_language_armenian:
- enc_name = "ArmSCII-8";
- enc_name_len = sizeof("ArmSCII-8") - 1;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_armscii8);
break;
case mbfl_no_language_turkish:
- enc_name = "ISO-8859-9";
- enc_name_len = sizeof("ISO-8859-9") - 1;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_8859_9);
break;
default:
- enc_name = "ISO-8859-1";
- enc_name_len = sizeof("ISO-8859-1") - 1;
+ encoding = mbfl_no2encoding(mbfl_no_encoding_8859_1);
break;
}
- no_encoding = mbfl_name2no_encoding(enc_name);
}
- MBSTRG(internal_encoding) = no_encoding;
- MBSTRG(current_internal_encoding) = no_encoding;
+ MBSTRG(internal_encoding) = encoding;
+ MBSTRG(current_internal_encoding) = encoding;
#if HAVE_MBREGEX
{
const char *enc_name = new_value;
@@ -1124,6 +1338,9 @@ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_v
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
{
+ if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) {
+ return FAILURE;
+ }
if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
|| stage == PHP_INI_STAGE_RUNTIME) {
return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
@@ -1139,32 +1356,6 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
}
/* }}} */
-#ifdef ZEND_MULTIBYTE
-/* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
-static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
-{
- int *list, size;
-
- if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
- if (MBSTRG(script_encoding_list) != NULL) {
- free(MBSTRG(script_encoding_list));
- }
- MBSTRG(script_encoding_list) = list;
- MBSTRG(script_encoding_list_size) = size;
- } else {
- if (MBSTRG(script_encoding_list) != NULL) {
- free(MBSTRG(script_encoding_list));
- }
- MBSTRG(script_encoding_list) = NULL;
- MBSTRG(script_encoding_list_size) = 0;
- return FAILURE;
- }
-
- return SUCCESS;
-}
-/* }}} */
-#endif /* ZEND_MULTIBYTE */
-
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
{
@@ -1207,7 +1398,7 @@ static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
{
if (new_value == NULL) {
- return FAILURE;
+ return FAILURE;
}
OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
@@ -1261,10 +1452,7 @@ PHP_INI_BEGIN()
PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
- PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
-#ifdef ZEND_MULTIBYTE
- PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
-#endif /* ZEND_MULTIBYTE */
+ STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
@@ -1289,19 +1477,15 @@ PHP_INI_END()
static PHP_GINIT_FUNCTION(mbstring)
{
mbstring_globals->language = mbfl_no_language_uni;
- mbstring_globals->internal_encoding = mbfl_no_encoding_invalid;
+ mbstring_globals->internal_encoding = NULL;
mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
-#ifdef ZEND_MULTIBYTE
- mbstring_globals->script_encoding_list = NULL;
- mbstring_globals->script_encoding_list_size = 0;
-#endif /* ZEND_MULTIBYTE */
- mbstring_globals->http_output_encoding = mbfl_no_encoding_pass;
- mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass;
- mbstring_globals->http_input_identify = mbfl_no_encoding_invalid;
- mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid;
- mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid;
- mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid;
- mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid;
+ mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
+ mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
+ mbstring_globals->http_input_identify = NULL;
+ mbstring_globals->http_input_identify_get = NULL;
+ mbstring_globals->http_input_identify_post = NULL;
+ mbstring_globals->http_input_identify_cookie = NULL;
+ mbstring_globals->http_input_identify_string = NULL;
mbstring_globals->http_input_list = NULL;
mbstring_globals->http_input_list_size = 0;
mbstring_globals->detect_order_list = NULL;
@@ -1332,11 +1516,6 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring)
if (mbstring_globals->http_input_list) {
free(mbstring_globals->http_input_list);
}
-#ifdef ZEND_MULTIBYTE
- if (mbstring_globals->script_encoding_list) {
- free(mbstring_globals->script_encoding_list);
- }
-#endif /* ZEND_MULTIBYTE */
if (mbstring_globals->detect_order_list) {
free(mbstring_globals->detect_order_list);
}
@@ -1357,7 +1536,7 @@ PHP_MINIT_FUNCTION(mbstring)
REGISTER_INI_ENTRIES();
/* This is a global handler. Should not be set in a per-request handler. */
- sapi_register_treat_data(mbstr_treat_data);
+ sapi_register_treat_data(mbstr_treat_data TSRMLS_CC);
/* Post handlers are stored in the thread-local context. */
if (MBSTRG(encoding_translation)) {
@@ -1375,6 +1554,19 @@ PHP_MINIT_FUNCTION(mbstring)
#if HAVE_MBREGEX
PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
+
+ if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) {
+ return FAILURE;
+ }
+
+ php_rfc1867_set_multibyte_callbacks(
+ php_mb_encoding_translation,
+ php_mb_gpc_get_detect_order,
+ php_mb_gpc_set_input_encoding,
+ php_mb_rfc1867_getword,
+ php_mb_rfc1867_getword_conf,
+ php_mb_rfc1867_basename);
+
return SUCCESS;
}
/* }}} */
@@ -1383,7 +1575,7 @@ PHP_MINIT_FUNCTION(mbstring)
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
UNREGISTER_INI_ENTRIES();
-
+
#if HAVE_MBREGEX
PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
@@ -1395,8 +1587,6 @@ PHP_MSHUTDOWN_FUNCTION(mbstring)
/* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)
{
- int n;
- enum mbfl_no_encoding *list=NULL, *entry;
zend_function *func, *orig;
const struct mb_overload_def *p;
@@ -1407,22 +1597,7 @@ PHP_RINIT_FUNCTION(mbstring)
MBSTRG(illegalchars) = 0;
- n = 0;
- if (MBSTRG(detect_order_list)) {
- list = MBSTRG(detect_order_list);
- n = MBSTRG(detect_order_list_size);
- }
- if (n <= 0) {
- list = MBSTRG(default_detect_order_list);
- n = MBSTRG(default_detect_order_list_size);
- }
- entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
- MBSTRG(current_detect_order_list) = entry;
- MBSTRG(current_detect_order_list_size) = n;
- while (n > 0) {
- *entry++ = *list++;
- n--;
- }
+ php_mb_populate_current_detect_order_list(TSRMLS_C);
/* override original function. */
if (MBSTRG(func_overload)){
@@ -1454,10 +1629,7 @@ PHP_RINIT_FUNCTION(mbstring)
#if HAVE_MBREGEX
PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
-#ifdef ZEND_MULTIBYTE
- zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
- php_mb_set_zend_encoding(TSRMLS_C);
-#endif /* ZEND_MULTIBYTE */
+ zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC);
return SUCCESS;
}
@@ -1481,11 +1653,11 @@ PHP_RSHUTDOWN_FUNCTION(mbstring)
}
/* clear http input identification. */
- MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
- MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
- MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
- MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
- MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
+ MBSTRG(http_input_identify) = NULL;
+ MBSTRG(http_input_identify_post) = NULL;
+ MBSTRG(http_input_identify_get) = NULL;
+ MBSTRG(http_input_identify_cookie) = NULL;
+ MBSTRG(http_input_identify_string) = NULL;
/* clear overloaded function. */
if (MBSTRG(func_overload)){
@@ -1516,7 +1688,12 @@ PHP_MINFO_FUNCTION(mbstring)
php_info_print_table_start();
php_info_print_table_row(2, "Multibyte Support", "enabled");
php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
- php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
+ php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
+ {
+ char tmp[256];
+ snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
+ php_info_print_table_row(2, "libmbfl version", tmp);
+ }
php_info_print_table_end();
php_info_print_table_start();
@@ -1560,33 +1737,27 @@ PHP_FUNCTION(mb_language)
Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)
{
- char *name = NULL;
+ const char *name = NULL;
int name_len;
- enum mbfl_no_encoding no_encoding;
+ const mbfl_encoding *encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
RETURN_FALSE;
}
if (name == NULL) {
- name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
+ name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
if (name != NULL) {
RETURN_STRING(name, 1);
} else {
RETURN_FALSE;
}
} else {
- no_encoding = mbfl_name2no_encoding(name);
- if (no_encoding == mbfl_no_encoding_invalid) {
+ encoding = mbfl_name2encoding(name);
+ if (!encoding) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
RETURN_FALSE;
} else {
- MBSTRG(current_internal_encoding) = no_encoding;
-#ifdef ZEND_MULTIBYTE
- /* TODO: make independent from mbstring.encoding_translation? */
- if (MBSTRG(encoding_translation)) {
- zend_multibyte_set_internal_encoding(name TSRMLS_CC);
- }
-#endif /* ZEND_MULTIBYTE */
+ MBSTRG(current_internal_encoding) = encoding;
RETURN_TRUE;
}
}
@@ -1599,10 +1770,9 @@ PHP_FUNCTION(mb_http_input)
{
char *typ = NULL;
int typ_len;
- int retname, n;
- char *name, *list, *temp;
- enum mbfl_no_encoding *entry;
- enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
+ int retname;
+ char *list, *temp;
+ const mbfl_encoding *result = NULL;
retname = 1;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
@@ -1630,40 +1800,38 @@ PHP_FUNCTION(mb_http_input)
break;
case 'I':
case 'i':
- array_init(return_value);
- entry = MBSTRG(http_input_list);
- n = MBSTRG(http_input_list_size);
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
- add_next_index_string(return_value, name, 1);
+ {
+ const mbfl_encoding **entry = MBSTRG(http_input_list);
+ const size_t n = MBSTRG(http_input_list_size);
+ size_t i;
+ array_init(return_value);
+ for (i = 0; i < n; i++) {
+ add_next_index_string(return_value, (*entry)->name, 1);
+ entry++;
}
- entry++;
- n--;
+ retname = 0;
}
- retname = 0;
break;
case 'L':
case 'l':
- entry = MBSTRG(http_input_list);
- n = MBSTRG(http_input_list_size);
- list = NULL;
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
+ {
+ const mbfl_encoding **entry = MBSTRG(http_input_list);
+ const size_t n = MBSTRG(http_input_list_size);
+ size_t i;
+ list = NULL;
+ for (i = 0; i < n; i++) {
if (list) {
temp = list;
- spprintf(&list, 0, "%s,%s", temp, name);
+ spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
efree(temp);
if (!list) {
break;
}
} else {
- list = estrdup(name);
+ list = estrdup((*entry)->name);
}
+ entry++;
}
- entry++;
- n--;
}
if (!list) {
RETURN_FALSE;
@@ -1678,9 +1846,8 @@ PHP_FUNCTION(mb_http_input)
}
if (retname) {
- if (result != mbfl_no_encoding_invalid &&
- (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
- RETVAL_STRING(name, 1);
+ if (result) {
+ RETVAL_STRING(result->name, 1);
} else {
RETVAL_FALSE;
}
@@ -1692,28 +1859,28 @@ PHP_FUNCTION(mb_http_input)
Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)
{
- char *name = NULL;
+ const char *name = NULL;
int name_len;
- enum mbfl_no_encoding no_encoding;
+ const mbfl_encoding *encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
RETURN_FALSE;
}
if (name == NULL) {
- name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
+ name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
if (name != NULL) {
RETURN_STRING(name, 1);
} else {
RETURN_FALSE;
}
} else {
- no_encoding = mbfl_name2no_encoding(name);
- if (no_encoding == mbfl_no_encoding_invalid) {
+ encoding = mbfl_name2encoding(name);
+ if (!encoding) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
RETURN_FALSE;
} else {
- MBSTRG(current_http_output_encoding) = no_encoding;
+ MBSTRG(current_http_output_encoding) = encoding;
RETURN_TRUE;
}
}
@@ -1725,32 +1892,26 @@ PHP_FUNCTION(mb_http_output)
PHP_FUNCTION(mb_detect_order)
{
zval **arg1 = NULL;
- int n, size;
- enum mbfl_no_encoding *list, *entry;
- char *name;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
return;
}
if (!arg1) {
+ size_t i;
+ size_t n = MBSTRG(current_detect_order_list_size);
+ const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
array_init(return_value);
- entry = MBSTRG(current_detect_order_list);
- n = MBSTRG(current_detect_order_list_size);
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
- add_next_index_string(return_value, name, 1);
- }
+ for (i = 0; i < n; i++) {
+ add_next_index_string(return_value, (*entry)->name, 1);
entry++;
- n--;
}
} else {
- list = NULL;
- size = 0;
+ const mbfl_encoding **list = NULL;
+ size_t size = 0;
switch (Z_TYPE_PP(arg1)) {
case IS_ARRAY:
- if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
+ if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
}
@@ -1759,7 +1920,7 @@ PHP_FUNCTION(mb_detect_order)
break;
default:
convert_to_string_ex(arg1);
- if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
+ if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
}
@@ -1879,15 +2040,15 @@ PHP_FUNCTION(mb_parse_str)
char *encstr = NULL;
int encstr_len;
php_mb_encoding_handler_info_t info;
- enum mbfl_no_encoding detected;
+ const mbfl_encoding *detected;
track_vars_array = NULL;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
return;
}
- /* Clear out the array */
if (track_vars_array != NULL) {
+ /* Clear out the array */
zval_dtor(track_vars_array);
array_init(track_vars_array);
}
@@ -1896,7 +2057,6 @@ PHP_FUNCTION(mb_parse_str)
info.data_type = PARSE_STRING;
info.separator = PG(arg_separator).input;
- info.force_register_globals = (track_vars_array == NULL);
info.report_errors = 1;
info.to_encoding = MBSTRG(current_internal_encoding);
info.to_language = MBSTRG(language);
@@ -1904,11 +2064,20 @@ PHP_FUNCTION(mb_parse_str)
info.num_from_encodings = MBSTRG(http_input_list_size);
info.from_language = MBSTRG(language);
- detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
+ if (track_vars_array != NULL) {
+ detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
+ } else {
+ zval tmp;
+ if (!EG(active_symbol_table)) {
+ zend_rebuild_symbol_table(TSRMLS_C);
+ }
+ Z_ARRVAL(tmp) = EG(active_symbol_table);
+ detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC);
+ }
MBSTRG(http_input_identify) = detected;
- RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
+ RETVAL_BOOL(detected);
if (encstr != NULL) efree(encstr);
}
@@ -1924,7 +2093,7 @@ PHP_FUNCTION(mb_output_handler)
mbfl_string string, result;
const char *charset;
char *p;
- enum mbfl_no_encoding encoding;
+ const mbfl_encoding *encoding;
int last_feed, len;
unsigned char send_text_mimetype = 0;
char *s, *mimetype = NULL;
@@ -1943,7 +2112,7 @@ PHP_FUNCTION(mb_output_handler)
mbfl_buffer_converter_delete(MBSTRG(outconv));
MBSTRG(outconv) = NULL;
}
- if (encoding == mbfl_no_encoding_pass) {
+ if (encoding == &mbfl_encoding_pass) {
RETURN_STRINGL(arg_string, arg_string_len, 1);
}
@@ -1965,7 +2134,7 @@ PHP_FUNCTION(mb_output_handler)
/* if content-type is not yet set, set it and activate the converter */
if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
- charset = mbfl_no2preferred_mime_name(encoding);
+ charset = encoding->mime_name;
if (charset) {
len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
if (sapi_add_header(p, len, 0) != FAILURE) {
@@ -1973,7 +2142,7 @@ PHP_FUNCTION(mb_output_handler)
}
}
/* activate the converter */
- MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
+ MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
if (send_text_mimetype){
efree(mimetype);
}
@@ -1994,7 +2163,7 @@ PHP_FUNCTION(mb_output_handler)
/* feed the string */
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
string.val = (unsigned char *)arg_string;
string.len = arg_string_len;
mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
@@ -2031,7 +2200,7 @@ PHP_FUNCTION(mb_strlen)
string.no_language = MBSTRG(language);
if (enc_name == NULL) {
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
} else {
string.no_encoding = mbfl_name2no_encoding(enc_name);
if (string.no_encoding == mbfl_no_encoding_invalid) {
@@ -2058,13 +2227,13 @@ PHP_FUNCTION(mb_strpos)
mbfl_string haystack, needle;
char *enc_name = NULL;
int enc_name_len;
-
+
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
offset = 0;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
@@ -2129,9 +2298,9 @@ PHP_FUNCTION(mb_strrpos)
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
RETURN_FALSE;
@@ -2218,7 +2387,7 @@ PHP_FUNCTION(mb_stripos)
int n;
long offset;
mbfl_string haystack, needle;
- char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
int from_encoding_len;
n = -1;
offset = 0;
@@ -2247,7 +2416,7 @@ PHP_FUNCTION(mb_strripos)
int n;
long offset;
mbfl_string haystack, needle;
- const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
int from_encoding_len;
n = -1;
offset = 0;
@@ -2279,9 +2448,9 @@ PHP_FUNCTION(mb_strstr)
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
RETURN_FALSE;
@@ -2337,9 +2506,9 @@ PHP_FUNCTION(mb_strrchr)
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
RETURN_FALSE;
@@ -2392,13 +2561,13 @@ PHP_FUNCTION(mb_stristr)
unsigned int from_encoding_len, len, mblen;
int n;
mbfl_string haystack, needle, result, *ret = NULL;
- const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
@@ -2450,13 +2619,13 @@ PHP_FUNCTION(mb_strrichr)
zend_bool part = 0;
int n, from_encoding_len, len, mblen;
mbfl_string haystack, needle, result, *ret = NULL;
- char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->name;
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
@@ -2508,9 +2677,9 @@ PHP_FUNCTION(mb_substr_count)
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
return;
@@ -2546,15 +2715,16 @@ PHP_FUNCTION(mb_substr)
char *str, *encoding;
long from, len;
int mblen, str_len, encoding_len;
+ zval **z_len = NULL;
mbfl_string string, result, *ret;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", &str, &str_len, &from, &len, &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
return;
}
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (argc == 4) {
string.no_encoding = mbfl_name2no_encoding(encoding);
@@ -2567,8 +2737,11 @@ PHP_FUNCTION(mb_substr)
string.val = (unsigned char *)str;
string.len = str_len;
- if (argc < 3) {
+ if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
len = str_len;
+ } else {
+ convert_to_long_ex(z_len);
+ len = Z_LVAL_PP(z_len);
}
/* measures length */
@@ -2619,13 +2792,14 @@ PHP_FUNCTION(mb_strcut)
char *encoding;
long from, len;
int encoding_len;
+ zval **z_len = NULL;
mbfl_string string, result, *ret;
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
return;
}
@@ -2637,8 +2811,11 @@ PHP_FUNCTION(mb_strcut)
}
}
- if (argc < 3) {
+ if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
len = string.len;
+ } else {
+ convert_to_long_ex(z_len);
+ len = Z_LVAL_PP(z_len);
}
/* if "from" position is negative, count start position from the end
@@ -2686,7 +2863,7 @@ PHP_FUNCTION(mb_strwidth)
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
return;
@@ -2725,9 +2902,9 @@ PHP_FUNCTION(mb_strimwidth)
mbfl_string_init(&string);
mbfl_string_init(&marker);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
marker.no_language = MBSTRG(language);
- marker.no_encoding = MBSTRG(current_internal_encoding);
+ marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
marker.val = NULL;
marker.len = 0;
@@ -2771,9 +2948,10 @@ PHP_FUNCTION(mb_strimwidth)
MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
{
mbfl_string string, result, *ret;
- enum mbfl_no_encoding from_encoding, to_encoding;
+ const mbfl_encoding *from_encoding, *to_encoding;
mbfl_buffer_converter *convd;
- int size, *list;
+ size_t size;
+ const mbfl_encoding **list;
char *output=NULL;
if (output_len) {
@@ -2784,8 +2962,8 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
}
/* new encoding */
if (_to_encoding && strlen(_to_encoding)) {
- to_encoding = mbfl_name2no_encoding(_to_encoding);
- if (to_encoding == mbfl_no_encoding_invalid) {
+ to_encoding = mbfl_name2encoding(_to_encoding);
+ if (!to_encoding) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
return NULL;
}
@@ -2797,7 +2975,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
mbfl_string_init(&string);
mbfl_string_init(&result);
from_encoding = MBSTRG(current_internal_encoding);
- string.no_encoding = from_encoding;
+ string.no_encoding = from_encoding->no_encoding;
string.no_language = MBSTRG(language);
string.val = (unsigned char *)input;
string.len = length;
@@ -2806,20 +2984,20 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
if (_from_encodings) {
list = NULL;
size = 0;
- php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
+ php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
if (size == 1) {
from_encoding = *list;
- string.no_encoding = from_encoding;
+ string.no_encoding = from_encoding->no_encoding;
} else if (size > 1) {
/* auto detect */
- from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection));
- if (from_encoding != mbfl_no_encoding_invalid) {
- string.no_encoding = from_encoding;
+ from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
+ if (from_encoding) {
+ string.no_encoding = from_encoding->no_encoding;
} else {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
- from_encoding = mbfl_no_encoding_pass;
+ from_encoding = &mbfl_encoding_pass;
to_encoding = from_encoding;
- string.no_encoding = from_encoding;
+ string.no_encoding = from_encoding->no_encoding;
}
} else {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
@@ -2830,7 +3008,7 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
}
/* initialize converter */
- convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
+ convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
if (convd == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
return NULL;
@@ -2931,7 +3109,8 @@ PHP_FUNCTION(mb_convert_encoding)
Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)
{
- char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
+ char *str;
int str_len, from_encoding_len;
long case_mode = 0;
char *newstr;
@@ -2955,7 +3134,8 @@ PHP_FUNCTION(mb_convert_case)
*/
PHP_FUNCTION(mb_strtoupper)
{
- char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
+ char *str;
int str_len, from_encoding_len;
char *newstr;
size_t ret_len;
@@ -2978,7 +3158,8 @@ PHP_FUNCTION(mb_strtoupper)
*/
PHP_FUNCTION(mb_strtolower)
{
- char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
+ const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
+ char *str;
int str_len, from_encoding_len;
char *newstr;
size_t ret_len;
@@ -3006,9 +3187,9 @@ PHP_FUNCTION(mb_detect_encoding)
zval *encoding_list;
mbfl_string string;
- const char *ret;
- enum mbfl_no_encoding *elist;
- int size, *list;
+ const mbfl_encoding *ret;
+ const mbfl_encoding **elist, **list;
+ size_t size;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
return;
@@ -3020,7 +3201,7 @@ PHP_FUNCTION(mb_detect_encoding)
if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
switch (Z_TYPE_P(encoding_list)) {
case IS_ARRAY:
- if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
+ if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
list = NULL;
@@ -3030,7 +3211,7 @@ PHP_FUNCTION(mb_detect_encoding)
break;
default:
convert_to_string(encoding_list);
- if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
+ if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
if (list) {
efree(list);
list = NULL;
@@ -3059,7 +3240,7 @@ PHP_FUNCTION(mb_detect_encoding)
string.no_language = MBSTRG(language);
string.val = (unsigned char *)str;
string.len = str_len;
- ret = mbfl_identify_encoding_name(&string, elist, size, strict);
+ ret = mbfl_identify_encoding2(&string, elist, size, strict);
if (list != NULL) {
efree((void *)list);
@@ -3069,7 +3250,7 @@ PHP_FUNCTION(mb_detect_encoding)
RETURN_FALSE;
}
- RETVAL_STRING((char *)ret, 1);
+ RETVAL_STRING((char *)ret->name, 1);
}
/* }}} */
@@ -3134,7 +3315,7 @@ PHP_FUNCTION(mb_encode_mimeheader)
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
return;
@@ -3168,7 +3349,7 @@ PHP_FUNCTION(mb_encode_mimeheader)
mbfl_string_init(&result);
ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
if (ret != NULL) {
- RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
+ RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
} else {
RETVAL_FALSE;
}
@@ -3183,16 +3364,16 @@ PHP_FUNCTION(mb_decode_mimeheader)
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
return;
}
mbfl_string_init(&result);
- ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
+ ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
if (ret != NULL) {
- RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
+ RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
} else {
RETVAL_FALSE;
}
@@ -3212,7 +3393,7 @@ PHP_FUNCTION(mb_convert_kana)
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
return;
@@ -3311,21 +3492,22 @@ PHP_FUNCTION(mb_convert_variables)
zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
HashTable *target_hash;
mbfl_string string, result, *ret;
- enum mbfl_no_encoding from_encoding, to_encoding;
+ const mbfl_encoding *from_encoding, *to_encoding;
mbfl_encoding_detector *identd;
mbfl_buffer_converter *convd;
- int n, to_enc_len, argc, stack_level, stack_max, elistsz;
- enum mbfl_no_encoding *elist;
- char *name, *to_enc;
+ int n, to_enc_len, argc, stack_level, stack_max;
+ size_t elistsz;
+ const mbfl_encoding **elist;
+ char *to_enc;
void *ptmp;
-
+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
return;
}
/* new encoding */
- to_encoding = mbfl_name2no_encoding(to_enc);
- if (to_encoding == mbfl_no_encoding_invalid) {
+ to_encoding = mbfl_name2encoding(to_enc);
+ if (!to_encoding) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
efree(args);
RETURN_FALSE;
@@ -3335,7 +3517,7 @@ PHP_FUNCTION(mb_convert_variables)
mbfl_string_init(&string);
mbfl_string_init(&result);
from_encoding = MBSTRG(current_internal_encoding);
- string.no_encoding = from_encoding;
+ string.no_encoding = from_encoding->no_encoding;
string.no_language = MBSTRG(language);
/* pre-conversion encoding */
@@ -3351,16 +3533,16 @@ PHP_FUNCTION(mb_convert_variables)
break;
}
if (elistsz <= 0) {
- from_encoding = mbfl_no_encoding_pass;
+ from_encoding = &mbfl_encoding_pass;
} else if (elistsz == 1) {
from_encoding = *elist;
} else {
/* auto detect */
- from_encoding = mbfl_no_encoding_invalid;
+ from_encoding = NULL;
stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
stack_level = 0;
- identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
+ identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
if (identd != NULL) {
n = 0;
while (n < argc || stack_level > 0) {
@@ -3413,14 +3595,14 @@ PHP_FUNCTION(mb_convert_variables)
}
}
detect_end:
- from_encoding = mbfl_encoding_detector_judge(identd);
+ from_encoding = mbfl_encoding_detector_judge2(identd);
mbfl_encoding_detector_delete(identd);
}
efree(stack);
- if (from_encoding == mbfl_no_encoding_invalid) {
+ if (!from_encoding) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
- from_encoding = mbfl_no_encoding_pass;
+ from_encoding = &mbfl_encoding_pass;
}
}
if (elist != NULL) {
@@ -3428,8 +3610,8 @@ detect_end:
}
/* create converter */
convd = NULL;
- if (from_encoding != mbfl_no_encoding_pass) {
- convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
+ if (from_encoding != &mbfl_encoding_pass) {
+ convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
if (convd == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
RETURN_FALSE;
@@ -3511,9 +3693,8 @@ detect_end:
efree(args);
- name = (char *)mbfl_no_encoding2name(from_encoding);
- if (name != NULL) {
- RETURN_STRING(name, 1);
+ if (from_encoding) {
+ RETURN_STRING(from_encoding->name, 1);
} else {
RETURN_FALSE;
}
@@ -3531,21 +3712,22 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
HashTable *target_hash;
size_t argc = ZEND_NUM_ARGS();
int i, *convmap, *mapelm, mapsize=0;
+ zend_bool is_hex = 0;
mbfl_string string, result, *ret;
enum mbfl_no_encoding no_encoding;
- if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
+ if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
return;
}
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.no_encoding = MBSTRG(current_internal_encoding);
+ string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
string.val = (unsigned char *)str;
string.len = str_len;
/* encoding */
- if (argc == 3) {
+ if ((argc == 3 || argc == 4) && encoding_len > 0) {
no_encoding = mbfl_name2no_encoding(encoding);
if (no_encoding == mbfl_no_encoding_invalid) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
@@ -3555,6 +3737,12 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
}
}
+ if (argc == 4) {
+ if (type == 0 && is_hex) {
+ type = 2; /* output in hex format */
+ }
+ }
+
/* conversion map */
convmap = NULL;
if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
@@ -3592,7 +3780,7 @@ php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
}
/* }}} */
-/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
+/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)
{
@@ -3852,9 +4040,9 @@ PHP_FUNCTION(mb_send_mail)
mbfl_string orig_str, conv_str;
mbfl_string *pstr; /* pointer to mbfl string for return value */
enum mbfl_no_encoding
- tran_cs, /* transfar text charset */
- head_enc, /* header transfar encoding */
- body_enc; /* body transfar encoding */
+ tran_cs, /* transfar text charset */
+ head_enc, /* header transfar encoding */
+ body_enc; /* body transfar encoding */
mbfl_memory_device device; /* automatic allocateable buffer for additional header */
const mbfl_language *lang;
int err = 0;
@@ -3862,12 +4050,7 @@ PHP_FUNCTION(mb_send_mail)
smart_str *s;
extern void mbfl_memory_device_unput(mbfl_memory_device *device);
char *pp, *ee;
-
- if (PG(safe_mode) && (ZEND_NUM_ARGS() == 5)) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "SAFE MODE Restriction in effect. The fifth parameter is disabled in SAFE MODE.");
- RETURN_FALSE;
- }
-
+
/* initialize */
mbfl_memory_device_init(&device, 0, 0);
mbfl_string_init(&orig_str);
@@ -3966,15 +4149,15 @@ PHP_FUNCTION(mb_send_mail)
/* To: */
if (to != NULL) {
- if (to_len > 0) {
- to_r = estrndup(to, to_len);
- for (; to_len; to_len--) {
- if (!isspace((unsigned char) to_r[to_len - 1])) {
- break;
- }
- to_r[to_len - 1] = '\0';
- }
- for (i = 0; to_r[i]; i++) {
+ if (to_len > 0) {
+ to_r = estrndup(to, to_len);
+ for (; to_len; to_len--) {
+ if (!isspace((unsigned char) to_r[to_len - 1])) {
+ break;
+ }
+ to_r[to_len - 1] = '\0';
+ }
+ for (i = 0; to_r[i]; i++) {
if (iscntrl((unsigned char) to_r[i])) {
/* According to RFC 822, section 3.1.1 long headers may be separated into
* parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
@@ -3984,11 +4167,11 @@ PHP_FUNCTION(mb_send_mail)
SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
to_r[i] = ' ';
}
- }
- } else {
- to_r = to;
- }
- } else {
+ }
+ } else {
+ to_r = to;
+ }
+ } else {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
err = 1;
}
@@ -3998,10 +4181,10 @@ PHP_FUNCTION(mb_send_mail)
orig_str.no_language = MBSTRG(language);
orig_str.val = (unsigned char *)subject;
orig_str.len = subject_len;
- orig_str.no_encoding = MBSTRG(current_internal_encoding);
- if (orig_str.no_encoding == mbfl_no_encoding_invalid
- || orig_str.no_encoding == mbfl_no_encoding_pass) {
- orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
+ orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
+ if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
+ const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
+ orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
}
pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
if (pstr != NULL) {
@@ -4017,11 +4200,11 @@ PHP_FUNCTION(mb_send_mail)
orig_str.no_language = MBSTRG(language);
orig_str.val = (unsigned char *)message;
orig_str.len = (unsigned int)message_len;
- orig_str.no_encoding = MBSTRG(current_internal_encoding);
+ orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
- if (orig_str.no_encoding == mbfl_no_encoding_invalid
- || orig_str.no_encoding == mbfl_no_encoding_pass) {
- orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
+ if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
+ const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
+ orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
}
pstr = NULL;
@@ -4129,15 +4312,13 @@ PHP_FUNCTION(mb_send_mail)
PHP_FUNCTION(mb_get_info)
{
char *typ = NULL;
- int typ_len, n;
+ int typ_len;
+ size_t n;
char *name;
const struct mb_overload_def *over_func;
zval *row1, *row2;
const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
- enum mbfl_no_encoding *entry;
-#ifdef ZEND_MULTIBYTE
- zval *row3;
-#endif /* ZEND_MULTIBYTE */
+ const mbfl_encoding **entry;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
RETURN_FALSE;
@@ -4145,14 +4326,14 @@ PHP_FUNCTION(mb_get_info)
if (!typ || !strcasecmp("all", typ)) {
array_init(return_value);
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
- add_assoc_string(return_value, "internal_encoding", name, 1);
+ if (MBSTRG(current_internal_encoding)) {
+ add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1);
}
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
- add_assoc_string(return_value, "http_input", name, 1);
+ if (MBSTRG(http_input_identify)) {
+ add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1);
}
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
- add_assoc_string(return_value, "http_output", name, 1);
+ if (MBSTRG(current_http_output_encoding)) {
+ add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1);
}
if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
@@ -4194,15 +4375,13 @@ PHP_FUNCTION(mb_get_info)
}
n = MBSTRG(current_detect_order_list_size);
entry = MBSTRG(current_detect_order_list);
- if(n > 0) {
+ if (n > 0) {
+ size_t i;
MAKE_STD_ZVAL(row2);
array_init(row2);
- while (n > 0) {
- if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
- add_next_index_string(row2, name, 1);
- }
+ for (i = 0; i < n; i++) {
+ add_next_index_string(row2, (*entry)->name, 1);
entry++;
- n--;
}
add_assoc_zval(return_value, "detect_order", row2);
}
@@ -4220,33 +4399,17 @@ PHP_FUNCTION(mb_get_info)
} else {
add_assoc_string(return_value, "strict_detection", "Off", 1);
}
-#ifdef ZEND_MULTIBYTE
- entry = MBSTRG(script_encoding_list);
- n = MBSTRG(script_encoding_list_size);
- if(n > 0) {
- MAKE_STD_ZVAL(row3);
- array_init(row3);
- while (n > 0) {
- if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
- add_next_index_string(row3, name, 1);
- }
- entry++;
- n--;
- }
- add_assoc_zval(return_value, "script_encoding", row3);
- }
-#endif /* ZEND_MULTIBYTE */
} else if (!strcasecmp("internal_encoding", typ)) {
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
- RETVAL_STRING(name, 1);
+ if (MBSTRG(current_internal_encoding)) {
+ RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1);
}
} else if (!strcasecmp("http_input", typ)) {
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
- RETVAL_STRING(name, 1);
+ if (MBSTRG(http_input_identify)) {
+ RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1);
}
} else if (!strcasecmp("http_output", typ)) {
- if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
- RETVAL_STRING(name, 1);
+ if (MBSTRG(current_http_output_encoding)) {
+ RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1);
}
} else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
@@ -4294,15 +4457,12 @@ PHP_FUNCTION(mb_get_info)
} else if (!strcasecmp("detect_order", typ)) {
n = MBSTRG(current_detect_order_list_size);
entry = MBSTRG(current_detect_order_list);
- if(n > 0) {
+ if (n > 0) {
+ size_t i;
array_init(return_value);
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
- add_next_index_string(return_value, name, 1);
- }
+ for (i = 0; i < n; i++) {
+ add_next_index_string(return_value, (*entry)->name, 1);
entry++;
- n--;
}
}
} else if (!strcasecmp("substitute_character", typ)) {
@@ -4322,24 +4482,6 @@ PHP_FUNCTION(mb_get_info)
RETVAL_STRING("Off", 1);
}
} else {
-#ifdef ZEND_MULTIBYTE
- if (!strcasecmp("script_encoding", typ)) {
- entry = MBSTRG(script_encoding_list);
- n = MBSTRG(script_encoding_list_size);
- if(n > 0) {
- array_init(return_value);
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
- add_next_index_string(return_value, name, 1);
- }
- entry++;
- n--;
- }
- }
- return;
- }
-#endif /* ZEND_MULTIBYTE */
RETURN_FALSE;
}
}
@@ -4354,7 +4496,7 @@ PHP_FUNCTION(mb_check_encoding)
char *enc = NULL;
int enc_len;
mbfl_buffer_converter *convd;
- enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
+ const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
mbfl_string string, result, *ret = NULL;
long illegalchars = 0;
@@ -4367,23 +4509,23 @@ PHP_FUNCTION(mb_check_encoding)
}
if (enc != NULL) {
- no_encoding = mbfl_name2no_encoding(enc);
- if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) {
+ encoding = mbfl_name2encoding(enc);
+ if (!encoding || encoding == &mbfl_encoding_pass) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
RETURN_FALSE;
}
}
-
- convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
+
+ convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
if (convd == NULL) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
RETURN_FALSE;
}
mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
mbfl_buffer_converter_illegal_substchar(convd, 0);
-
+
/* initialize string */
- mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
+ mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
mbfl_string_init(&result);
string.val = (unsigned char *)var;
@@ -4402,8 +4544,36 @@ PHP_FUNCTION(mb_check_encoding)
}
/* }}} */
-/* {{{ MBSTRING_API int php_mb_encoding_translation() */
-MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
+
+/* {{{ php_mb_populate_current_detect_order_list */
+static void php_mb_populate_current_detect_order_list(TSRMLS_D)
+{
+ const mbfl_encoding **entry = 0;
+ size_t nentries;
+
+ if (MBSTRG(current_detect_order_list)) {
+ return;
+ }
+
+ if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
+ nentries = MBSTRG(detect_order_list_size);
+ entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
+ memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
+ } else {
+ const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
+ size_t i;
+ nentries = MBSTRG(default_detect_order_list_size);
+ entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
+ for (i = 0; i < nentries; i++) {
+ entry[i] = mbfl_no2encoding(src[i]);
+ }
+ }
+ MBSTRG(current_detect_order_list) = entry;
+ MBSTRG(current_detect_order_list_size) = nentries;
+}
+
+/* {{{ static int php_mb_encoding_translation() */
+static int php_mb_encoding_translation(TSRMLS_D)
{
return MBSTRG(encoding_translation);
}
@@ -4430,8 +4600,7 @@ MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *e
/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
{
- return php_mb_mbchar_bytes_ex(s,
- mbfl_no2encoding(MBSTRG(internal_encoding)));
+ return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
}
/* }}} */
@@ -4479,172 +4648,7 @@ MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t
/* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
{
- return php_mb_safe_strrchr_ex(s, c, nbytes,
- mbfl_no2encoding(MBSTRG(internal_encoding)));
-}
-/* }}} */
-
-/* {{{ MBSTRING_API char *php_mb_strrchr() */
-MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
-{
- return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);
-}
-/* }}} */
-
-/* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
-MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
-{
-
- if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
- return php_mb_mbchar_bytes_ex(s,
- mbfl_no2encoding(MBSTRG(http_input_identify)));
- } else {
- return php_mb_mbchar_bytes_ex(s,
- mbfl_no2encoding(MBSTRG(internal_encoding)));
- }
-}
-/* }}} */
-
-/* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
-MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
-{
- int i;
- mbfl_string string, result, *ret = NULL;
- enum mbfl_no_encoding from_encoding, to_encoding;
- mbfl_buffer_converter *convd;
-
- if (encoding_to) {
- /* new encoding */
- to_encoding = mbfl_name2no_encoding(encoding_to);
- if (to_encoding == mbfl_no_encoding_invalid) {
- return -1;
- }
- } else {
- to_encoding = MBSTRG(current_internal_encoding);
- }
- if (encoding_from) {
- /* old encoding */
- from_encoding = mbfl_name2no_encoding(encoding_from);
- if (from_encoding == mbfl_no_encoding_invalid) {
- return -1;
- }
- } else {
- from_encoding = MBSTRG(http_input_identify);
- }
-
- if (from_encoding == mbfl_no_encoding_pass) {
- return 0;
- }
-
- /* initialize string */
- mbfl_string_init(&string);
- mbfl_string_init(&result);
- string.no_encoding = from_encoding;
- string.no_language = MBSTRG(language);
-
- for (i=0; i<num; i++){
- string.val = (unsigned char *)str[i];
- string.len = len[i];
-
- /* initialize converter */
- convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
- if (convd == NULL) {
- return -1;
- }
- mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
-
- /* do it */
- ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
- if (ret != NULL) {
- efree(str[i]);
- str[i] = (char *)ret->val;
- len[i] = (int)ret->len;
- }
-
- MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
- mbfl_buffer_converter_delete(convd);
- }
-
- return ret ? 0 : -1;
-}
-/* }}} */
-
-/* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
- */
-MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
-{
- mbfl_string string;
- enum mbfl_no_encoding *elist;
- enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
- mbfl_encoding_detector *identd = NULL;
-
- int size;
- enum mbfl_no_encoding *list;
-
- if (MBSTRG(http_input_list_size) == 1 &&
- MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
- MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
- return SUCCESS;
- }
-
- if (MBSTRG(http_input_list_size) == 1 &&
- MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
- mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
- MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
- return SUCCESS;
- }
-
- if (arg_list && strlen(arg_list)>0) {
- /* make encoding list */
- list = NULL;
- size = 0;
- php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
-
- if (size > 0 && list != NULL) {
- elist = list;
- } else {
- elist = MBSTRG(current_detect_order_list);
- size = MBSTRG(current_detect_order_list_size);
- if (size <= 0){
- elist = MBSTRG(default_detect_order_list);
- size = MBSTRG(default_detect_order_list_size);
- }
- }
- } else {
- elist = MBSTRG(current_detect_order_list);
- size = MBSTRG(current_detect_order_list_size);
- if (size <= 0){
- elist = MBSTRG(default_detect_order_list);
- size = MBSTRG(default_detect_order_list_size);
- }
- }
-
- mbfl_string_init(&string);
- string.no_language = MBSTRG(language);
-
- identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection));
-
- if (identd) {
- int n = 0;
- while(n < num){
- string.val = (unsigned char *)arg_string[n];
- string.len = arg_length[n];
- if (mbfl_encoding_detector_feed(identd, &string)) {
- break;
- }
- n++;
- }
- encoding = mbfl_encoding_detector_judge(identd);
- mbfl_encoding_detector_delete(identd);
- }
-
- if (encoding != mbfl_no_encoding_invalid) {
- MBSTRG(http_input_identify) = encoding;
- return SUCCESS;
- } else {
- return FAILURE;
- }
+ return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
}
/* }}} */
@@ -4659,9 +4663,9 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding);
+ haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding);
+ needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
do {
size_t len = 0;
@@ -4725,165 +4729,18 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int
}
/* }}} */
-#ifdef ZEND_MULTIBYTE
-/* {{{ php_mb_set_zend_encoding() */
-static int php_mb_set_zend_encoding(TSRMLS_D)
+static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */
{
- /* 'd better use mbfl_memory_device? */
- char *name, *list = NULL;
- int n, *entry, list_size = 0;
- zend_encoding_detector encoding_detector;
- zend_encoding_converter encoding_converter;
- zend_encoding_oddlen encoding_oddlen;
-
- /* notify script encoding to Zend Engine */
- entry = MBSTRG(script_encoding_list);
- n = MBSTRG(script_encoding_list_size);
- while (n > 0) {
- name = (char *)mbfl_no_encoding2name(*entry);
- if (name) {
- list_size += strlen(name) + 1;
- if (!list) {
- list = (char*)emalloc(list_size);
- *list = '\0';
- } else {
- list = (char*)erealloc(list, list_size);
- strcat(list, ",");
- }
- strcat(list, name);
- }
- entry++;
- n--;
- }
- zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
- if (list) {
- efree(list);
- }
- encoding_detector = php_mb_encoding_detector;
- encoding_converter = php_mb_encoding_converter;
- encoding_oddlen = php_mb_oddlen;
-
- /* TODO: make independent from mbstring.encoding_translation? */
- if (MBSTRG(encoding_translation)) {
- /* notify internal encoding to Zend Engine */
- name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
- zend_multibyte_set_internal_encoding(name TSRMLS_CC);
- }
-
- zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
-
- return 0;
+ *list = (const zend_encoding **)MBSTRG(http_input_list);
+ *list_size = MBSTRG(http_input_list_size);
}
/* }}} */
-/* {{{ char *php_mb_encoding_detector()
- * Interface for Zend Engine
- */
-static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
+static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */
{
- mbfl_string string;
- const char *ret;
- enum mbfl_no_encoding *elist;
- int size, *list;
-
- /* make encoding list */
- list = NULL;
- size = 0;
- php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
- if (size <= 0) {
- return NULL;
- }
- if (size > 0 && list != NULL) {
- elist = list;
- } else {
- elist = MBSTRG(current_detect_order_list);
- size = MBSTRG(current_detect_order_list_size);
- }
-
- mbfl_string_init(&string);
- string.no_language = MBSTRG(language);
- string.val = (unsigned char *)arg_string;
- string.len = arg_length;
- ret = mbfl_identify_encoding_name(&string, elist, size, 0);
- if (list != NULL) {
- efree((void *)list);
- }
- if (ret != NULL) {
- return estrdup(ret);
- } else {
- return NULL;
- }
-}
-/* }}} */
-
-/* {{{ int php_mb_encoding_converter() */
-static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC)
-{
- mbfl_string string, result, *ret;
- enum mbfl_no_encoding from_encoding, to_encoding;
- mbfl_buffer_converter *convd;
-
- /* new encoding */
- to_encoding = mbfl_name2no_encoding(encoding_to);
- if (to_encoding == mbfl_no_encoding_invalid) {
- return -1;
- }
- /* old encoding */
- from_encoding = mbfl_name2no_encoding(encoding_from);
- if (from_encoding == mbfl_no_encoding_invalid) {
- return -1;
- }
- /* initialize string */
- mbfl_string_init(&string);
- mbfl_string_init(&result);
- string.no_encoding = from_encoding;
- string.no_language = MBSTRG(language);
- string.val = (unsigned char*)from;
- string.len = from_length;
-
- /* initialize converter */
- convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
- if (convd == NULL) {
- return -1;
- }
- mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
-
- /* do it */
- ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
- if (ret != NULL) {
- *to = ret->val;
- *to_length = ret->len;
- }
-
- MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
- mbfl_buffer_converter_delete(convd);
-
- return ret ? 0 : -1;
-}
-/* }}} */
-
-/* {{{ int php_mb_oddlen()
- * returns number of odd (e.g. appears only first byte of multibyte
- * character) chars
- */
-static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
-{
- mbfl_string mb_string;
-
- mbfl_string_init(&mb_string);
- mb_string.no_language = MBSTRG(language);
- mb_string.no_encoding = mbfl_name2no_encoding(encoding);
- mb_string.val = (unsigned char *)string;
- mb_string.len = length;
-
- if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
- return 0;
- }
- return mbfl_oddlen(&mb_string);
+ MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
}
/* }}} */
-#endif /* ZEND_MULTIBYTE */
#endif /* HAVE_MBSTRING */