diff options
Diffstat (limited to 'ext/mysqlnd/mysqlnd_charset.c')
| -rw-r--r-- | ext/mysqlnd/mysqlnd_charset.c | 50 | 
1 files changed, 46 insertions, 4 deletions
diff --git a/ext/mysqlnd/mysqlnd_charset.c b/ext/mysqlnd/mysqlnd_charset.c
index c2f2b8890e..dfa90db255 100644
--- a/ext/mysqlnd/mysqlnd_charset.c
+++ b/ext/mysqlnd/mysqlnd_charset.c
@@ -418,20 +418,60 @@ static uint mysqlnd_mbcharlen_utf16(unsigned int utf16)
 /* {{{ utf32 functions */
-static uint
-check_mb_utf32(const char *start __attribute((unused)), const char *end __attribute((unused)))
+static unsigned int check_mb_utf32(const char *start __attribute((unused)), const char *end __attribute((unused)))
 {
 	return 4;
 }
-static uint
-mysqlnd_mbcharlen_utf32(unsigned int utf32 __attribute((unused)))
+static unsigned int mysqlnd_mbcharlen_utf32(unsigned int utf32 __attribute((unused)))
 {
 	return 4;
 }
 /* }}} */
+
+/* {{{ gb18030 functions */
+#define is_gb18030_odd(c)          (0x81 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0xFE)
+#define is_gb18030_even_2(c)       ((0x40 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0x7E) || (0x80 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0xFE))
+#define is_gb18030_even_4(c)       (0x30 <= (zend_uchar) (c) && (zend_uchar) (c) <= 0x39)
+
+
+static unsigned int mysqlnd_mbcharlen_gb18030(unsigned int c)
+{
+	if (c <= 0xFF) {
+		return !is_gb18030_odd(c);
+	}
+	if (c > 0xFFFF || !is_gb18030_odd((c >> 8) & 0xFF)) {
+		return 0;
+	}
+	if (is_gb18030_even_2((c & 0xFF))) {
+	    return 2;
+	}
+	if (is_gb18030_even_4((c & 0xFF))) {
+		return 4;
+	}
+
+	return 0;
+}
+
+
+static unsigned int my_ismbchar_gb18030(const char * start, const char * end)
+{
+	if (end - start <= 1 || !is_gb18030_odd(start[0])) {
+		return 0;
+	}
+
+	if (is_gb18030_even_2(start[1])) {
+		return 2;
+	} else if (end - start > 3 && is_gb18030_even_4(start[1]) && is_gb18030_odd(start[2]) && is_gb18030_even_4(start[3])) {
+		return 4;
+	}
+
+	return 0;
+}
+/* }}} */
+
 /*
   The server compiles sometimes the full utf-8 (the mb4) as utf8m4, and the old as utf8,
   for BC reasons. Sometimes, utf8mb4 is just utf8 but the old charsets are utf8mb3.
@@ -643,6 +683,8 @@ const MYSQLND_CHARSET mysqlnd_charsets[] =
 	{ 245, UTF8_MB4, UTF8_MB4"_croatian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
 	{ 246, UTF8_MB4, UTF8_MB4"_unicode_520_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
 	{ 247, UTF8_MB4, UTF8_MB4"_vietnamese_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
+	{ 248, "gb18030", "gb18030_chinese_ci", 1, 4, "", mysqlnd_mbcharlen_gb18030, my_ismbchar_gb18030},
+	{ 249, "gb18030", "gb18030_bin", 1, 4, "", mysqlnd_mbcharlen_gb18030, my_ismbchar_gb18030},
 	{ 254, UTF8_MB3, UTF8_MB3"_general_cs", 1, 3, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
 	{   0, NULL, NULL, 0, 0, NULL, NULL, NULL}
