summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ext/mbstring/php_unicode.c 29
-rw-r--r-- ext/mbstring/tests/bug69267.phpt 44
-rw-r--r-- ext/mbstring/ucgendat/ucgendat.c 16
-rw-r--r-- ext/mbstring/unicode_data.h 58
4 files changed, 101 insertions, 46 deletions
diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c
index 208c10319e..2baa4690c9 100644
--- a/ext/mbstring/php_unicode.c
+++ b/ext/mbstring/php_unicode.c
@@ -116,6 +116,7 @@ MBSTRING_API int php_unicode_is_prop(unsigned long code, ...)
static unsigned long case_lookup(unsigned long code, long l, long r, int field)
{
long m;
+ const unsigned int *tmp;
/*
* Do the binary search.
@@ -126,13 +127,13 @@ static unsigned long case_lookup(unsigned long code, long l, long r, int field)
* the beginning of a case mapping triple.
*/
m = (l + r) >> 1;
- m -= (m % 3);
- if (code > _uccase_map[m])
- l = m + 3;
- else if (code < _uccase_map[m])
- r = m - 3;
- else if (code == _uccase_map[m])
- return _uccase_map[m + field];
+ tmp = &_uccase_map[m*3];
+ if (code > *tmp)
+ l = m + 1;
+ else if (code < *tmp)
+ r = m - 1;
+ else if (code == *tmp)
+ return tmp[field];
}
return code;
@@ -161,16 +162,16 @@ MBSTRING_API unsigned long php_unicode_toupper(unsigned long code, enum mbfl_no_
/*
* The character is lower case.
*/
- field = 2;
+ field = 1;
l = _uccase_len[0];
- r = (l + _uccase_len[1]) - 3;
+ r = (l + _uccase_len[1]) - 1;
} else {
/*
* The character is title case.
*/
field = 1;
l = _uccase_len[0] + _uccase_len[1];
- r = _uccase_size - 3;
+ r = _uccase_size - 1;
}
return case_lookup(code, l, r, field);
}
@@ -200,14 +201,14 @@ MBSTRING_API unsigned long php_unicode_tolower(unsigned long code, enum mbfl_no_
*/
field = 1;
l = 0;
- r = _uccase_len[0] - 3;
+ r = _uccase_len[0] - 1;
} else {
/*
* The character is title case.
*/
field = 2;
l = _uccase_len[0] + _uccase_len[1];
- r = _uccase_size - 3;
+ r = _uccase_size - 1;
}
return case_lookup(code, l, r, field);
}
@@ -230,13 +231,13 @@ MBSTRING_API unsigned long php_unicode_totitle(unsigned long code, enum mbfl_no_
* The character is upper case.
*/
l = 0;
- r = _uccase_len[0] - 3;
+ r = _uccase_len[0] - 1;
} else {
/*
* The character is lower case.
*/
l = _uccase_len[0];
- r = (l + _uccase_len[1]) - 3;
+ r = (l + _uccase_len[1]) - 1;
}
return case_lookup(code, l, r, field);
diff --git a/ext/mbstring/tests/bug69267.phpt b/ext/mbstring/tests/bug69267.phpt
new file mode 100644
index 0000000000..958f1c548b
--- /dev/null
+++ b/ext/mbstring/tests/bug69267.phpt
@@ -0,0 +1,44 @@
+--TEST--
+Bug #69267: mb_strtolower fails on titlecase characters
+--FILE--
+<?php
+
+$str_l = "džljnjdz";
+$str_u = "DŽLJNJDZ";
+$str_t = "DžLjNjDz";
+var_dump(mb_strtolower($str_l));
+var_dump(mb_strtolower($str_u));
+var_dump(mb_strtolower($str_t));
+var_dump(mb_strtoupper($str_l));
+var_dump(mb_strtoupper($str_u));
+var_dump(mb_strtoupper($str_t));
+var_dump(mb_convert_case($str_l, MB_CASE_TITLE));
+var_dump(mb_convert_case($str_u, MB_CASE_TITLE));
+var_dump(mb_convert_case($str_t, MB_CASE_TITLE));
+
+$str_l = "ᾳ";
+$str_t = "ᾼ";
+var_dump(mb_strtolower($str_l));
+var_dump(mb_strtolower($str_t));
+var_dump(mb_strtoupper($str_l));
+var_dump(mb_strtoupper($str_t));
+var_dump(mb_convert_case($str_l, MB_CASE_TITLE));
+var_dump(mb_convert_case($str_t, MB_CASE_TITLE));
+
+?>
+--EXPECT--
+string(8) "džljnjdz"
+string(8) "džljnjdz"
+string(8) "džljnjdz"
+string(8) "DŽLJNJDZ"
+string(8) "DŽLJNJDZ"
+string(8) "DŽLJNJDZ"
+string(8) "Džljnjdz"
+string(8) "Džljnjdz"
+string(8) "Džljnjdz"
+string(3) "ᾳ"
+string(3) "ᾳ"
+string(3) "ᾼ"
+string(3) "ᾼ"
+string(3) "ᾼ"
+string(3) "ᾼ"
diff --git a/ext/mbstring/ucgendat/ucgendat.c b/ext/mbstring/ucgendat/ucgendat.c
index f7b8ab5498..42441082ef 100644
--- a/ext/mbstring/ucgendat/ucgendat.c
+++ b/ext/mbstring/ucgendat/ucgendat.c
@@ -539,6 +539,10 @@ add_title(ac_uint4 code)
*/
cases[2] = code;
+ /* If lower/upper case does not exist, stay the same */
+ if (!cases[0]) cases[0] = code;
+ if (!cases[1]) cases[1] = code;
+
if (title_used == title_size) {
if (title_size == 0)
title = (_case_t *) malloc(sizeof(_case_t) << 3);
@@ -825,7 +829,9 @@ read_cdata(FILE *in)
lineno = skip = 0;
while (fgets(line, sizeof(line), in)) {
- if( (s=strchr(line, '\n')) ) *s = '\0';
+ int is_title = 0;
+
+ if( (s=strchr(line, '\n')) ) *s = '\0';
lineno++;
/*
@@ -968,6 +974,10 @@ read_cdata(FILE *in)
ordered_range_insert(code, s, e - s);
+ if (e - s == 2 && s[0] == 'L' && s[1] == 't') {
+ is_title = 1;
+ }
+
/*
* Locate the combining class code.
*/
@@ -1112,7 +1122,7 @@ read_cdata(FILE *in)
if (*s == ';')
s++;
}
- if (cases[0] && cases[1])
+ if (is_title)
/*
* Add the upper and lower mappings for a title case character.
*/
@@ -1442,7 +1452,7 @@ write_cdata(char *opath)
" * LowerIndex = _uccase_len[0]\n"
" * TitleIndex = LowerIndex + _uccase_len[1] */\n\n");
fprintf(out, PREF "unsigned short _uccase_len[2] = {%ld, %ld};\n\n",
- (long) upper_used * 3, (long) lower_used * 3);
+ (long) upper_used, (long) lower_used);
fprintf(out, PREF "unsigned int _uccase_map[] = {");
if (upper_used > 0)
diff --git a/ext/mbstring/unicode_data.h b/ext/mbstring/unicode_data.h
index 16ceb6a975..c176128cf1 100644
--- a/ext/mbstring/unicode_data.h
+++ b/ext/mbstring/unicode_data.h
@@ -2469,7 +2469,7 @@ static const unsigned int _uccase_size = 2470;
* LowerIndex = _uccase_len[0]
* TitleIndex = LowerIndex + _uccase_len[1] */
-static const unsigned short _uccase_len[2] = {3687, 3711};
+static const unsigned short _uccase_len[2] = {1202, 1237};
static const unsigned int _uccase_map[] = {
0x00000041, 0x00000061, 0x00000041,
@@ -3235,40 +3235,14 @@ static const unsigned int _uccase_map[] = {
0x00001f6d, 0x00001f65, 0x00001f6d,
0x00001f6e, 0x00001f66, 0x00001f6e,
0x00001f6f, 0x00001f67, 0x00001f6f,
- 0x00001f88, 0x00001f80, 0x00001f88,
- 0x00001f89, 0x00001f81, 0x00001f89,
- 0x00001f8a, 0x00001f82, 0x00001f8a,
- 0x00001f8b, 0x00001f83, 0x00001f8b,
- 0x00001f8c, 0x00001f84, 0x00001f8c,
- 0x00001f8d, 0x00001f85, 0x00001f8d,
- 0x00001f8e, 0x00001f86, 0x00001f8e,
- 0x00001f8f, 0x00001f87, 0x00001f8f,
- 0x00001f98, 0x00001f90, 0x00001f98,
- 0x00001f99, 0x00001f91, 0x00001f99,
- 0x00001f9a, 0x00001f92, 0x00001f9a,
- 0x00001f9b, 0x00001f93, 0x00001f9b,
- 0x00001f9c, 0x00001f94, 0x00001f9c,
- 0x00001f9d, 0x00001f95, 0x00001f9d,
- 0x00001f9e, 0x00001f96, 0x00001f9e,
- 0x00001f9f, 0x00001f97, 0x00001f9f,
- 0x00001fa8, 0x00001fa0, 0x00001fa8,
- 0x00001fa9, 0x00001fa1, 0x00001fa9,
- 0x00001faa, 0x00001fa2, 0x00001faa,
- 0x00001fab, 0x00001fa3, 0x00001fab,
- 0x00001fac, 0x00001fa4, 0x00001fac,
- 0x00001fad, 0x00001fa5, 0x00001fad,
- 0x00001fae, 0x00001fa6, 0x00001fae,
- 0x00001faf, 0x00001fa7, 0x00001faf,
0x00001fb8, 0x00001fb0, 0x00001fb8,
0x00001fb9, 0x00001fb1, 0x00001fb9,
0x00001fba, 0x00001f70, 0x00001fba,
0x00001fbb, 0x00001f71, 0x00001fbb,
- 0x00001fbc, 0x00001fb3, 0x00001fbc,
0x00001fc8, 0x00001f72, 0x00001fc8,
0x00001fc9, 0x00001f73, 0x00001fc9,
0x00001fca, 0x00001f74, 0x00001fca,
0x00001fcb, 0x00001f75, 0x00001fcb,
- 0x00001fcc, 0x00001fc3, 0x00001fcc,
0x00001fd8, 0x00001fd0, 0x00001fd8,
0x00001fd9, 0x00001fd1, 0x00001fd9,
0x00001fda, 0x00001f76, 0x00001fda,
@@ -3282,7 +3256,6 @@ static const unsigned int _uccase_map[] = {
0x00001ff9, 0x00001f79, 0x00001ff9,
0x00001ffa, 0x00001f7c, 0x00001ffa,
0x00001ffb, 0x00001f7d, 0x00001ffb,
- 0x00001ffc, 0x00001ff3, 0x00001ffc,
0x00002126, 0x000003c9, 0x00002126,
0x0000212a, 0x0000006b, 0x0000212a,
0x0000212b, 0x000000e5, 0x0000212b,
@@ -4941,6 +4914,33 @@ static const unsigned int _uccase_map[] = {
0x000001c5, 0x000001c4, 0x000001c6,
0x000001c8, 0x000001c7, 0x000001c9,
0x000001cb, 0x000001ca, 0x000001cc,
- 0x000001f2, 0x000001f1, 0x000001f3
+ 0x000001f2, 0x000001f1, 0x000001f3,
+ 0x00001f88, 0x00001f88, 0x00001f80,
+ 0x00001f89, 0x00001f89, 0x00001f81,
+ 0x00001f8a, 0x00001f8a, 0x00001f82,
+ 0x00001f8b, 0x00001f8b, 0x00001f83,
+ 0x00001f8c, 0x00001f8c, 0x00001f84,
+ 0x00001f8d, 0x00001f8d, 0x00001f85,
+ 0x00001f8e, 0x00001f8e, 0x00001f86,
+ 0x00001f8f, 0x00001f8f, 0x00001f87,
+ 0x00001f98, 0x00001f98, 0x00001f90,
+ 0x00001f99, 0x00001f99, 0x00001f91,
+ 0x00001f9a, 0x00001f9a, 0x00001f92,
+ 0x00001f9b, 0x00001f9b, 0x00001f93,
+ 0x00001f9c, 0x00001f9c, 0x00001f94,
+ 0x00001f9d, 0x00001f9d, 0x00001f95,
+ 0x00001f9e, 0x00001f9e, 0x00001f96,
+ 0x00001f9f, 0x00001f9f, 0x00001f97,
+ 0x00001fa8, 0x00001fa8, 0x00001fa0,
+ 0x00001fa9, 0x00001fa9, 0x00001fa1,
+ 0x00001faa, 0x00001faa, 0x00001fa2,
+ 0x00001fab, 0x00001fab, 0x00001fa3,
+ 0x00001fac, 0x00001fac, 0x00001fa4,
+ 0x00001fad, 0x00001fad, 0x00001fa5,
+ 0x00001fae, 0x00001fae, 0x00001fa6,
+ 0x00001faf, 0x00001faf, 0x00001fa7,
+ 0x00001fbc, 0x00001fbc, 0x00001fb3,
+ 0x00001fcc, 0x00001fcc, 0x00001fc3,
+ 0x00001ffc, 0x00001ffc, 0x00001ff3
};