summary | refs | log | tree | commit | diff
path: root/Modules/unicodedata.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/unicodedata.c')
-rw-r--r-- Modules/unicodedata.c | 35
1 file changed, 18 insertions, 17 deletions
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 1a55696840..463be2c8f8 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1,12 +1,12 @@
/* ------------------------------------------------------------------------
- unicodedata -- Provides access to the Unicode 5.1 data base.
+ unicodedata -- Provides access to the Unicode 5.2 data base.
- Data was extracted from the Unicode 5.1 UnicodeData.txt file.
+ Data was extracted from the Unicode 5.2 UnicodeData.txt file.
Written by Marc-Andre Lemburg (mal@lemburg.com).
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
- Modified by Martin v. Löwis (martin@v.loewis.de)
+ Modified by Martin v. Löwis (martin@v.loewis.de)
Copyright (c) Corporation for National Research Initiatives.
@@ -36,7 +36,7 @@ typedef struct change_record {
const unsigned char category_changed;
const unsigned char decimal_changed;
const unsigned char mirrored_changed;
- const int numeric_changed;
+ const double numeric_changed;
} change_record;
/* data file generated by Tools/unicode/makeunicodedata.py */
@@ -403,7 +403,8 @@ unicodedata_decomposition(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
char decomp[256];
- int code, index, count, i;
+ int code, index, count;
+ size_t i;
unsigned int prefix_index;
Py_UCS4 c;
@@ -450,15 +451,12 @@ unicodedata_decomposition(PyObject *self, PyObject *args)
while (count-- > 0) {
if (i)
decomp[i++] = ' ';
- assert((size_t)i < sizeof(decomp));
+ assert(i < sizeof(decomp));
PyOS_snprintf(decomp + i, sizeof(decomp) - i, "%04X",
decomp_data[++index]);
i += strlen(decomp + i);
}
-
- decomp[i] = '\0';
-
- return PyUnicode_FromString(decomp);
+ return PyUnicode_FromStringAndSize(decomp, i);
}
static void
@@ -871,13 +869,16 @@ static char *hangul_syllables[][3] = {
{ 0, 0, "H" }
};
+/* These ranges need to match makeunicodedata.py:cjk_ranges. */
static int
is_unified_ideograph(Py_UCS4 code)
{
- return (
- (0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
- (0x4E00 <= code && code <= 0x9FC3) || /* CJK Ideograph, Unicode 5.1 */
- (0x20000 <= code && code <= 0x2A6D6));/* CJK Ideograph Extension B */
+ return
+ (0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
+ (0x4E00 <= code && code <= 0x9FCB) || /* CJK Ideograph */
+ (0x20000 <= code && code <= 0x2A6D6) || /* CJK Ideograph Extension B */
+ (0x2A700 <= code && code <= 0x2B734) || /* CJK Ideograph Extension C */
+ (0x2B740 <= code && code <= 0x2B81D); /* CJK Ideograph Extension D */
}
static int
@@ -1240,11 +1241,11 @@ PyDoc_STRVAR(unicodedata_docstring,
"This module provides access to the Unicode Character Database which\n\
defines character properties for all Unicode characters. The data in\n\
this database is based on the UnicodeData.txt file version\n\
-5.1.0 which is publically available from ftp://ftp.unicode.org/.\n\
+5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\
\n\
The module uses the same names and symbols as defined by the\n\
-UnicodeData File Format 5.1.0 (see\n\
-http://www.unicode.org/Public/5.1.0/ucd/UCD.html).");
+UnicodeData File Format 5.2.0 (see\n\
+http://www.unicode.org/reports/tr44/tr44-4.html).");
static struct PyModuleDef unicodedatamodule = {