diff options
Diffstat (limited to 'Modules/unicodedata.c')
-rw-r--r-- | Modules/unicodedata.c | 35 |
1 files changed, 18 insertions, 17 deletions
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index 1a55696840..463be2c8f8 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -1,12 +1,12 @@ /* ------------------------------------------------------------------------ - unicodedata -- Provides access to the Unicode 5.1 data base. + unicodedata -- Provides access to the Unicode 5.2 data base. - Data was extracted from the Unicode 5.1 UnicodeData.txt file. + Data was extracted from the Unicode 5.2 UnicodeData.txt file. Written by Marc-Andre Lemburg (mal@lemburg.com). Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com) - Modified by Martin v. Löwis (martin@v.loewis.de) + Modified by Martin v. Löwis (martin@v.loewis.de) Copyright (c) Corporation for National Research Initiatives. @@ -36,7 +36,7 @@ typedef struct change_record { const unsigned char category_changed; const unsigned char decimal_changed; const unsigned char mirrored_changed; - const int numeric_changed; + const double numeric_changed; } change_record; /* data file generated by Tools/unicode/makeunicodedata.py */ @@ -403,7 +403,8 @@ unicodedata_decomposition(PyObject *self, PyObject *args) { PyUnicodeObject *v; char decomp[256]; - int code, index, count, i; + int code, index, count; + size_t i; unsigned int prefix_index; Py_UCS4 c; @@ -450,15 +451,12 @@ unicodedata_decomposition(PyObject *self, PyObject *args) while (count-- > 0) { if (i) decomp[i++] = ' '; - assert((size_t)i < sizeof(decomp)); + assert(i < sizeof(decomp)); PyOS_snprintf(decomp + i, sizeof(decomp) - i, "%04X", decomp_data[++index]); i += strlen(decomp + i); } - - decomp[i] = '\0'; - - return PyUnicode_FromString(decomp); + return PyUnicode_FromStringAndSize(decomp, i); } static void @@ -871,13 +869,16 @@ static char *hangul_syllables[][3] = { { 0, 0, "H" } }; +/* These ranges need to match makeunicodedata.py:cjk_ranges. */ static int is_unified_ideograph(Py_UCS4 code) { - return ( - (0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */ - (0x4E00 <= code && code <= 0x9FC3) || /* CJK Ideograph, Unicode 5.1 */ - (0x20000 <= code && code <= 0x2A6D6));/* CJK Ideograph Extension B */ + return + (0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */ + (0x4E00 <= code && code <= 0x9FCB) || /* CJK Ideograph */ + (0x20000 <= code && code <= 0x2A6D6) || /* CJK Ideograph Extension B */ + (0x2A700 <= code && code <= 0x2B734) || /* CJK Ideograph Extension C */ + (0x2B740 <= code && code <= 0x2B81D); /* CJK Ideograph Extension D */ } static int @@ -1240,11 +1241,11 @@ PyDoc_STRVAR(unicodedata_docstring, "This module provides access to the Unicode Character Database which\n\ defines character properties for all Unicode characters. The data in\n\ this database is based on the UnicodeData.txt file version\n\ -5.1.0 which is publically available from ftp://ftp.unicode.org/.\n\ +5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\ \n\ The module uses the same names and symbols as defined by the\n\ -UnicodeData File Format 5.1.0 (see\n\ -http://www.unicode.org/Public/5.1.0/ucd/UCD.html)."); +UnicodeData File Format 5.2.0 (see\n\ +http://www.unicode.org/reports/tr44/tr44-4.html)."); static struct PyModuleDef unicodedatamodule = { |