1 files changed, 18 insertions, 17 deletions
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 1a55696840..463be2c8f8 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1,12 +1,12 @@
 /* ------------------------------------------------------------------------
 
-   unicodedata -- Provides access to the Unicode 5.1 data base.
+   unicodedata -- Provides access to the Unicode 5.2 data base.
 
-   Data was extracted from the Unicode 5.1 UnicodeData.txt file.
+   Data was extracted from the Unicode 5.2 UnicodeData.txt file.
 
    Written by Marc-Andre Lemburg (mal@lemburg.com).
    Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
-   Modified by Martin v. L�wis (martin@v.loewis.de)
+   Modified by Martin v. Löwis (martin@v.loewis.de)
 
    Copyright (c) Corporation for National Research Initiatives.
 
@@ -36,7 +36,7 @@ typedef struct change_record {
     const unsigned char category_changed;
     const unsigned char decimal_changed;
     const unsigned char mirrored_changed;
-    const int numeric_changed;
+    const double numeric_changed;
 } change_record;
 
 /* data file generated by Tools/unicode/makeunicodedata.py */
@@ -403,7 +403,8 @@ unicodedata_decomposition(PyObject *self, PyObject *args)
 {
     PyUnicodeObject *v;
     char decomp[256];
-    int code, index, count, i;
+    int code, index, count;
+    size_t i;
     unsigned int prefix_index;
     Py_UCS4 c;
 
@@ -450,15 +451,12 @@ unicodedata_decomposition(PyObject *self, PyObject *args)
     while (count-- > 0) {
         if (i)
             decomp[i++] = ' ';
-        assert((size_t)i < sizeof(decomp));
+        assert(i < sizeof(decomp));
         PyOS_snprintf(decomp + i, sizeof(decomp) - i, "%04X",
                       decomp_data[++index]);
         i += strlen(decomp + i);
     }
-
-    decomp[i] = '\0';
-
-    return PyUnicode_FromString(decomp);
+    return PyUnicode_FromStringAndSize(decomp, i);
 }
 
 static void
@@ -871,13 +869,16 @@ static char *hangul_syllables[][3] = {
     { 0,    0,     "H"  }
 };
 
+/* These ranges need to match makeunicodedata.py:cjk_ranges. */
 static int
 is_unified_ideograph(Py_UCS4 code)
 {
-    return (
-        (0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
-        (0x4E00 <= code && code <= 0x9FC3) || /* CJK Ideograph, Unicode 5.1 */
-        (0x20000 <= code && code <= 0x2A6D6));/* CJK Ideograph Extension B */
+    return
+        (0x3400 <= code && code <= 0x4DB5)   || /* CJK Ideograph Extension A */
+        (0x4E00 <= code && code <= 0x9FCB)   || /* CJK Ideograph */
+        (0x20000 <= code && code <= 0x2A6D6) || /* CJK Ideograph Extension B */
+        (0x2A700 <= code && code <= 0x2B734) || /* CJK Ideograph Extension C */
+        (0x2B740 <= code && code <= 0x2B81D);   /* CJK Ideograph Extension D */
 }
 
 static int
@@ -1240,11 +1241,11 @@ PyDoc_STRVAR(unicodedata_docstring,
 "This module provides access to the Unicode Character Database which\n\
 defines character properties for all Unicode characters. The data in\n\
 this database is based on the UnicodeData.txt file version\n\
-5.1.0 which is publically available from ftp://ftp.unicode.org/.\n\
+5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\
 \n\
 The module uses the same names and symbols as defined by the\n\
-UnicodeData File Format 5.1.0 (see\n\
-http://www.unicode.org/Public/5.1.0/ucd/UCD.html).");
+UnicodeData File Format 5.2.0 (see\n\
+http://www.unicode.org/reports/tr44/tr44-4.html).");
 
 
 static struct PyModuleDef unicodedatamodule = {