diff options
author | Benjamin Peterson <benjamin@python.org> | 2009-10-29 01:49:07 +0000 |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2009-10-29 01:49:07 +0000 |
commit | ea0e3b0d60b075e6d5cb09dcd83fd86e828612cb (patch) | |
tree | 4a8d50b06b8296e6dc9175e7ada7bfe4bb616a9a | |
parent | a7d44001b1f4c3a61c8c96e805034653e268122d (diff) | |
download | cpython-git-ea0e3b0d60b075e6d5cb09dcd83fd86e828612cb.tar.gz |
do a backport of r75928
The added test does not fail without the patch, but we still fix the issue of
surrogates being used in wide builds where they should not be.
-rw-r--r-- | Lib/test/test_pep263.py | 7 | ||||
-rw-r--r-- | Python/ast.c | 21 |
2 files changed, 19 insertions, 9 deletions
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py index b7bb5a2965..e4faa9ff56 100644 --- a/Lib/test/test_pep263.py +++ b/Lib/test/test_pep263.py @@ -23,6 +23,13 @@ class PEP263Test(unittest.TestCase): self.assertEqual(d['u'], u'\xf3') + def test_issue3297(self): + c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec") + d = {} + exec(c, d) + self.assertEqual(d['a'], d['b']) + self.assertEqual(len(d['a']), len(d['b'])) + def test_main(): test_support.run_unittest(PEP263Test) diff --git a/Python/ast.c b/Python/ast.c index 347da2aa95..7f379a5957 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -3289,10 +3289,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons u = NULL; } else { /* check for integer overflow */ - if (len > PY_SIZE_MAX / 4) + if (len > PY_SIZE_MAX / 6) return NULL; - /* "\XX" may become "\u005c\uHHLL" (12 bytes) */ - u = PyString_FromStringAndSize((char *)NULL, len * 4); + /* "<C3><A4>" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5 + "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */ + u = PyString_FromStringAndSize((char *)NULL, len * 6); if (u == NULL) return NULL; p = buf = PyString_AsString(u); @@ -3309,19 +3310,21 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons PyObject *w; char *r; Py_ssize_t rn, i; - w = decode_utf8(c, &s, end, "utf-16-be"); + w = decode_utf8(c, &s, end, "utf-32-be"); if (w == NULL) { Py_DECREF(u); return NULL; } r = PyString_AsString(w); rn = PyString_Size(w); - assert(rn % 2 == 0); - for (i = 0; i < rn; i += 2) { - sprintf(p, "\\u%02x%02x", + assert(rn % 4 == 0); + for (i = 0; i < rn; i += 4) { + sprintf(p, "\\U%02x%02x%02x%02x", r[i + 0] & 0xFF, - r[i + 1] & 0xFF); - p += 6; + r[i + 1] & 0xFF, + r[i + 2] & 0xFF, + r[i + 3] & 0xFF); + p += 10; } Py_DECREF(w); } else { |