summaryrefslogtreecommitdiff
path: root/Python
diff options
context:
space:
mode:
author Benjamin Peterson <benjamin@python.org> 2009-10-29 01:49:07 +0000
committer Benjamin Peterson <benjamin@python.org> 2009-10-29 01:49:07 +0000
commit ea0e3b0d60b075e6d5cb09dcd83fd86e828612cb (patch)
tree 4a8d50b06b8296e6dc9175e7ada7bfe4bb616a9a /Python
parent a7d44001b1f4c3a61c8c96e805034653e268122d (diff)
download cpython-git-ea0e3b0d60b075e6d5cb09dcd83fd86e828612cb.tar.gz
do a backport of r75928
The added test does not fail without the patch, but we still fix the issue of surrogates being used in wide builds where they should not be.
Diffstat (limited to 'Python')
-rw-r--r--Python/ast.c21
1 files changed, 12 insertions, 9 deletions
diff --git a/Python/ast.c b/Python/ast.c
index 347da2aa95..7f379a5957 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3289,10 +3289,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
u = NULL;
} else {
/* check for integer overflow */
- if (len > PY_SIZE_MAX / 4)
+ if (len > PY_SIZE_MAX / 6)
return NULL;
- /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
- u = PyString_FromStringAndSize((char *)NULL, len * 4);
+ /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
+ "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
+ u = PyString_FromStringAndSize((char *)NULL, len * 6);
if (u == NULL)
return NULL;
p = buf = PyString_AsString(u);
@@ -3309,19 +3310,21 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
PyObject *w;
char *r;
Py_ssize_t rn, i;
- w = decode_utf8(c, &s, end, "utf-16-be");
+ w = decode_utf8(c, &s, end, "utf-32-be");
if (w == NULL) {
Py_DECREF(u);
return NULL;
}
r = PyString_AsString(w);
rn = PyString_Size(w);
- assert(rn % 2 == 0);
- for (i = 0; i < rn; i += 2) {
- sprintf(p, "\\u%02x%02x",
+ assert(rn % 4 == 0);
+ for (i = 0; i < rn; i += 4) {
+ sprintf(p, "\\U%02x%02x%02x%02x",
r[i + 0] & 0xFF,
- r[i + 1] & 0xFF);
- p += 6;
+ r[i + 1] & 0xFF,
+ r[i + 2] & 0xFF,
+ r[i + 3] & 0xFF);
+ p += 10;
}
Py_DECREF(w);
} else {