summaryrefslogtreecommitdiff
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@haypocalc.com>2011-10-02 23:33:16 +0200
committerVictor Stinner <victor.stinner@haypocalc.com>2011-10-02 23:33:16 +0200
commitf42dc448e0df37ed9dba7018f7fb26787f25518b (patch)
tree2c1eea3897b9f49f22187834a23f583e6b5e7228 /Objects/unicodeobject.c
parentc53be96c54ec619266ff64b732100bbe0d592b69 (diff)
downloadcpython-git-f42dc448e0df37ed9dba7018f7fb26787f25518b.tar.gz
PyUnicode_CopyCharacters() fails when copying latin1 into ascii
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r--Objects/unicodeobject.c63
1 files changed, 56 insertions, 7 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 1aabd2e5dd..8f710bd29d 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -455,6 +455,46 @@ _PyUnicode_New(Py_ssize_t length)
return NULL;
}
+static const char*
+unicode_kind_name(PyObject *unicode)
+{
+ assert(PyUnicode_Check(unicode));
+ if (!PyUnicode_IS_COMPACT(unicode))
+ {
+ if (!PyUnicode_IS_READY(unicode))
+ return "wstr";
+ switch(PyUnicode_KIND(unicode))
+ {
+ case PyUnicode_1BYTE_KIND:
+ if (PyUnicode_IS_COMPACT_ASCII(unicode))
+ return "legacy ascii";
+ else
+ return "legacy latin1";
+ case PyUnicode_2BYTE_KIND:
+ return "legacy UCS2";
+ case PyUnicode_4BYTE_KIND:
+ return "legacy UCS4";
+ default:
+ return "<legacy invalid kind>";
+ }
+ }
+ assert(PyUnicode_IS_READY(unicode));
+ switch(PyUnicode_KIND(unicode))
+ {
+ case PyUnicode_1BYTE_KIND:
+ if (PyUnicode_IS_COMPACT_ASCII(unicode))
+ return "ascii";
+ else
+ return "compact latin1";
+ case PyUnicode_2BYTE_KIND:
+ return "compact UCS2";
+ case PyUnicode_4BYTE_KIND:
+ return "compact UCS4";
+ default:
+ return "<invalid compact kind>";
+ }
+}
+
#ifdef Py_DEBUG
int unicode_new_new_calls = 0;
@@ -672,8 +712,10 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
to_kind = PyUnicode_KIND(to);
to_data = PyUnicode_DATA(to);
- if (from_kind == to_kind) {
- /* fast path */
+ if (from_kind == to_kind
+ /* deny latin1 => ascii */
+ && PyUnicode_MAX_CHAR_VALUE(to) >= PyUnicode_MAX_CHAR_VALUE(from))
+ {
Py_MEMCPY((char*)to_data
+ PyUnicode_KIND_SIZE(to_kind, to_start),
(char*)from_data
@@ -712,7 +754,14 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
}
else {
int invalid_kinds;
- if (from_kind > to_kind) {
+
+ /* check if max_char(from substring) <= max_char(to) */
+ if (from_kind > to_kind
+ /* latin1 => ascii */
+ || (PyUnicode_IS_COMPACT_ASCII(to)
+ && to_kind == PyUnicode_1BYTE_KIND
+ && !PyUnicode_IS_COMPACT_ASCII(from)))
+ {
/* slow path to check for character overflow */
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
Py_UCS4 ch, maxchar;
@@ -736,10 +785,10 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
invalid_kinds = 1;
if (invalid_kinds) {
PyErr_Format(PyExc_ValueError,
- "Cannot copy UCS%u characters "
- "into a string of UCS%u characters",
- 1 << (from_kind - 1),
- 1 << (to_kind -1));
+ "Cannot copy %s characters "
+ "into a string of %s characters",
+ unicode_kind_name(from),
+ unicode_kind_name(to));
return -1;
}
}