diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2010-02-25 17:51:33 +0000 |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2010-02-25 17:51:33 +0000 |
commit | 85ddea7e69aa6892858df1a93d468608cedb93df (patch) | |
tree | bc744f554c760c7d09d8c64dc83f2582441d8fa2 | |
parent | 5a896788b62608104a04a73820216768de11188e (diff) | |
download | cpython-git-85ddea7e69aa6892858df1a93d468608cedb93df.tar.gz |
Merged revisions 78449 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r78449 | ezio.melotti | 2010-02-25 19:36:04 +0200 (Thu, 25 Feb 2010) | 1 line
#7649: "u'%c' % char" now behaves like "u'%s' % char" and raises a UnicodeDecodeError if 'char' is a byte string that can't be decoded using the default encoding.
........
-rw-r--r-- | Misc/NEWS | 5 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 27 |
2 files changed, 21 insertions, 11 deletions
@@ -16,8 +16,9 @@ Core and Builtins
   UnicodeEncodeError, UnicodeDecodeError, and UnicodeTranslateError to
   strings.
 
-- Issue #7649: Fix u'%c' % char for character in range 0x80..0xFF, raise an
-  UnicodeDecodeError.
+- Issue #7649: "u'%c' % char" now behaves like "u'%s' % char" and raises a
+  UnicodeDecodeError if 'char' is a byte string that can't be decoded using
+  the default encoding.
 
 - Issue #5677: Explicitly forbid write operations on read-only file objects,
   and read operations on write-only file objects. On Windows, the system C
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 667afaef28..133cae5d5f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8357,7 +8357,8 @@ formatchar(Py_UNICODE *buf,
            size_t buflen,
            PyObject *v)
 {
-    PyObject *s;
+    PyObject *unistr;
+    char *str;
     /* presume that the buffer is at least 2 characters long */
     if (PyUnicode_Check(v)) {
         if (PyUnicode_GET_SIZE(v) != 1)
@@ -8368,14 +8369,22 @@ formatchar(Py_UNICODE *buf,
     else if (PyString_Check(v)) {
         if (PyString_GET_SIZE(v) != 1)
             goto onError;
-        /* #7649: if the char is a non-ascii (i.e. in range(0x80,0x100)) byte
-           string, "u'%c' % char" should fail with a UnicodeDecodeError */
-        s = PyUnicode_FromStringAndSize(PyString_AS_STRING(v), 1);
-        /* if the char is not decodable return -1 */
-        if (s == NULL)
-            return -1;
-        buf[0] = PyUnicode_AS_UNICODE(s)[0];
-        Py_DECREF(s);
+        /* #7649: "u'%c' % char" should behave like "u'%s' % char" and fail
+           with a UnicodeDecodeError if 'char' is not decodable with the
+           default encoding (usually ASCII, but it might be something else) */
+        str = PyString_AS_STRING(v);
+        if ((unsigned char)str[0] > 0x7F) {
+            /* the char is not ASCII; try to decode the string using the
+               default encoding and return -1 to let the UnicodeDecodeError
+               be raised if the string can't be decoded */
+            unistr = PyUnicode_Decode(str, 1, NULL, "strict");
+            if (unistr == NULL)
+                return -1;
+            buf[0] = PyUnicode_AS_UNICODE(unistr)[0];
+            Py_DECREF(unistr);
+        }
+        else
+            buf[0] = (Py_UNICODE)str[0];
     }
     else {