diff options
-rw-r--r-- | Doc/library/codecs.rst | 2 | ||||
-rw-r--r-- | Doc/whatsnew/3.3.rst | 2 | ||||
-rw-r--r-- | Modules/_codecsmodule.c | 20 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 28 |
4 files changed, 42 insertions, 10 deletions
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index 4523c7ff2c..a9fae95d07 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -1173,6 +1173,8 @@ particular, the following variants typically exist: | unicode_internal | | Return the internal | | | | representation of the | | | | operand | +| | | | +| | | .. deprecated:: 3.3 | +--------------------+---------+---------------------------+ The following codecs provide bytes-to-bytes mappings. diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst index 911d8d9f7e..7f4517ff28 100644 --- a/Doc/whatsnew/3.3.rst +++ b/Doc/whatsnew/3.3.rst @@ -250,6 +250,8 @@ versions. (:issue:`12100`) +The ``unicode_internal`` codec has been deprecated. + crypt ----- diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 727cf5e77a..93cb1b702b 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -675,18 +675,30 @@ unicode_internal_encode(PyObject *self, PyObject *obj; const char *errors = NULL; const char *data; - Py_ssize_t size; + Py_ssize_t len, size; if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode", &obj, &errors)) return NULL; if (PyUnicode_Check(obj)) { + Py_UNICODE *u; + if (PyUnicode_READY(obj) < 0) return NULL; - data = PyUnicode_AS_DATA(obj); - size = PyUnicode_GET_DATA_SIZE(obj); - return codec_tuple(PyBytes_FromStringAndSize(data, size), + + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "unicode_internal codecs has been deprecated", + 1)) + return NULL; + + u = PyUnicode_AsUnicodeAndSize(obj, &len); + if (u == NULL) + return NULL; + if (len > PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) + return PyErr_NoMemory(); + size = len * sizeof(Py_UNICODE); + return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size), PyUnicode_GET_LENGTH(obj)); } else { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 61534b48d5..3f580b5ff6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6237,6 +6237,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, PyObject *errorHandler = NULL; PyObject *exc = NULL; + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "unicode_internal codecs has been deprecated", + 1)) + return NULL; + /* XXX overflow detection missing */ v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127); if (v == NULL) @@ -6270,15 +6275,26 @@ _PyUnicode_DecodeUnicodeInternal(const char *s, errors, &errorHandler, "unicode_internal", reason, &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) { + &v, &outpos)) goto onError; - } + continue; } - else { - if (unicode_putchar(&v, &outpos, ch) < 0) - goto onError; - s += Py_UNICODE_SIZE; + + s += Py_UNICODE_SIZE; +#ifndef Py_UNICODE_WIDE + if (ch >= 0xD800 && ch <= 0xDBFF && s < end) + { + Py_UCS4 ch2 = *(Py_UNICODE*)s; + if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) + { + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; + s += Py_UNICODE_SIZE; + } } +#endif + + if (unicode_putchar(&v, &outpos, ch) < 0) + goto onError; } if (PyUnicode_Resize(&v, outpos) < 0) |