Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 44
1 file changed, 18 insertions, 26 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0da565a612..6c8fe2d865 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4492,7 +4492,6 @@ _PyUnicode_EncodeUTF7(PyObject *str,
     void *data;
     Py_ssize_t len;
     PyObject *v;
-    Py_ssize_t allocated;
     int inShift = 0;
     Py_ssize_t i;
     unsigned int base64bits = 0;
@@ -4510,11 +4509,9 @@ _PyUnicode_EncodeUTF7(PyObject *str,
         return PyBytes_FromStringAndSize(NULL, 0);
 
     /* It might be possible to tighten this worst case */
-    allocated = 8 * len;
-    if (allocated / 8 != len)
+    if (len > PY_SSIZE_T_MAX / 8)
         return PyErr_NoMemory();
-
-    v = PyBytes_FromStringAndSize(NULL, allocated);
+    v = PyBytes_FromStringAndSize(NULL, len * 8);
     if (v == NULL)
         return NULL;
 
@@ -5092,7 +5089,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
     Py_ssize_t len;
     PyObject *v;
     unsigned char *p;
-    Py_ssize_t nsize, bytesize, i;
+    Py_ssize_t nsize, i;
     /* Offsets from p for storing byte pairs in the right order. */
 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
     int iorder[] = {0, 1, 2, 3};
@@ -5120,10 +5117,9 @@ _PyUnicode_EncodeUTF32(PyObject *str,
     len = PyUnicode_GET_LENGTH(str);
 
     nsize = len + (byteorder == 0);
-    bytesize = nsize * 4;
-    if (bytesize / 4 != nsize)
+    if (nsize > PY_SSIZE_T_MAX / 4)
         return PyErr_NoMemory();
-    v = PyBytes_FromStringAndSize(NULL, bytesize);
+    v = PyBytes_FromStringAndSize(NULL, nsize * 4);
     if (v == NULL)
         return NULL;
 
@@ -5772,18 +5768,12 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
     void *data;
     Py_ssize_t expandsize = 0;
 
-    /* Initial allocation is based on the longest-possible unichr
+    /* Initial allocation is based on the longest-possible character
        escape.
 
-       In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
-       unichr, so in this case it's the longest unichr escape. In
-       narrow (UTF-16) builds this is five chars per source unichr
-       since there are two unichrs in the surrogate pair, so in narrow
-       (UTF-16) builds it's not the longest unichr escape.
-
-       In wide or narrow builds '\uxxxx' is 6 chars per source unichr,
-       so in the narrow (UTF-16) build case it's the longest unichr
-       escape.
+       For UCS1 strings it's '\xxx', 4 bytes per source character.
+       For UCS2 strings it's '\uxxxx', 6 bytes per source character.
+       For UCS4 strings it's '\U00xxxxxx', 10 bytes per source character.
     */
 
     if (!PyUnicode_Check(unicode)) {
@@ -10165,7 +10155,7 @@ replace(PyObject *self, PyObject *str1,
     }
     else {
         Py_ssize_t n, i, j, ires;
-        Py_ssize_t product, new_size;
+        Py_ssize_t new_size;
         int rkind = skind;
         char *res;
 
@@ -10197,19 +10187,18 @@ replace(PyObject *self, PyObject *str1,
         }
         /* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) -
            PyUnicode_GET_LENGTH(str1))); */
-        product = n * (len2-len1);
-        if ((product / (len2-len1)) != n) {
+        if (len2 > len1 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) {
                 PyErr_SetString(PyExc_OverflowError,
                                 "replace string is too long");
                 goto error;
         }
-        new_size = slen + product;
+        new_size = slen + n * (len2 - len1);
         if (new_size == 0) {
             Py_INCREF(unicode_empty);
             u = unicode_empty;
             goto done;
         }
-        if (new_size < 0 || new_size > (PY_SSIZE_T_MAX >> (rkind-1))) {
+        if (new_size > (PY_SSIZE_T_MAX >> (rkind-1))) {
             PyErr_SetString(PyExc_OverflowError,
                             "replace string is too long");
             goto error;
@@ -13442,8 +13431,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
     uformat = PyUnicode_FromObject(format);
     if (uformat == NULL)
         return NULL;
-    if (PyUnicode_READY(uformat) == -1)
+    if (PyUnicode_READY(uformat) == -1) {
         Py_DECREF(uformat);
+        return NULL;
+    }
 
     fmt = PyUnicode_DATA(uformat);
     fmtkind = PyUnicode_KIND(uformat);
@@ -14083,7 +14074,8 @@ onError:
 }
 
 PyDoc_STRVAR(unicode_doc,
-             "str(object[, encoding[, errors]]) -> str\n\
+"str(object='') -> str\n\
+str(bytes_or_buffer[, encoding[, errors]]) -> str\n\
 \n\
 Create a new string object from the given object. If encoding or\n\
 errors is specified, then the object must expose a data buffer\n\
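The recurring pattern in these hunks is to compare against PY_SSIZE_T_MAX before multiplying, rather than multiplying first and dividing afterwards to detect overflow (signed overflow is undefined behaviour in C, so the old post-hoc check could legally be optimized away). The following is a minimal, standalone sketch of that idiom; the helper name checked_mul_size, the demo values, and the use of ptrdiff_t as a stand-in for Py_ssize_t are illustrative assumptions, not part of CPython.

    /* Sketch of the "check the bound before multiplying" idiom used above.
       Not CPython code: names and values here are hypothetical. */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SSIZE_MAX_DEMO PTRDIFF_MAX   /* stand-in for PY_SSIZE_T_MAX */

    /* Store len * factor in *out and return 0 if the product fits in a
       ptrdiff_t; return -1 without multiplying otherwise.  Assumes factor > 0. */
    static int
    checked_mul_size(ptrdiff_t len, ptrdiff_t factor, ptrdiff_t *out)
    {
        if (len > SSIZE_MAX_DEMO / factor)
            return -1;              /* would overflow: report before multiplying */
        *out = len * factor;        /* safe: the product is known to fit */
        return 0;
    }

    int
    main(void)
    {
        ptrdiff_t nbytes;

        /* Mirrors "if (len > PY_SSIZE_T_MAX / 8) return PyErr_NoMemory();" */
        if (checked_mul_size(1000, 8, &nbytes) == 0)
            printf("allocate %td bytes\n", nbytes);

        if (checked_mul_size(SSIZE_MAX_DEMO / 2, 8, &nbytes) != 0)
            printf("overflow detected before multiplying\n");

        return 0;
    }

The replace() hunk applies the same idea to a multiply-then-add bound: it verifies that len2 - len1 does not exceed (PY_SSIZE_T_MAX - slen) / n before computing slen + n * (len2 - len1), which also makes the later "new_size < 0" test redundant.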