summary | refs | log | tree | commit | diff
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 44
1 files changed, 18 insertions, 26 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0da565a612..6c8fe2d865 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4492,7 +4492,6 @@ _PyUnicode_EncodeUTF7(PyObject *str,
void *data;
Py_ssize_t len;
PyObject *v;
- Py_ssize_t allocated;
int inShift = 0;
Py_ssize_t i;
unsigned int base64bits = 0;
@@ -4510,11 +4509,9 @@ _PyUnicode_EncodeUTF7(PyObject *str,
return PyBytes_FromStringAndSize(NULL, 0);
/* It might be possible to tighten this worst case */
- allocated = 8 * len;
- if (allocated / 8 != len)
+ if (len > PY_SSIZE_T_MAX / 8)
return PyErr_NoMemory();
-
- v = PyBytes_FromStringAndSize(NULL, allocated);
+ v = PyBytes_FromStringAndSize(NULL, len * 8);
if (v == NULL)
return NULL;
@@ -5092,7 +5089,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
Py_ssize_t len;
PyObject *v;
unsigned char *p;
- Py_ssize_t nsize, bytesize, i;
+ Py_ssize_t nsize, i;
/* Offsets from p for storing byte pairs in the right order. */
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
int iorder[] = {0, 1, 2, 3};
@@ -5120,10 +5117,9 @@ _PyUnicode_EncodeUTF32(PyObject *str,
len = PyUnicode_GET_LENGTH(str);
nsize = len + (byteorder == 0);
- bytesize = nsize * 4;
- if (bytesize / 4 != nsize)
+ if (nsize > PY_SSIZE_T_MAX / 4)
return PyErr_NoMemory();
- v = PyBytes_FromStringAndSize(NULL, bytesize);
+ v = PyBytes_FromStringAndSize(NULL, nsize * 4);
if (v == NULL)
return NULL;
@@ -5772,18 +5768,12 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
void *data;
Py_ssize_t expandsize = 0;
- /* Initial allocation is based on the longest-possible unichr
+ /* Initial allocation is based on the longest-possible character
escape.
- In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
- unichr, so in this case it's the longest unichr escape. In
- narrow (UTF-16) builds this is five chars per source unichr
- since there are two unichrs in the surrogate pair, so in narrow
- (UTF-16) builds it's not the longest unichr escape.
-
- In wide or narrow builds '\uxxxx' is 6 chars per source unichr,
- so in the narrow (UTF-16) build case it's the longest unichr
- escape.
+ For UCS1 strings it's '\xxx', 4 bytes per source character.
+ For UCS2 strings it's '\uxxxx', 6 bytes per source character.
+ For UCS4 strings it's '\U00xxxxxx', 10 bytes per source character.
*/
if (!PyUnicode_Check(unicode)) {
@@ -10165,7 +10155,7 @@ replace(PyObject *self, PyObject *str1,
}
else {
Py_ssize_t n, i, j, ires;
- Py_ssize_t product, new_size;
+ Py_ssize_t new_size;
int rkind = skind;
char *res;
@@ -10197,19 +10187,18 @@ replace(PyObject *self, PyObject *str1,
}
/* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) -
PyUnicode_GET_LENGTH(str1))); */
- product = n * (len2-len1);
- if ((product / (len2-len1)) != n) {
+ if (len2 > len1 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) {
PyErr_SetString(PyExc_OverflowError,
"replace string is too long");
goto error;
}
- new_size = slen + product;
+ new_size = slen + n * (len2 - len1);
if (new_size == 0) {
Py_INCREF(unicode_empty);
u = unicode_empty;
goto done;
}
- if (new_size < 0 || new_size > (PY_SSIZE_T_MAX >> (rkind-1))) {
+ if (new_size > (PY_SSIZE_T_MAX >> (rkind-1))) {
PyErr_SetString(PyExc_OverflowError,
"replace string is too long");
goto error;
@@ -13442,8 +13431,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
uformat = PyUnicode_FromObject(format);
if (uformat == NULL)
return NULL;
- if (PyUnicode_READY(uformat) == -1)
+ if (PyUnicode_READY(uformat) == -1) {
Py_DECREF(uformat);
+ return NULL;
+ }
fmt = PyUnicode_DATA(uformat);
fmtkind = PyUnicode_KIND(uformat);
@@ -14083,7 +14074,8 @@ onError:
}
PyDoc_STRVAR(unicode_doc,
- "str(object[, encoding[, errors]]) -> str\n\
+"str(object='') -> str\n\
+str(bytes_or_buffer[, encoding[, errors]]) -> str\n\
\n\
Create a new string object from the given object. If encoding or\n\
errors is specified, then the object must expose a data buffer\n\