diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 22 |
1 files changed, 20 insertions, 2 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 775bd15cf6..3d9e09d7fa 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4190,7 +4190,10 @@ unicode_decode_call_errorhandler_writer( Py_ssize_t insize; Py_ssize_t newpos; Py_ssize_t replen; + Py_ssize_t remain; PyObject *inputobj = NULL; + int need_to_grow = 0; + const char *new_inptr; if (*errorHandler == NULL) { *errorHandler = PyCodec_LookupError(errors); @@ -4221,6 +4224,7 @@ unicode_decode_call_errorhandler_writer( inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject); if (!inputobj) goto onError; + remain = *inend - *input - *endinpos; *input = PyBytes_AS_STRING(inputobj); insize = PyBytes_GET_SIZE(inputobj); *inend = *input + insize; @@ -4238,6 +4242,19 @@ unicode_decode_call_errorhandler_writer( replen = PyUnicode_GET_LENGTH(repunicode); if (replen > 1) { writer->min_length += replen - 1; + need_to_grow = 1; + } + new_inptr = *input + newpos; + if (*inend - new_inptr > remain) { + /* We don't know the decoding algorithm here so we make the worst + assumption that one byte decodes to one unicode character. + If unfortunately one byte could decode to more unicode characters, + the decoder may write out-of-bound then. Is it possible for the + algorithms using this function? */ + writer->min_length += *inend - new_inptr - remain; + need_to_grow = 1; + } + if (need_to_grow) { writer->overallocate = 1; if (_PyUnicodeWriter_Prepare(writer, writer->min_length, PyUnicode_MAX_CHAR_VALUE(repunicode)) == -1) @@ -4247,7 +4264,7 @@ unicode_decode_call_errorhandler_writer( goto onError; *endinpos = newpos; - *inptr = *input + newpos; + *inptr = new_inptr; /* we made it! */ Py_DECREF(restuple); @@ -5572,7 +5589,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s, #endif /* Note: size will always be longer than the resulting Unicode - character count */ + character count normally. Error handler will take care of + resizing when needed. */ _PyUnicodeWriter_Init(&writer); writer.min_length = (e - q + 1) / 2; if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1) |