summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2015-12-03 01:05:52 +0200
committer: Serhiy Storchaka <storchaka@gmail.com> 2015-12-03 01:05:52 +0200
commit: 7c088a9b5cb7d2a74bfe3bc4a8e380b9023e5579 (patch)
tree: 7c84f7043dd05e0dcdcee861269880fa43b14472
parent: 7bc168faf58db89de19ef57d4b1f27fcf8fc2833 (diff)
parent: 6648bf5661b79f5b40385b21570dff6f146c5eb5 (diff)
download: cpython-git-7c088a9b5cb7d2a74bfe3bc4a8e380b9023e5579.tar.gz
Issue #25709: Fixed problem with in-place string concatenation and utf-8 cache.
-rw-r--r-- Lib/test/test_unicode.py 17
-rw-r--r-- Misc/NEWS 2
-rw-r--r-- Objects/unicodeobject.c 5
3 files changed, 24 insertions, 0 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index ce8df40d16..fac8b7b602 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2702,6 +2702,23 @@ class UnicodeTest(string_tests.CommonTest,
self.assertTrue(astral >= bmp2)
self.assertFalse(astral >= astral2)
+ @support.cpython_only
+ def test_pep393_utf8_caching_bug(self):
+ # Issue #25709: Problem with string concatenation and utf-8 cache
+ from _testcapi import getargs_s_hash
+ for k in 0x24, 0xa4, 0x20ac, 0x1f40d:
+ s = ''
+ for i in range(5):
+ # Due to CPython specific optimization the 's' string can be
+ # resized in-place.
+ s += chr(k)
+ # Parsing with the "s#" format code calls indirectly
+ # PyUnicode_AsUTF8AndSize() which creates the UTF-8
+ # encoded string cached in the Unicode object.
+ self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
+ # Check that the second call returns the same result
+ self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
+
class StringModuleTest(unittest.TestCase):
def test_formatter_parser(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index 6f86de701e..2fbd20e50b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ Release date: XXXX-XX-XX
Core and Builtins
-----------------
+- Issue #25709: Fixed problem with in-place string concatenation and utf-8 cache.
+
- Issue #5319: New Py_FinalizeEx() API allowing Python to set an exit status
of 120 on failure to flush buffered streams.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 47c97ccaf0..895a4e88f3 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -885,6 +885,11 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
}
new_size = (struct_size + (length + 1) * char_size);
+ if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
+ PyObject_DEL(_PyUnicode_UTF8(unicode));
+ _PyUnicode_UTF8(unicode) = NULL;
+ _PyUnicode_UTF8_LENGTH(unicode) = 0;
+ }
_Py_DEC_REFTOTAL;
_Py_ForgetReference(unicode);