(Merge 3.1) Issue #12100: Don't reset incremental encoders of CJK codecs at
each call to their encode() method anymore, but continue to call the reset()
method if the final argument is True.
author: Victor Stinner <victor.stinner@haypocalc.com> 2011-05-24 22:22:17 +0200
committer: Victor Stinner <victor.stinner@haypocalc.com> 2011-05-24 22:22:17 +0200
commit: d48ba0bde5bd535e9aa4c90cb122c0197f862e68 (patch)
tree: 6efaaf33c793f24e6a13aff17d7aade3e4e62381
parent: e6a06217d2adedbe4a07ad007eb8c9e6c4150052 (diff)
parent: 6bcbef7da0127272aa97cdd43ec529bfe92c3251 (diff)
download: cpython-git-d48ba0bde5bd535e9aa4c90cb122c0197f862e68.tar.gz
3 files changed, 38 insertions, 4 deletions
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
index fe772e14bd..86c68dcd9a 100644
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -256,6 +256,36 @@ class Test_ISO2022(unittest.TestCase):
             # Any ISO 2022 codec will cause the segfault
             myunichr(x).encode('iso_2022_jp', 'ignore')
 
+class TestStateful(unittest.TestCase):
+    text = '\u4E16\u4E16'
+    encoding = 'iso-2022-jp'
+    expected = b'\x1b$B@$@$'
+    expected_reset = b'\x1b$B@$@$\x1b(B'
+
+    def test_encode(self):
+        self.assertEqual(self.text.encode(self.encoding), self.expected_reset)
+
+    def test_incrementalencoder(self):
+        encoder = codecs.getincrementalencoder(self.encoding)()
+        output = b''.join(
+            encoder.encode(char)
+            for char in self.text)
+        self.assertEqual(output, self.expected)
+
+    def test_incrementalencoder_final(self):
+        encoder = codecs.getincrementalencoder(self.encoding)()
+        last_index = len(self.text) - 1
+        output = b''.join(
+            encoder.encode(char, index == last_index)
+            for index, char in enumerate(self.text))
+        self.assertEqual(output, self.expected_reset)
+
+class TestHZStateful(TestStateful):
+    text = '\u804a\u804a'
+    encoding = 'hz'
+    expected = b'~{ADAD'
+    expected_reset = b'~{ADAD~}'
+
 def test_main():
     support.run_unittest(__name__)
 
diff --git a/Misc/NEWS b/Misc/NEWS
index fc54f8905a..bca13127d4 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #12100: Don't reset incremental encoders of CJK codecs at each call to
+  their encode() method anymore, but continue to call the reset() method if the
+  final argument is True.
+
 - Issue #5715: In socketserver, close the server socket in the child process.
 
 - Correct lookup of __dir__ on objects. Among other things, this causes errors
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index af7ea5b83a..7b04f020c9 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -479,7 +479,7 @@ multibytecodec_encode(MultibyteCodec *codec,
     MultibyteEncodeBuffer buf;
     Py_ssize_t finalsize, r = 0;
 
-    if (datalen == 0)
+    if (datalen == 0 && !(flags & MBENC_RESET))
         return PyBytes_FromStringAndSize(NULL, 0);
 
     buf.excobj = NULL;
@@ -514,7 +514,7 @@ multibytecodec_encode(MultibyteCodec *codec,
             break;
     }
 
-    if (codec->encreset != NULL)
+    if (codec->encreset != NULL && (flags & MBENC_RESET))
         for (;;) {
             Py_ssize_t outleft;
 
@@ -784,8 +784,8 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
     inbuf_end = inbuf + datalen;
 
     r = multibytecodec_encode(ctx->codec, &ctx->state,
-                    (const Py_UNICODE **)&inbuf,
-                    datalen, ctx->errors, final ? MBENC_FLUSH : 0);
+                    (const Py_UNICODE **)&inbuf, datalen,
+                    ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
     if (r == NULL) {
         /* recover the original pending buffer */
         if (origpending > 0)
author	Victor Stinner <victor.stinner@haypocalc.com>	2011-05-24 22:22:17 +0200
committer	Victor Stinner <victor.stinner@haypocalc.com>	2011-05-24 22:22:17 +0200
commit	d48ba0bde5bd535e9aa4c90cb122c0197f862e68 (patch)
tree	6efaaf33c793f24e6a13aff17d7aade3e4e62381
parent	e6a06217d2adedbe4a07ad007eb8c9e6c4150052 (diff)
parent	6bcbef7da0127272aa97cdd43ec529bfe92c3251 (diff)
download	cpython-git-d48ba0bde5bd535e9aa4c90cb122c0197f862e68.tar.gz