bpo-33578: Add getstate/setstate for CJK codec (GH-6984)

This implements getstate and setstate for the cjkcodecs multibyte incremental encoders and decoders, primarily to fix issues with seek/tell. The encoder's getstate/setstate is slightly tricky: the "state" consists of the pending bytes plus the MultibyteCodec_State, but only an integer can be returned. The approach I've taken is to encode this data into a single long, similar to how .tell() encodes a "cookie_type" as a long. https://bugs.python.org/issue33578
author: Christopher Thorne <libcthorne@users.noreply.github.com> 2018-11-01 10:48:49 +0000
committer: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2018-11-01 03:48:49 -0700
commit: ac22f6aa989f18c33c12615af1c66c73cf75d5e7 (patch)
tree: bb21f3018f9b5e4b40ede33ce78bba1b13980f86 /Lib/test/test_io.py
parent: 4b5e62dbb22a3593e0db266c12f805b727a42b00 (diff)
download: cpython-git-ac22f6aa989f18c33c12615af1c66c73cf75d5e7.tar.gz
1 file changed, 28 insertions, 0 deletions
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index d927bb96ce..14352ff84f 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2971,6 +2971,34 @@ class TextIOWrapperTest(unittest.TestCase):
         finally:
             StatefulIncrementalDecoder.codecEnabled = 0
 
+    def test_multibyte_seek_and_tell(self):
+        f = self.open(support.TESTFN, "w", encoding="euc_jp")
+        f.write("AB\n\u3046\u3048\n")
+        f.close()
+
+        f = self.open(support.TESTFN, "r", encoding="euc_jp")
+        self.assertEqual(f.readline(), "AB\n")
+        p0 = f.tell()
+        self.assertEqual(f.readline(), "\u3046\u3048\n")
+        p1 = f.tell()
+        f.seek(p0)
+        self.assertEqual(f.readline(), "\u3046\u3048\n")
+        self.assertEqual(f.tell(), p1)
+        f.close()
+
+    def test_seek_with_encoder_state(self):
+        f = self.open(support.TESTFN, "w", encoding="euc_jis_2004")
+        f.write("\u00e6\u0300")
+        p0 = f.tell()
+        f.write("\u00e6")
+        f.seek(p0)
+        f.write("\u0300")
+        f.close()
+
+        f = self.open(support.TESTFN, "r", encoding="euc_jis_2004")
+        self.assertEqual(f.readline(), "\u00e6\u0300\u0300")
+        f.close()
+
     def test_encoded_writes(self):
         data = "1234567890"
         tests = ("utf-16",
author	Christopher Thorne <libcthorne@users.noreply.github.com>	2018-11-01 10:48:49 +0000
committer	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>	2018-11-01 03:48:49 -0700
commit	ac22f6aa989f18c33c12615af1c66c73cf75d5e7 (patch)
tree	bb21f3018f9b5e4b40ede33ce78bba1b13980f86 /Lib/test/test_io.py
parent	4b5e62dbb22a3593e0db266c12f805b727a42b00 (diff)
download	cpython-git-ac22f6aa989f18c33c12615af1c66c73cf75d5e7.tar.gz