diff options
| author | Victor Stinner <victor.stinner@haypocalc.com> | 2011-05-24 22:22:17 +0200 | 
|---|---|---|
| committer | Victor Stinner <victor.stinner@haypocalc.com> | 2011-05-24 22:22:17 +0200 | 
| commit | d48ba0bde5bd535e9aa4c90cb122c0197f862e68 (patch) | |
| tree | 6efaaf33c793f24e6a13aff17d7aade3e4e62381 | |
| parent | e6a06217d2adedbe4a07ad007eb8c9e6c4150052 (diff) | |
| parent | 6bcbef7da0127272aa97cdd43ec529bfe92c3251 (diff) | |
| download | cpython-git-d48ba0bde5bd535e9aa4c90cb122c0197f862e68.tar.gz | |
(Merge 3.1) Issue #12100: Don't reset incremental encoders of CJK codecs at
each call to their encode() method anymore, but continue to call the reset()
method if the final argument is True.
| -rw-r--r-- | Lib/test/test_multibytecodec.py | 30 |
| -rw-r--r-- | Misc/NEWS | 4 |
| -rw-r--r-- | Modules/cjkcodecs/multibytecodec.c | 8 |
3 files changed, 38 insertions, 4 deletions
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py index fe772e14bd..86c68dcd9a 100644 --- a/Lib/test/test_multibytecodec.py +++ b/Lib/test/test_multibytecodec.py @@ -256,6 +256,36 @@ class Test_ISO2022(unittest.TestCase):              # Any ISO 2022 codec will cause the segfault              myunichr(x).encode('iso_2022_jp', 'ignore') +class TestStateful(unittest.TestCase): +    text = '\u4E16\u4E16' +    encoding = 'iso-2022-jp' +    expected = b'\x1b$B@$@$' +    expected_reset = b'\x1b$B@$@$\x1b(B' + +    def test_encode(self): +        self.assertEqual(self.text.encode(self.encoding), self.expected_reset) + +    def test_incrementalencoder(self): +        encoder = codecs.getincrementalencoder(self.encoding)() +        output = b''.join( +            encoder.encode(char) +            for char in self.text) +        self.assertEqual(output, self.expected) + +    def test_incrementalencoder_final(self): +        encoder = codecs.getincrementalencoder(self.encoding)() +        last_index = len(self.text) - 1 +        output = b''.join( +            encoder.encode(char, index == last_index) +            for index, char in enumerate(self.text)) +        self.assertEqual(output, self.expected_reset) + +class TestHZStateful(TestStateful): +    text = '\u804a\u804a' +    encoding = 'hz' +    expected = b'~{ADAD' +    expected_reset = b'~{ADAD~}' +  def test_main():      support.run_unittest(__name__) @@ -13,6 +13,10 @@ Core and Builtins  Library  ------- +- Issue #12100: Don't reset incremental encoders of CJK codecs at each call to +  their encode() method anymore, but continue to call the reset() method if the +  final argument is True. +  - Issue #5715: In socketserver, close the server socket in the child process.  - Correct lookup of __dir__ on objects. 
Among other things, this causes errors diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c index af7ea5b83a..7b04f020c9 100644 --- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -479,7 +479,7 @@ multibytecodec_encode(MultibyteCodec *codec,      MultibyteEncodeBuffer buf;      Py_ssize_t finalsize, r = 0; -    if (datalen == 0) +    if (datalen == 0 && !(flags & MBENC_RESET))          return PyBytes_FromStringAndSize(NULL, 0);      buf.excobj = NULL; @@ -514,7 +514,7 @@ multibytecodec_encode(MultibyteCodec *codec,              break;      } -    if (codec->encreset != NULL) +    if (codec->encreset != NULL && (flags & MBENC_RESET))          for (;;) {              Py_ssize_t outleft; @@ -784,8 +784,8 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,      inbuf_end = inbuf + datalen;      r = multibytecodec_encode(ctx->codec, &ctx->state, -                    (const Py_UNICODE **)&inbuf, -                    datalen, ctx->errors, final ? MBENC_FLUSH : 0); +                    (const Py_UNICODE **)&inbuf, datalen, +                    ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);      if (r == NULL) {          /* recover the original pending buffer */          if (origpending > 0)  | 
