diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2015-10-05 13:43:50 +0200 |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2015-10-05 13:43:50 +0200 |
commit | 1d65d9192dac57776693c55a9ccefbde2ca74c23 (patch) | |
tree | 260cc1acc8425fe62664da26bfdf30c4fa39b508 /Lib/test/test_codecs.py | |
parent | 7dbe6dd96393d713e405f80fa0eb8f9471c8276a (diff) | |
download | cpython-git-1d65d9192dac57776693c55a9ccefbde2ca74c23.tar.gz |
Issue #25301: The UTF-8 decoder is now up to 15 times as fast for error
handlers: ``ignore``, ``replace`` and ``surrogateescape``.
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- | Lib/test/test_codecs.py | 12 |
1 files changed, 12 insertions, 0 deletions
```diff
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index bdc331e491..7b6883fcc5 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -788,6 +788,18 @@ class UTF8Test(ReadTest, unittest.TestCase):
         self.check_state_handling_decode(self.encoding,
                                          u, u.encode(self.encoding))
 
+    def test_decode_error(self):
+        for data, error_handler, expected in (
+            (b'[\x80\xff]', 'ignore', '[]'),
+            (b'[\x80\xff]', 'replace', '[\ufffd\ufffd]'),
+            (b'[\x80\xff]', 'surrogateescape', '[\udc80\udcff]'),
+            (b'[\x80\xff]', 'backslashreplace', '[\\x80\\xff]'),
+        ):
+            with self.subTest(data=data, error_handler=error_handler,
+                              expected=expected):
+                self.assertEqual(data.decode(self.encoding, error_handler),
+                                 expected)
+
     def test_lone_surrogates(self):
         super().test_lone_surrogates()
         # not sure if this is making sense for
```