diff options
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- | Lib/test/test_codecs.py | 29 |
1 files changed, 29 insertions, 0 deletions
# Patch context, kept as comments so that this section is valid Python:
#   diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
#   index 6706507335..5a3834d495 100644
#   hunk @@ -1516,6 +1516,34 @@  adds Utf8bTest directly after TypesTest.
#   hunk @@ -1543,6 +1571,7 @@   appends Utf8bTest to the
#       support.run_unittest(...) argument list in test_main(), right after
#       TypesTest.
# The surrounding TypesTest assertions and the body of test_main() are only
# partially visible in the patch and are therefore not reproduced here.


class Utf8bTest(unittest.TestCase):
    """Round-trip tests for the PEP 383 error handler.

    Each case decodes a byte string containing bytes the codec cannot decode
    and checks that every such byte 0xNN comes back as the lone surrogate
    U+DCNN, then encodes that text again and checks that the original bytes
    are restored unchanged.
    """

    # The patch spelled the handler "utf8b"; released Pythons register it
    # under the name "surrogateescape", and an unknown handler name raises
    # LookupError before any assertion can run.
    ERROR_HANDLER = "surrogateescape"

    def _check_roundtrip(self, encoding, raw, text):
        """Assert that *raw* decodes to *text* and *text* encodes to *raw*.

        Both directions use *encoding* together with ``ERROR_HANDLER``.
        """
        self.assertEqual(raw.decode(encoding, self.ERROR_HANDLER), text)
        self.assertEqual(text.encode(encoding, self.ERROR_HANDLER), raw)

    def test_utf8(self):
        # A lone continuation byte is not valid UTF-8.
        self._check_roundtrip("utf-8", b"foo\x80bar", "foo\udc80bar")
        # A UTF-8-style encoding of a surrogate is rejected by the decoder,
        # so each of its three bytes is escaped individually.
        self._check_roundtrip("utf-8", b"\xed\xb0\x80", "\udced\udcb0\udc80")

    def test_ascii(self):
        # Any byte >= 0x80 is undecodable as ASCII.
        self._check_roundtrip("ascii", b"foo\x80bar", "foo\udc80bar")

    def test_charmap(self):
        # \xa5 is unmapped in iso-8859-3.
        self._check_roundtrip("iso-8859-3", b"foo\xa5bar", "foo\udca5bar")