diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2009-05-05 04:43:17 +0000 |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2009-05-05 04:43:17 +0000 |
commit | 011e8420339245f9b55d41082ec6036f2f83a182 (patch) | |
tree | 6e278775c41c1d50c62e3a42b960797813d245ef /Lib/test/test_codecs.py | |
parent | 93f65a177b36396dddd1e2938cc037288a7eb400 (diff) | |
download | cpython-git-011e8420339245f9b55d41082ec6036f2f83a182.tar.gz |
Issue #5915: Implement PEP 383, Non-decodable Bytes in
System Character Interfaces.
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r-- | Lib/test/test_codecs.py | 29 |
1 files changed, 29 insertions, 0 deletions
```diff
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 6706507335..5a3834d495 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1516,6 +1516,34 @@ class TypesTest(unittest.TestCase):
         self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
         self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
+class Utf8bTest(unittest.TestCase):
+
+    def test_utf8(self):
+        # Bad byte
+        self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"),
+                         "foo\udc80bar")
+        self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"),
+                         b"foo\x80bar")
+        # bad-utf-8 encoded surrogate
+        self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"),
+                         "\udced\udcb0\udc80")
+        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"),
+                         b"\xed\xb0\x80")
+
+    def test_ascii(self):
+        # bad byte
+        self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"),
+                         "foo\udc80bar")
+        self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"),
+                         b"foo\x80bar")
+
+    def test_charmap(self):
+        # bad byte: \xa5 is unmapped in iso-8859-3
+        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"),
+                         "foo\udca5bar")
+        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"),
+                         b"foo\xa5bar")
+
 def test_main():
     support.run_unittest(
@@ -1543,6 +1571,7 @@ def test_main():
         CharmapTest,
         WithStmtTest,
         TypesTest,
+        Utf8bTest,
     )
```