summaryrefslogtreecommitdiff
path: root/Lib/test/test_codecs.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_codecs.py')
-rw-r--r--Lib/test/test_codecs.py29
1 files changed, 29 insertions, 0 deletions
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 6706507335..5a3834d495 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1516,6 +1516,34 @@ class TypesTest(unittest.TestCase):
self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
+class Utf8bTest(unittest.TestCase):
+
+ def test_utf8(self):
+ # Bad byte
+ self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"),
+ "foo\udc80bar")
+ self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"),
+ b"foo\x80bar")
+ # bad-utf-8 encoded surrogate
+ self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"),
+ "\udced\udcb0\udc80")
+ self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"),
+ b"\xed\xb0\x80")
+
+ def test_ascii(self):
+ # bad byte
+ self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"),
+ "foo\udc80bar")
+ self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"),
+ b"foo\x80bar")
+
+ def test_charmap(self):
+ # bad byte: \xa5 is unmapped in iso-8859-3
+ self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"),
+ "foo\udca5bar")
+ self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"),
+ b"foo\xa5bar")
+
def test_main():
support.run_unittest(
@@ -1543,6 +1571,7 @@ def test_main():
CharmapTest,
WithStmtTest,
TypesTest,
+ Utf8bTest,
)