diff options
author | Victor Stinner <vstinner@redhat.com> | 2019-06-26 00:51:05 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-06-26 00:51:05 +0200 |
commit | 22eb689cf3de7972a2789db3ad01a86949508ab7 (patch) | |
tree | a1d63fa4cf235008e73f92a18ebef57be54ce4a5 /Lib/test | |
parent | e1a63c4f21011a3ae77dff624196561070c83446 (diff) | |
download | cpython-git-22eb689cf3de7972a2789db3ad01a86949508ab7.tar.gz |
bpo-37388: Development mode check encoding and errors (GH-14341)
In development mode and in debug builds, the encoding and errors arguments
are now checked on string encoding and decoding operations. Examples:
open(), str.encode() and bytes.decode().
By default, for best performance, the errors argument is only
checked at the first encoding/decoding error, and the encoding
argument is sometimes ignored for empty strings.
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_bytes.py | 58 | ||||
-rw-r--r-- | Lib/test/test_io.py | 49 | ||||
-rw-r--r-- | Lib/test/test_unicode.py | 62 |
3 files changed, 168 insertions, 1 deletions
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index bbd45c7529..b5eeb2b4fc 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -12,12 +12,14 @@ import copy import functools import pickle import tempfile +import textwrap import unittest import test.support import test.string_tests import test.list_tests from test.support import bigaddrspacetest, MAX_Py_ssize_t +from test.support.script_helper import assert_python_failure if sys.flags.bytes_warning: @@ -315,6 +317,62 @@ class BaseBytesTest: # Default encoding is utf-8 self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603') + def test_check_encoding_errors(self): + # bpo-37388: bytes(str) and bytes.encode() must check encoding + # and errors arguments in dev mode + invalid = 'Boom, Shaka Laka, Boom!' + encodings = ('ascii', 'utf8', 'latin1') + code = textwrap.dedent(f''' + import sys + type2test = {self.type2test.__name__} + encodings = {encodings!r} + + for data in ('', 'short string'): + try: + type2test(data, encoding={invalid!r}) + except LookupError: + pass + else: + sys.exit(21) + + for encoding in encodings: + try: + type2test(data, encoding=encoding, errors={invalid!r}) + except LookupError: + pass + else: + sys.exit(22) + + for data in (b'', b'short string'): + data = type2test(data) + print(repr(data)) + try: + data.decode(encoding={invalid!r}) + except LookupError: + sys.exit(10) + else: + sys.exit(23) + + try: + data.decode(errors={invalid!r}) + except LookupError: + pass + else: + sys.exit(24) + + for encoding in encodings: + try: + data.decode(encoding=encoding, errors={invalid!r}) + except LookupError: + pass + else: + sys.exit(25) + + sys.exit(10) + ''') + proc = assert_python_failure('-X', 'dev', '-c', code) + self.assertEqual(proc.rc, 10, proc) + def test_from_int(self): b = self.type2test(0) self.assertEqual(b, self.type2test()) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index fc474c9905..1fe1cba516 100644 --- a/Lib/test/test_io.py +++ 
b/Lib/test/test_io.py @@ -29,6 +29,7 @@ import random import signal import sys import sysconfig +import textwrap import threading import time import unittest @@ -37,7 +38,8 @@ import weakref from collections import deque, UserList from itertools import cycle, count from test import support -from test.support.script_helper import assert_python_ok, run_python_until_end +from test.support.script_helper import ( + assert_python_ok, assert_python_failure, run_python_until_end) from test.support import FakePath import codecs @@ -4130,6 +4132,51 @@ class MiscIOTest(unittest.TestCase): # there used to be a buffer overflow in the parser for rawmode self.assertRaises(ValueError, self.open, support.TESTFN, 'rwax+') + def test_check_encoding_errors(self): + # bpo-37388: open() and TextIOWrapper must check encoding and errors + # arguments in dev mode + mod = self.io.__name__ + filename = __file__ + invalid = 'Boom, Shaka Laka, Boom!' + code = textwrap.dedent(f''' + import sys + from {mod} import open, TextIOWrapper + + try: + open({filename!r}, encoding={invalid!r}) + except LookupError: + pass + else: + sys.exit(21) + + try: + open({filename!r}, errors={invalid!r}) + except LookupError: + pass + else: + sys.exit(22) + + fp = open({filename!r}, "rb") + with fp: + try: + TextIOWrapper(fp, encoding={invalid!r}) + except LookupError: + pass + else: + sys.exit(23) + + try: + TextIOWrapper(fp, errors={invalid!r}) + except LookupError: + pass + else: + sys.exit(24) + + sys.exit(10) + ''') + proc = assert_python_failure('-X', 'dev', '-c', code) + self.assertEqual(proc.rc, 10, proc) + class CMiscIOTest(MiscIOTest): io = io diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 36b72e40c7..177d80d27e 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -11,9 +11,11 @@ import itertools import operator import struct import sys +import textwrap import unittest import warnings from test import support, string_tests +from test.support.script_helper import 
assert_python_failure # Error handling (bad decoder return) def search_function(encoding): @@ -2436,6 +2438,66 @@ class UnicodeTest(string_tests.CommonTest, support.check_free_after_iterating(self, iter, str) support.check_free_after_iterating(self, reversed, str) + def test_check_encoding_errors(self): + # bpo-37388: str(bytes) and str.decode() must check encoding and errors + # arguments in dev mode + encodings = ('ascii', 'utf8', 'latin1') + invalid = 'Boom, Shaka Laka, Boom!' + code = textwrap.dedent(f''' + import sys + encodings = {encodings!r} + + for data in (b'', b'short string'): + try: + str(data, encoding={invalid!r}) + except LookupError: + pass + else: + sys.exit(21) + + try: + str(data, errors={invalid!r}) + except LookupError: + pass + else: + sys.exit(22) + + for encoding in encodings: + try: + str(data, encoding, errors={invalid!r}) + except LookupError: + pass + else: + sys.exit(22) + + for data in ('', 'short string'): + try: + data.encode(encoding={invalid!r}) + except LookupError: + pass + else: + sys.exit(23) + + try: + data.encode(errors={invalid!r}) + except LookupError: + pass + else: + sys.exit(24) + + for encoding in encodings: + try: + data.encode(encoding, errors={invalid!r}) + except LookupError: + pass + else: + sys.exit(24) + + sys.exit(10) + ''') + proc = assert_python_failure('-X', 'dev', '-c', code) + self.assertEqual(proc.rc, 10, proc) + class CAPITest(unittest.TestCase): |