summaryrefslogtreecommitdiff
path: root/Lib/test
diff options
context:
space:
mode:
authorVictor Stinner <vstinner@redhat.com>2019-06-26 00:51:05 +0200
committerGitHub <noreply@github.com>2019-06-26 00:51:05 +0200
commit22eb689cf3de7972a2789db3ad01a86949508ab7 (patch)
treea1d63fa4cf235008e73f92a18ebef57be54ce4a5 /Lib/test
parente1a63c4f21011a3ae77dff624196561070c83446 (diff)
downloadcpython-git-22eb689cf3de7972a2789db3ad01a86949508ab7.tar.gz
bpo-37388: Development mode check encoding and errors (GH-14341)
In development mode and in debug builds, the encoding and errors arguments are now checked on string encoding and decoding operations. Examples: open(), str.encode() and bytes.decode(). By default, for best performance, the errors argument is only checked at the first encoding/decoding error, and the encoding argument is sometimes ignored for empty strings.
Diffstat (limited to 'Lib/test')
-rw-r--r--Lib/test/test_bytes.py58
-rw-r--r--Lib/test/test_io.py49
-rw-r--r--Lib/test/test_unicode.py62
3 files changed, 168 insertions, 1 deletions
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index bbd45c7529..b5eeb2b4fc 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -12,12 +12,14 @@ import copy
import functools
import pickle
import tempfile
+import textwrap
import unittest
import test.support
import test.string_tests
import test.list_tests
from test.support import bigaddrspacetest, MAX_Py_ssize_t
+from test.support.script_helper import assert_python_failure
if sys.flags.bytes_warning:
@@ -315,6 +317,62 @@ class BaseBytesTest:
# Default encoding is utf-8
self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603')
+ def test_check_encoding_errors(self):
+ # bpo-37388: bytes(str) and bytes.encode() must check encoding
+ # and errors arguments in dev mode
+ invalid = 'Boom, Shaka Laka, Boom!'
+ encodings = ('ascii', 'utf8', 'latin1')
+ code = textwrap.dedent(f'''
+ import sys
+ type2test = {self.type2test.__name__}
+ encodings = {encodings!r}
+
+ for data in ('', 'short string'):
+ try:
+ type2test(data, encoding={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(21)
+
+ for encoding in encodings:
+ try:
+ type2test(data, encoding=encoding, errors={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(22)
+
+ for data in (b'', b'short string'):
+ data = type2test(data)
+ print(repr(data))
+ try:
+ data.decode(encoding={invalid!r})
+ except LookupError:
+ sys.exit(10)
+ else:
+ sys.exit(23)
+
+ try:
+ data.decode(errors={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(24)
+
+ for encoding in encodings:
+ try:
+ data.decode(encoding=encoding, errors={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(25)
+
+ sys.exit(10)
+ ''')
+ proc = assert_python_failure('-X', 'dev', '-c', code)
+ self.assertEqual(proc.rc, 10, proc)
+
def test_from_int(self):
b = self.type2test(0)
self.assertEqual(b, self.type2test())
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index fc474c9905..1fe1cba516 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -29,6 +29,7 @@ import random
import signal
import sys
import sysconfig
+import textwrap
import threading
import time
import unittest
@@ -37,7 +38,8 @@ import weakref
from collections import deque, UserList
from itertools import cycle, count
from test import support
-from test.support.script_helper import assert_python_ok, run_python_until_end
+from test.support.script_helper import (
+ assert_python_ok, assert_python_failure, run_python_until_end)
from test.support import FakePath
import codecs
@@ -4130,6 +4132,51 @@ class MiscIOTest(unittest.TestCase):
# there used to be a buffer overflow in the parser for rawmode
self.assertRaises(ValueError, self.open, support.TESTFN, 'rwax+')
+ def test_check_encoding_errors(self):
+ # bpo-37388: open() and TextIOWrapper must check encoding and errors
+ # arguments in dev mode
+ mod = self.io.__name__
+ filename = __file__
+ invalid = 'Boom, Shaka Laka, Boom!'
+ code = textwrap.dedent(f'''
+ import sys
+ from {mod} import open, TextIOWrapper
+
+ try:
+ open({filename!r}, encoding={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(21)
+
+ try:
+ open({filename!r}, errors={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(22)
+
+ fp = open({filename!r}, "rb")
+ with fp:
+ try:
+ TextIOWrapper(fp, encoding={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(23)
+
+ try:
+ TextIOWrapper(fp, errors={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(24)
+
+ sys.exit(10)
+ ''')
+ proc = assert_python_failure('-X', 'dev', '-c', code)
+ self.assertEqual(proc.rc, 10, proc)
+
class CMiscIOTest(MiscIOTest):
io = io
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 36b72e40c7..177d80d27e 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -11,9 +11,11 @@ import itertools
import operator
import struct
import sys
+import textwrap
import unittest
import warnings
from test import support, string_tests
+from test.support.script_helper import assert_python_failure
# Error handling (bad decoder return)
def search_function(encoding):
@@ -2436,6 +2438,66 @@ class UnicodeTest(string_tests.CommonTest,
support.check_free_after_iterating(self, iter, str)
support.check_free_after_iterating(self, reversed, str)
+ def test_check_encoding_errors(self):
+ # bpo-37388: str(bytes) and str.decode() must check encoding and errors
+ # arguments in dev mode
+ encodings = ('ascii', 'utf8', 'latin1')
+ invalid = 'Boom, Shaka Laka, Boom!'
+ code = textwrap.dedent(f'''
+ import sys
+ encodings = {encodings!r}
+
+ for data in (b'', b'short string'):
+ try:
+ str(data, encoding={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(21)
+
+ try:
+ str(data, errors={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(22)
+
+ for encoding in encodings:
+ try:
+ str(data, encoding, errors={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(22)
+
+ for data in ('', 'short string'):
+ try:
+ data.encode(encoding={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(23)
+
+ try:
+ data.encode(errors={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(24)
+
+ for encoding in encodings:
+ try:
+ data.encode(encoding, errors={invalid!r})
+ except LookupError:
+ pass
+ else:
+ sys.exit(24)
+
+ sys.exit(10)
+ ''')
+ proc = assert_python_failure('-X', 'dev', '-c', code)
+ self.assertEqual(proc.rc, 10, proc)
+
class CAPITest(unittest.TestCase):