summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tests/test_phystokens.py39
1 files changed, 27 insertions, 12 deletions
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 1cc51430..15c6cb3a 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -15,6 +15,7 @@ from coverage.python import get_python_source
from tests.coveragetest import CoverageTest
+# A simple program and its token stream.
SIMPLE = u"""\
# yay!
def foo():
@@ -29,6 +30,7 @@ SIMPLE_TOKENS = [
('ws', ' '), ('num', '2'), ('op', ')')],
]
+# Mixed-whitespace program, and its token stream.
MIXED_WS = u"""\
def hello():
a="Hello world!"
@@ -41,6 +43,7 @@ MIXED_WS_TOKENS = [
[('ws', ' '), ('nam', 'b'), ('op', '='), ('str', '"indented"')],
]
+# Where this file is, so we can find other files next to it.
HERE = os.path.dirname(__file__)
@@ -100,13 +103,15 @@ else:
ENCODING_DECLARATION_SOURCES = [
# Various forms from http://www.python.org/dev/peps/pep-0263/
- (1, b"# coding=cp850\n\n"),
- (1, b"#!/usr/bin/python\n# -*- coding: cp850 -*-\n"),
- (1, b"#!/usr/bin/python\n# vim: set fileencoding=cp850:\n"),
- (1, b"# This Python file uses this encoding: cp850\n"),
- (1, b"# This file uses a different encoding:\n# coding: cp850\n"),
- (1, b"\n# coding=cp850\n\n"),
- (2, b"# -*- coding:cp850 -*-\n# vim: fileencoding=cp850\n"),
+ (1, b"# coding=cp850\n\n", "cp850"),
+ (1, b"# coding=latin-1\n", "iso-8859-1"),
+ (1, b"# coding=iso-latin-1\n", "iso-8859-1"),
+ (1, b"#!/usr/bin/python\n# -*- coding: cp850 -*-\n", "cp850"),
+ (1, b"#!/usr/bin/python\n# vim: set fileencoding=cp850:\n", "cp850"),
+ (1, b"# This Python file uses this encoding: cp850\n", "cp850"),
+ (1, b"# This file uses a different encoding:\n# coding: cp850\n", "cp850"),
+ (1, b"\n# coding=cp850\n\n", "cp850"),
+ (2, b"# -*- coding:cp850 -*-\n# vim: fileencoding=cp850\n", "cp850"),
]
class SourceEncodingTest(CoverageTest):
@@ -115,10 +120,10 @@ class SourceEncodingTest(CoverageTest):
run_in_temp_dir = False
def test_detect_source_encoding(self):
- for _, source in ENCODING_DECLARATION_SOURCES:
+ for _, source, expected in ENCODING_DECLARATION_SOURCES:
self.assertEqual(
source_encoding(source),
- 'cp850',
+ expected,
"Wrong encoding in %r" % source
)
@@ -145,9 +150,19 @@ class SourceEncodingTest(CoverageTest):
source = b"\xEF\xBB\xBFtext = 'hello'\n"
self.assertEqual(source_encoding(source), 'utf-8-sig')
- # But it has to be the only authority.
+ def test_bom_with_encoding(self):
+ source = b"\xEF\xBB\xBF# coding: utf-8\ntext = 'hello'\n"
+ self.assertEqual(source_encoding(source), 'utf-8-sig')
+
+ def test_bom_is_wrong(self):
+ # A BOM with an explicit non-utf8 encoding is an error.
source = b"\xEF\xBB\xBF# coding: cp850\n"
- with self.assertRaises(SyntaxError):
+ with self.assertRaisesRegex(SyntaxError, "encoding problem: utf-8"):
+ source_encoding(source)
+
+ def test_unknown_encoding(self):
+ source = b"# coding: klingon\n"
+ with self.assertRaisesRegex(SyntaxError, "unknown encoding: klingon"):
source_encoding(source)
@@ -157,7 +172,7 @@ class NeuterEncodingDeclarationTest(CoverageTest):
run_in_temp_dir = False
def test_neuter_encoding_declaration(self):
- for lines_diff_expected, source in ENCODING_DECLARATION_SOURCES:
+ for lines_diff_expected, source, _ in ENCODING_DECLARATION_SOURCES:
neutered = neuter_encoding_declaration(source.decode("ascii"))
neutered = neutered.encode("ascii")