summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ned Batchelder <ned@nedbatchelder.com>, 2014-12-28 08:45:17 -0500
committer: Ned Batchelder <ned@nedbatchelder.com>, 2014-12-28 08:45:17 -0500
commit: 228c5a07e04eda70074ce40b25512700f5168dc4 (patch)
tree: 2c308e7b249fc42ed7118abb364ef643d828cf2f
parent: e748d2cbe359876130fbd9477ecbbb320b9df75b (diff)
download: python-coveragepy-git-228c5a07e04eda70074ce40b25512700f5168dc4.tar.gz
Make source_encoding stricter about its arguments, and test it everywhere
-rw-r--r-- coverage/phystokens.py 19
-rw-r--r-- tests/test_phystokens.py 109
2 files changed, 76 insertions, 52 deletions
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index bf55e8a3..95776251 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -148,11 +148,17 @@ generate_tokens = CachedTokenizer().generate_tokens
def _source_encoding_py2(source):
- """Determine the encoding for `source` (a string), according to PEP 263.
+ """Determine the encoding for `source`, according to PEP 263.
- Returns a string, the name of the encoding.
+ Arguments:
+ source (byte string): the text of the program.
+
+ Returns:
+ string: the name of the encoding.
"""
+ assert isinstance(source, bytes)
+
# Do this so the detect_encode code we copied will work.
readline = iter(source.splitlines(True)).next
@@ -240,11 +246,16 @@ def _source_encoding_py2(source):
def _source_encoding_py3(source):
- """Determine the encoding for `source` (a string), according to PEP 263.
+ """Determine the encoding for `source`, according to PEP 263.
+
+ Arguments:
+ source (byte string): the text of the program.
- Returns a string, the name of the encoding.
+ Returns:
+ string: the name of the encoding.
"""
+ assert isinstance(source, bytes)
readline = iter(source.splitlines(True)).__next__
return tokenize.detect_encoding(readline)[0]
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 10e0225f..bbd956ea 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -1,8 +1,11 @@
"""Tests for Coverage.py's improved tokenizer."""
-#from __future__ import unicode_literals
+import os
+import re
+import sys
+
+from nose.plugins.skip import SkipTest
-import os, re, sys
from tests.coveragetest import CoverageTest
from coverage.phystokens import source_token_lines, source_encoding
@@ -83,49 +86,59 @@ class PhysTokensTest(CoverageTest):
self.check_file_tokenization(stress)
-# source_encoding is only used on Py2.
-if sys.version_info < (3, 0):
- class SourceEncodingTest(CoverageTest):
- """Tests of source_encoding() for detecting encodings on Py2."""
-
- run_in_temp_dir = False
-
- def test_detect_source_encoding(self):
- # Various forms from http://www.python.org/dev/peps/pep-0263/
- source = "# coding=cp850\n\n"
- self.assertEqual(source_encoding(source), 'cp850')
- source = "#!/usr/bin/python\n# -*- coding: utf-8 -*-\n"
- self.assertEqual(source_encoding(source), 'utf-8')
- source = "#!/usr/bin/python\n# vim: set fileencoding=utf8 :\n"
- self.assertEqual(source_encoding(source), 'utf8')
- source = "# This Python file uses this encoding: utf-8\n"
- self.assertEqual(source_encoding(source), 'utf-8')
-
- def test_detect_source_encoding_not_in_comment(self):
- # Should not detect anything here
- source = 'def parse(src, encoding=None):\n pass'
- self.assertEqual(source_encoding(source), 'ascii')
-
- def test_detect_source_encoding_on_second_line(self):
- # A coding declaration should be found despite a first blank line.
- source = "\n# coding=cp850\n\n"
- self.assertEqual(source_encoding(source), 'cp850')
-
- def test_dont_detect_source_encoding_on_third_line(self):
- # A coding declaration doesn't count on the third line.
- source = "\n\n# coding=cp850\n\n"
- self.assertEqual(source_encoding(source), 'ascii')
-
- def test_detect_source_encoding_of_empty_file(self):
- # An important edge case.
- self.assertEqual(source_encoding(""), 'ascii')
-
- def test_bom(self):
- # A BOM means utf-8.
- source = "\xEF\xBB\xBFtext = 'hello'\n"
- self.assertEqual(source_encoding(source), 'utf-8-sig')
-
- # But it has to be the only authority.
- source = "\xEF\xBB\xBF# coding: cp850\n"
- with self.assertRaises(SyntaxError):
- source_encoding(source)
+# The default encoding is different in Python 2 and Python 3.
+if sys.version_info >= (3, 0):
+ DEF_ENCODING = "utf-8"
+else:
+ DEF_ENCODING = "ascii"
+
+
+class SourceEncodingTest(CoverageTest):
+ """Tests of source_encoding() for detecting encodings."""
+
+ run_in_temp_dir = False
+
+ def test_detect_source_encoding(self):
+ # Various forms from http://www.python.org/dev/peps/pep-0263/
+ source = b"# coding=cp850\n\n"
+ self.assertEqual(source_encoding(source), 'cp850')
+ source = b"#!/usr/bin/python\n# -*- coding: utf-8 -*-\n"
+ self.assertEqual(source_encoding(source), 'utf-8')
+ source = b"#!/usr/bin/python\n# vim: set fileencoding=utf8 :\n"
+ self.assertEqual(source_encoding(source), 'utf8')
+ source = b"# This Python file uses this encoding: utf-8\n"
+ self.assertEqual(source_encoding(source), 'utf-8')
+
+ def test_detect_source_encoding_not_in_comment(self):
+ if '__pypy__' in sys.builtin_module_names:
+ if sys.version_info > (3, 0):
+ # PyPy3 gets this case wrong. Not sure what I can do about it,
+ # so skip the test.
+ raise SkipTest
+ # Should not detect anything here
+ source = b'def parse(src, encoding=None):\n pass'
+ self.assertEqual(source_encoding(source), DEF_ENCODING)
+
+ def test_detect_source_encoding_on_second_line(self):
+ # A coding declaration should be found despite a first blank line.
+ source = b"\n# coding=cp850\n\n"
+ self.assertEqual(source_encoding(source), 'cp850')
+
+ def test_dont_detect_source_encoding_on_third_line(self):
+ # A coding declaration doesn't count on the third line.
+ source = b"\n\n# coding=cp850\n\n"
+ self.assertEqual(source_encoding(source), DEF_ENCODING)
+
+ def test_detect_source_encoding_of_empty_file(self):
+ # An important edge case.
+ self.assertEqual(source_encoding(b""), DEF_ENCODING)
+
+ def test_bom(self):
+ # A BOM means utf-8.
+ source = b"\xEF\xBB\xBFtext = 'hello'\n"
+ self.assertEqual(source_encoding(source), 'utf-8-sig')
+
+ # But it has to be the only authority.
+ source = b"\xEF\xBB\xBF# coding: cp850\n"
+ with self.assertRaises(SyntaxError):
+ source_encoding(source)