diff options
-rw-r--r-- | AUTHORS.txt | 1 | ||||
-rw-r--r-- | CHANGES.txt | 3 | ||||
-rw-r--r-- | coverage/phystokens.py | 4 | ||||
-rw-r--r-- | tests/test_phystokens.py | 59 |
4 files changed, 52 insertions, 15 deletions
diff --git a/AUTHORS.txt b/AUTHORS.txt index 2202f9c0..5ea7e040 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -20,6 +20,7 @@ Martin Fuzzey Imri Goldberg Bill Hart Christian Heimes +Roger Hu Devin Jeanpierre Ross Lawley Edward Loper diff --git a/CHANGES.txt b/CHANGES.txt index 47976b33..34c9473e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -21,6 +21,9 @@ Change history for Coverage.py issues a spurious warning about the trace function changing: "Trace function changed, measurement is likely wrong: None." This fixes `issue 164`_. +- Source files with encoding declarations, but a blank first line, were not + decoded properly. Now they are. Thanks, Roger Hu. + - The source kit now includes the `__main__.py` file in the root coverage directory, fixing `issue 255`_. diff --git a/coverage/phystokens.py b/coverage/phystokens.py index df569fc0..9281a447 100644 --- a/coverage/phystokens.py +++ b/coverage/phystokens.py @@ -122,7 +122,7 @@ def source_encoding(source): cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)") # Do this so the detect_encode code we copied will work. - readline = iter(source.splitlines()).next + readline = iter(source.splitlines(True)).next def _get_normal_name(orig_enc): """Imitates get_normal_name in tokenizer.c.""" @@ -188,7 +188,7 @@ def source_encoding(source): bom_found = True first = first[3:] default = 'utf-8-sig' - if first is None: + if not first: return default encoding = find_cookie(first) diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py index 5a9ddac6..261a2dbd 100644 --- a/tests/test_phystokens.py +++ b/tests/test_phystokens.py @@ -1,6 +1,6 @@ """Tests for Coverage.py's improved tokenizer.""" -import os, re +import os, re, sys from tests.coveragetest import CoverageTest from coverage.phystokens import source_token_lines, source_encoding @@ -78,16 +78,49 @@ class PhysTokensTest(CoverageTest): stress = os.path.join(HERE, "stress_phystoken_dos.tok") self.check_file_tokenization(stress) - def test_source_encoding_detect_utf8(self): - source = """\ -# coding=utf-8 -""" - self.assertEqual(source_encoding(source), 'utf-8') - - def test_source_encoding_second_line_detect_utf8(self): - """ Verifies that UTF-8 encoding will still be detected in spite of the newline.""" - source = """\ -# coding=utf-8 -""" - self.assertEqual(source_encoding(source), 'utf-8') +# source_encoding is only used on Py2. +if sys.version_info < (3, 0): + class SourceEncodingTest(CoverageTest): + """Tests of source_encoding() for detecting encodings on Py2.""" + + run_in_temp_dir = False + + if sys.version_info >= (2,4): + default_encoding = 'ascii' + else: + default_encoding = 'iso8859-1' + + def test_detect_source_encoding(self): + # Various forms from http://www.python.org/dev/peps/pep-0263/ + source = "# coding=cp850\n\n" + self.assertEqual(source_encoding(source), 'cp850') + source = "#!/usr/bin/python\n# -*- coding: utf-8 -*-\n" + self.assertEqual(source_encoding(source), 'utf-8') + source = "#!/usr/bin/python\n# vim: set fileencoding=utf8 :\n" + self.assertEqual(source_encoding(source), 'utf8') + source = "# This Python file uses this encoding: utf-8\n" + self.assertEqual(source_encoding(source), 'utf-8') + + def test_detect_source_encoding_on_second_line(self): + # A coding declaration should be found despite a first blank line. + source = "\n# coding=cp850\n\n" + self.assertEqual(source_encoding(source), 'cp850') + + def test_dont_detect_source_encoding_on_third_line(self): + # A coding declaration doesn't count on the third line. + source = "\n\n# coding=cp850\n\n" + self.assertEqual(source_encoding(source), self.default_encoding) + + def test_detect_source_encoding_of_empty_file(self): + # An important edge case. + self.assertEqual(source_encoding(""), self.default_encoding) + + def test_bom(self): + # A BOM means utf-8. + source = "\xEF\xBB\xBFtext = 'hello'\n" + self.assertEqual(source_encoding(source), 'utf-8-sig') + + # But it has to be the only authority. + source = "\xEF\xBB\xBF# coding: cp850\n" + self.assertRaises(SyntaxError, source_encoding, source) |