Improve and more fully test the first-blank-line decoding fix. Thanks, Roger Hu.

author: Ned Batchelder <ned@nedbatchelder.com> 2013-09-16 10:25:36 -0400
committer: Ned Batchelder <ned@nedbatchelder.com> 2013-09-16 10:25:36 -0400
commit: 58b2eb011cb5036a1654eb731d4cf30f467eda89 (patch)
tree: 8273527b699bcc07edd6fe4dbcec2e808d7fa6fa
parent: 6fa405828c0cfccc1d8181df27613802ba204119 (diff)
download: python-coveragepy-git-58b2eb011cb5036a1654eb731d4cf30f467eda89.tar.gz
4 files changed, 52 insertions, 15 deletions
diff --git a/AUTHORS.txt b/AUTHORS.txt
index 2202f9c0..5ea7e040 100644
--- a/AUTHORS.txt
+++ b/AUTHORS.txt
@@ -20,6 +20,7 @@ Martin Fuzzey
 Imri Goldberg
 Bill Hart
 Christian Heimes
+Roger Hu
 Devin Jeanpierre
 Ross Lawley
 Edward Loper
diff --git a/CHANGES.txt b/CHANGES.txt
index 47976b33..34c9473e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -21,6 +21,9 @@ Change history for Coverage.py
   issues a spurious warning about the trace function changing: "Trace function
   changed, measurement is likely wrong: None."  This fixes `issue 164`_.
 
+- Source files with encoding declarations, but a blank first line, were not
+  decoded properly.  Now they are.  Thanks, Roger Hu.
+
 - The source kit now includes the `__main__.py` file in the root coverage
   directory, fixing `issue 255`_.
 
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index df569fc0..9281a447 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -122,7 +122,7 @@ def source_encoding(source):
     cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
 
     # Do this so the detect_encode code we copied will work.
-    readline = iter(source.splitlines()).next
+    readline = iter(source.splitlines(True)).next
 
     def _get_normal_name(orig_enc):
         """Imitates get_normal_name in tokenizer.c."""
@@ -188,7 +188,7 @@ def source_encoding(source):
         bom_found = True
         first = first[3:]
         default = 'utf-8-sig'
-    if first is None:
+    if not first:
         return default
 
     encoding = find_cookie(first)
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 5a9ddac6..261a2dbd 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -1,6 +1,6 @@
 """Tests for Coverage.py's improved tokenizer."""
 
-import os, re
+import os, re, sys
 from tests.coveragetest import CoverageTest
 from coverage.phystokens import source_token_lines, source_encoding
 
@@ -78,16 +78,49 @@ class PhysTokensTest(CoverageTest):
         stress = os.path.join(HERE, "stress_phystoken_dos.tok")
         self.check_file_tokenization(stress)
 
-    def test_source_encoding_detect_utf8(self):
-        source = """\
-# coding=utf-8
-"""
-        self.assertEqual(source_encoding(source), 'utf-8')
-
-    def test_source_encoding_second_line_detect_utf8(self):
-        """ Verifies that UTF-8 encoding will still be detected in spite of the newline."""
-        source = """\
 
-# coding=utf-8
-"""
-        self.assertEqual(source_encoding(source), 'utf-8')
+# source_encoding is only used on Py2.
+if sys.version_info < (3, 0):
+    class SourceEncodingTest(CoverageTest):
+        """Tests of source_encoding() for detecting encodings on Py2."""
+
+        run_in_temp_dir = False
+
+        if sys.version_info >= (2,4):
+            default_encoding = 'ascii'
+        else:
+            default_encoding = 'iso8859-1'
+
+        def test_detect_source_encoding(self):
+            # Various forms from http://www.python.org/dev/peps/pep-0263/
+            source = "# coding=cp850\n\n"
+            self.assertEqual(source_encoding(source), 'cp850')
+            source = "#!/usr/bin/python\n# -*- coding: utf-8 -*-\n"
+            self.assertEqual(source_encoding(source), 'utf-8')
+            source = "#!/usr/bin/python\n# vim: set fileencoding=utf8 :\n"
+            self.assertEqual(source_encoding(source), 'utf8')
+            source = "# This Python file uses this encoding: utf-8\n"
+            self.assertEqual(source_encoding(source), 'utf-8')
+
+        def test_detect_source_encoding_on_second_line(self):
+            # A coding declaration should be found despite a first blank line.
+            source = "\n# coding=cp850\n\n"
+            self.assertEqual(source_encoding(source), 'cp850')
+
+        def test_dont_detect_source_encoding_on_third_line(self):
+            # A coding declaration doesn't count on the third line.
+            source = "\n\n# coding=cp850\n\n"
+            self.assertEqual(source_encoding(source), self.default_encoding)
+
+        def test_detect_source_encoding_of_empty_file(self):
+            # An important edge case.
+            self.assertEqual(source_encoding(""), self.default_encoding)
+
+        def test_bom(self):
+            # A BOM means utf-8.
+            source = "\xEF\xBB\xBFtext = 'hello'\n"
+            self.assertEqual(source_encoding(source), 'utf-8-sig')
+
+            # But it has to be the only authority.
+            source = "\xEF\xBB\xBF# coding: cp850\n"
+            self.assertRaises(SyntaxError, source_encoding, source)
author	Ned Batchelder <ned@nedbatchelder.com>	2013-09-16 10:25:36 -0400
committer	Ned Batchelder <ned@nedbatchelder.com>	2013-09-16 10:25:36 -0400
commit	58b2eb011cb5036a1654eb731d4cf30f467eda89 (patch)
tree	8273527b699bcc07edd6fe4dbcec2e808d7fa6fa
parent	6fa405828c0cfccc1d8181df27613802ba204119 (diff)
download	python-coveragepy-git-58b2eb011cb5036a1654eb731d4cf30f467eda89.tar.gz