summary refs log tree commit diff
diff options
context:
space:
mode:
author Roger Hu <roger.hu@gmail.com> 2013-05-25 01:31:27 +0000
committer Roger Hu <roger.hu@gmail.com> 2013-05-25 01:31:27 +0000
commit 6f35b133e6c615e28c126975a1698ee669630cff (patch)
tree c34f71e8cd1510342fa8481046c8c143380ca341
parent b33d4557dd46651034bc37030565e6b2a362ee6c (diff)
download python-coveragepy-git-6f35b133e6c615e28c126975a1698ee669630cff.tar.gz
Make UTF-8 detection more robust.
Previously, if the first line of the Python source was blank/empty, the function assumed that the encoding was 'ascii' and did not try the second line.
-rw-r--r-- coverage/phystokens.py 2
-rw-r--r-- tests/test_phystokens.py 16
2 files changed, 16 insertions, 2 deletions
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 166020e1..df569fc0 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -188,7 +188,7 @@ def source_encoding(source):
bom_found = True
first = first[3:]
default = 'utf-8-sig'
- if not first:
+ if first is None:
return default
encoding = find_cookie(first)
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 6d38a0e5..5a9ddac6 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -2,7 +2,7 @@
import os, re
from tests.coveragetest import CoverageTest
-from coverage.phystokens import source_token_lines
+from coverage.phystokens import source_token_lines, source_encoding
SIMPLE = """\
@@ -77,3 +77,17 @@ class PhysTokensTest(CoverageTest):
self.check_file_tokenization(stress)
stress = os.path.join(HERE, "stress_phystoken_dos.tok")
self.check_file_tokenization(stress)
+
+ def test_source_encoding_detect_utf8(self):
+ source = """\
+# coding=utf-8
+"""
+ self.assertEqual(source_encoding(source), 'utf-8')
+
+ def test_source_encoding_second_line_detect_utf8(self):
+ """ Verifies that UTF-8 encoding will still be detected in spite of the newline."""
+ source = """\
+
+# coding=utf-8
+"""
+ self.assertEqual(source_encoding(source), 'utf-8')