diff options
author | Roger Hu <roger.hu@gmail.com> | 2013-05-25 01:31:27 +0000 |
---|---|---|
committer | Roger Hu <roger.hu@gmail.com> | 2013-05-25 01:31:27 +0000 |
commit | 6f35b133e6c615e28c126975a1698ee669630cff (patch) | |
tree | c34f71e8cd1510342fa8481046c8c143380ca341 | |
parent | b33d4557dd46651034bc37030565e6b2a362ee6c (diff) | |
download | python-coveragepy-git-6f35b133e6c615e28c126975a1698ee669630cff.tar.gz |
Make UTF-8 detection more robust.
If the first line of the Python source is blank/empty, the function assumes that the encoding is
'ascii' and doesn't check the second line for a coding declaration.
-rw-r--r-- | coverage/phystokens.py | 2 | ||||
-rw-r--r-- | tests/test_phystokens.py | 16 |
2 files changed, 16 insertions, 2 deletions
```diff
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 166020e1..df569fc0 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -188,7 +188,7 @@ def source_encoding(source):
         bom_found = True
         first = first[3:]
         default = 'utf-8-sig'
-    if not first:
+    if first is None:
         return default
 
     encoding = find_cookie(first)
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 6d38a0e5..5a9ddac6 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -2,7 +2,7 @@
 
 import os, re
 from tests.coveragetest import CoverageTest
-from coverage.phystokens import source_token_lines
+from coverage.phystokens import source_token_lines, source_encoding
 
 
 SIMPLE = """\
@@ -77,3 +77,17 @@ class PhysTokensTest(CoverageTest):
         self.check_file_tokenization(stress)
         stress = os.path.join(HERE, "stress_phystoken_dos.tok")
         self.check_file_tokenization(stress)
+
+    def test_source_encoding_detect_utf8(self):
+        source = """\
+# coding=utf-8
+"""
+        self.assertEqual(source_encoding(source), 'utf-8')
+
+    def test_source_encoding_second_line_detect_utf8(self):
+        """ Verifies that UTF-8 encoding will still be detected in spite of the newline."""
+        source = """\
+
+# coding=utf-8
+"""
+        self.assertEqual(source_encoding(source), 'utf-8')
```