From 460d303ed2361abe554da075c184a370d7d54701 Mon Sep 17 00:00:00 2001
From: Anthony Sottile
Date: Sun, 29 Jun 2014 14:35:02 -0700
Subject: Fix non-comment encoding detection.

--HG--
branch : fix_source_encoding
---
 coverage/phystokens.py   | 2 +-
 tests/test_phystokens.py | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index e79ce01f..867388f7 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -120,7 +120,7 @@ def source_encoding(source):
 
     # This is mostly code adapted from Py3.2's tokenize module.
 
-    cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
+    cookie_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)")
 
     # Do this so the detect_encode code we copied will work.
     readline = iter(source.splitlines(True)).next
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index e15400b6..4755c167 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -97,6 +97,11 @@ if sys.version_info < (3, 0):
             source = "# This Python file uses this encoding: utf-8\n"
             self.assertEqual(source_encoding(source), 'utf-8')
 
+        def test_detect_source_encoding_not_in_comment(self):
+            # Should not detect anything here
+            source = 'def parse(src, encoding=None):\n pass'
+            self.assertEqual(source_encoding(source), 'ascii')
+
         def test_detect_source_encoding_on_second_line(self):
             # A coding declaration should be found despite a first blank line.
             source = "\n# coding=cp850\n\n"
--
cgit v1.2.1