1 files changed, 41 insertions, 16 deletions
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 7edd6aa4..b4a106fd 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -1,21 +1,26 @@
-"""Tests for Coverage.py's improved tokenizer."""
+# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
+# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
+
+"""Tests for coverage.py's improved tokenizer."""
 
 import os.path
 import re
 
 from coverage import env
 from coverage.phystokens import source_token_lines, source_encoding
+from coverage.phystokens import neuter_encoding_declaration
+from coverage.python import get_python_source
 
 from tests.coveragetest import CoverageTest
 
 
-SIMPLE = """\
+SIMPLE = u"""\
 # yay!
 def foo():
   say('two = %d' % 2)
 """
 
-MIXED_WS = """\
+MIXED_WS = u"""\
 def hello():
         a="Hello world!"
 \tb="indented"
@@ -25,7 +30,7 @@ HERE = os.path.dirname(__file__)
 
 
 class PhysTokensTest(CoverageTest):
-    """Tests for Coverage.py's improved tokenizer."""
+    """Tests for coverage.py's improved tokenizer."""
 
     run_in_temp_dir = False
 
@@ -44,9 +49,7 @@ class PhysTokensTest(CoverageTest):
 
     def check_file_tokenization(self, fname):
         """Use the contents of `fname` for `check_tokenization`."""
-        with open(fname) as f:
-            source = f.read()
-        self.check_tokenization(source)
+        self.check_tokenization(get_python_source(fname))
 
     def test_simple(self):
         self.assertEqual(list(source_token_lines(SIMPLE)),
@@ -92,21 +95,27 @@ else:
     DEF_ENCODING = "ascii"
 
 
+ENCODING_DECLARATION_SOURCES = [
+    # Various forms from http://www.python.org/dev/peps/pep-0263/ -- all declare cp850.
+    b"# coding=cp850\n\n",  # "coding=" on the first line
+    b"#!/usr/bin/python\n# -*- coding: cp850 -*-\n",  # "-*- ... -*-" form after a shebang
+    b"#!/usr/bin/python\n# vim: set fileencoding=cp850:\n",  # "vim: set" form after a shebang
+    b"# This Python file uses this encoding: cp850\n",  # declaration inside a sentence
+    b"# This file uses a different encoding:\n# coding: cp850\n",  # declaration on line two
+]
+
 class SourceEncodingTest(CoverageTest):
     """Tests of source_encoding() for detecting encodings."""
 
     run_in_temp_dir = False
 
     def test_detect_source_encoding(self):
-        # Various forms from http://www.python.org/dev/peps/pep-0263/
-        source = b"# coding=cp850\n\n"
-        self.assertEqual(source_encoding(source), 'cp850')
-        source = b"#!/usr/bin/python\n# -*- coding: utf-8 -*-\n"
-        self.assertEqual(source_encoding(source), 'utf-8')
-        source = b"#!/usr/bin/python\n# vim: set fileencoding=utf8 :\n"
-        self.assertEqual(source_encoding(source), 'utf8')
-        source = b"# This Python file uses this encoding: utf-8\n"
-        self.assertEqual(source_encoding(source), 'utf-8')
+        for source in ENCODING_DECLARATION_SOURCES:
+            self.assertEqual(
+                source_encoding(source),
+                'cp850',
+                "Wrong encoding in %r" % source
+            )
 
     def test_detect_source_encoding_not_in_comment(self):
         if env.PYPY and env.PY3:
@@ -140,3 +149,19 @@ class SourceEncodingTest(CoverageTest):
         source = b"\xEF\xBB\xBF# coding: cp850\n"
         with self.assertRaises(SyntaxError):
             source_encoding(source)
+
+
+class NeuterEncodingDeclarationTest(CoverageTest):
+    """Tests of phystokens.neuter_encoding_declaration()."""
+
+    run_in_temp_dir = False
+
+    def test_neuter_encoding_declaration(self):
+        for source in ENCODING_DECLARATION_SOURCES:  # bytes, each declaring cp850
+            neutered = neuter_encoding_declaration(source.decode("ascii"))  # decoded to text first
+            neutered = neutered.encode("ascii")  # re-encoded: source_encoding() is given bytes
+            self.assertEqual(
+                source_encoding(neutered),
+                DEF_ENCODING,  # expect the default, i.e. the cp850 declaration is not detected
+                "Wrong encoding in %r" % neutered
+            )