refactor: no need for special handling of compiling unicode source

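The compile_unicode() and neuter_encoding_declaration() helpers were a holdover from Python 2, whose compile() rejected Unicode source that contained an encoding declaration. Python 3's compile() and ast.parse() accept such source directly.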
author: Ned Batchelder <ned@nedbatchelder.com> 2022-11-06 15:51:44 -0500
committer: Ned Batchelder <ned@nedbatchelder.com> 2022-11-06 16:16:04 -0500
commit: 556344babd5210c093eba547d1b15489843f4359 (patch)
tree: 5ea9c3f84044722f116ebbfad4bb66cda91b8a87 /coverage
parent: faaf0d45abcf0a11c9e5db144c5b79f581dd92eb (diff)
download: python-coveragepy-git-556344babd5210c093eba547d1b15489843f4359.tar.gz
3 files changed, 4 insertions, 40 deletions
diff --git a/coverage/execfile.py b/coverage/execfile.py
index b5d3a65f..93dffcd1 100644
--- a/coverage/execfile.py
+++ b/coverage/execfile.py
@@ -16,7 +16,6 @@ from coverage import env
 from coverage.exceptions import CoverageException, _ExceptionDuringRun, NoCode, NoSource
 from coverage.files import canonical_filename, python_reported_file
 from coverage.misc import isolate_module
-from coverage.phystokens import compile_unicode
 from coverage.python import get_python_source
 
 os = isolate_module(os)
@@ -274,8 +273,7 @@ def make_code_from_py(filename):
     except (OSError, NoSource) as exc:
         raise NoSource(f"No file to run: '{filename}'") from exc
 
-    code = compile_unicode(source, filename, "exec")
-    return code
+    return compile(source, filename, "exec")
 
 
 def make_code_from_pyc(filename):
diff --git a/coverage/parser.py b/coverage/parser.py
index c4fef9ce..135a3b18 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -15,7 +15,7 @@ from coverage.bytecode import code_objects
 from coverage.debug import short_stack
 from coverage.exceptions import NoSource, NotPython, _StopEverything
 from coverage.misc import contract, join_regex, new_contract, nice_pair, one_of
-from coverage.phystokens import compile_unicode, generate_tokens, neuter_encoding_declaration
+from coverage.phystokens import generate_tokens
 
 
 class PythonParser:
@@ -359,7 +359,7 @@ class ByteParser:
             self.code = code
         else:
             try:
-                self.code = compile_unicode(text, filename, "exec")
+                self.code = compile(text, filename, "exec")
             except SyntaxError as synerr:
                 raise NotPython(
                     "Couldn't parse '%s' as Python source: '%s' at line %d" % (
@@ -624,17 +624,13 @@ class NodeList:
 # TODO: the cause messages have too many commas.
 # TODO: Shouldn't the cause messages join with "and" instead of "or"?
 
-def ast_parse(text):
-    """How we create an AST parse."""
-    return ast.parse(neuter_encoding_declaration(text))
-
 
 class AstArcAnalyzer:
     """Analyze source text with an AST to find executable code paths."""
 
     @contract(text='unicode', statements=set)
     def __init__(self, text, statements, multiline):
-        self.root_node = ast_parse(text)
+        self.root_node = ast.parse(text)
         # TODO: I think this is happening in too many places.
         self.statements = {multiline.get(l, l) for l in statements}
         self.multiline = multiline
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index c6dc1e0a..07ad5349 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -184,8 +184,6 @@ class CachedTokenizer:
 generate_tokens = CachedTokenizer().generate_tokens
 
 
-COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)", flags=re.MULTILINE)
-
 @contract(source='bytes')
 def source_encoding(source):
     """Determine the encoding for `source`, according to PEP 263.
@@ -197,31 +195,3 @@ def source_encoding(source):
     """
     readline = iter(source.splitlines(True)).__next__
     return tokenize.detect_encoding(readline)[0]
-
-
-@contract(source='unicode')
-def compile_unicode(source, filename, mode):
-    """Just like the `compile` builtin, but works on any Unicode string.
-
-    Python 2's compile() builtin has a stupid restriction: if the source string
-    is Unicode, then it may not have a encoding declaration in it.  Why not?
-    Who knows!  It also decodes to utf-8, and then tries to interpret those
-    utf-8 bytes according to the encoding declaration.  Why? Who knows!
-
-    This function neuters the coding declaration, and compiles it.
-
-    """
-    source = neuter_encoding_declaration(source)
-    code = compile(source, filename, mode)
-    return code
-
-
-@contract(source='unicode', returns='unicode')
-def neuter_encoding_declaration(source):
-    """Return `source`, with any encoding declaration neutered."""
-    if COOKIE_RE.search(source):
-        source_lines = source.splitlines(True)
-        for lineno in range(min(2, len(source_lines))):
-            source_lines[lineno] = COOKIE_RE.sub("# (deleted declaration)", source_lines[lineno])
-        source = "".join(source_lines)
-    return source
author	Ned Batchelder <ned@nedbatchelder.com>	2022-11-06 15:51:44 -0500
committer	Ned Batchelder <ned@nedbatchelder.com>	2022-11-06 16:16:04 -0500
commit	556344babd5210c093eba547d1b15489843f4359 (patch)
tree	5ea9c3f84044722f116ebbfad4bb66cda91b8a87 /coverage
parent	faaf0d45abcf0a11c9e5db144c5b79f581dd92eb (diff)
download	python-coveragepy-git-556344babd5210c093eba547d1b15489843f4359.tar.gz