diff options
author | Ned Batchelder <ned@nedbatchelder.com> | 2022-11-06 15:51:44 -0500 |
---|---|---|
committer | Ned Batchelder <ned@nedbatchelder.com> | 2022-11-06 16:16:04 -0500 |
commit | 556344babd5210c093eba547d1b15489843f4359 (patch) | |
tree | 5ea9c3f84044722f116ebbfad4bb66cda91b8a87 /coverage | |
parent | faaf0d45abcf0a11c9e5db144c5b79f581dd92eb (diff) | |
download | python-coveragepy-git-556344babd5210c093eba547d1b15489843f4359.tar.gz |
refactor: no need for special handling of compiling unicode source
This was a holdover from Python 2 days.
Diffstat (limited to 'coverage')
-rw-r--r-- | coverage/execfile.py | 4 |
-rw-r--r-- | coverage/parser.py | 10 |
-rw-r--r-- | coverage/phystokens.py | 30 |
3 files changed, 4 insertions, 40 deletions
diff --git a/coverage/execfile.py b/coverage/execfile.py index b5d3a65f..93dffcd1 100644 --- a/coverage/execfile.py +++ b/coverage/execfile.py @@ -16,7 +16,6 @@ from coverage import env from coverage.exceptions import CoverageException, _ExceptionDuringRun, NoCode, NoSource from coverage.files import canonical_filename, python_reported_file from coverage.misc import isolate_module -from coverage.phystokens import compile_unicode from coverage.python import get_python_source os = isolate_module(os) @@ -274,8 +273,7 @@ def make_code_from_py(filename): except (OSError, NoSource) as exc: raise NoSource(f"No file to run: '{filename}'") from exc - code = compile_unicode(source, filename, "exec") - return code + return compile(source, filename, "exec") def make_code_from_pyc(filename): diff --git a/coverage/parser.py b/coverage/parser.py index c4fef9ce..135a3b18 100644 --- a/coverage/parser.py +++ b/coverage/parser.py @@ -15,7 +15,7 @@ from coverage.bytecode import code_objects from coverage.debug import short_stack from coverage.exceptions import NoSource, NotPython, _StopEverything from coverage.misc import contract, join_regex, new_contract, nice_pair, one_of -from coverage.phystokens import compile_unicode, generate_tokens, neuter_encoding_declaration +from coverage.phystokens import generate_tokens class PythonParser: @@ -359,7 +359,7 @@ class ByteParser: self.code = code else: try: - self.code = compile_unicode(text, filename, "exec") + self.code = compile(text, filename, "exec") except SyntaxError as synerr: raise NotPython( "Couldn't parse '%s' as Python source: '%s' at line %d" % ( @@ -624,17 +624,13 @@ class NodeList: # TODO: the cause messages have too many commas. # TODO: Shouldn't the cause messages join with "and" instead of "or"? 
-def ast_parse(text): - """How we create an AST parse.""" - return ast.parse(neuter_encoding_declaration(text)) - class AstArcAnalyzer: """Analyze source text with an AST to find executable code paths.""" @contract(text='unicode', statements=set) def __init__(self, text, statements, multiline): - self.root_node = ast_parse(text) + self.root_node = ast.parse(text) # TODO: I think this is happening in too many places. self.statements = {multiline.get(l, l) for l in statements} self.multiline = multiline diff --git a/coverage/phystokens.py b/coverage/phystokens.py index c6dc1e0a..07ad5349 100644 --- a/coverage/phystokens.py +++ b/coverage/phystokens.py @@ -184,8 +184,6 @@ class CachedTokenizer: generate_tokens = CachedTokenizer().generate_tokens -COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)", flags=re.MULTILINE) - @contract(source='bytes') def source_encoding(source): """Determine the encoding for `source`, according to PEP 263. @@ -197,31 +195,3 @@ def source_encoding(source): """ readline = iter(source.splitlines(True)).__next__ return tokenize.detect_encoding(readline)[0] - - -@contract(source='unicode') -def compile_unicode(source, filename, mode): - """Just like the `compile` builtin, but works on any Unicode string. - - Python 2's compile() builtin has a stupid restriction: if the source string - is Unicode, then it may not have a encoding declaration in it. Why not? - Who knows! It also decodes to utf-8, and then tries to interpret those - utf-8 bytes according to the encoding declaration. Why? Who knows! - - This function neuters the coding declaration, and compiles it. 
- - """ - source = neuter_encoding_declaration(source) - code = compile(source, filename, mode) - return code - - -@contract(source='unicode', returns='unicode') -def neuter_encoding_declaration(source): - """Return `source`, with any encoding declaration neutered.""" - if COOKIE_RE.search(source): - source_lines = source.splitlines(True) - for lineno in range(min(2, len(source_lines))): - source_lines[lineno] = COOKIE_RE.sub("# (deleted declaration)", source_lines[lineno]) - source = "".join(source_lines) - return source |