diff options
author | Ned Batchelder <ned@nedbatchelder.com> | 2015-05-17 20:15:42 -0400 |
---|---|---|
committer | Ned Batchelder <ned@nedbatchelder.com> | 2015-05-17 20:15:42 -0400 |
commit | 9e764b195723b15f29ed8182d1c6e37ac4a52c58 (patch) | |
tree | b36b42b2c83c1c7fa27b9955ea257a64d36391c4 /coverage/phystokens.py | |
parent | 5c78f193074cd74154020a6053720fa09dc26928 (diff) | |
download | python-coveragepy-git-9e764b195723b15f29ed8182d1c6e37ac4a52c58.tar.gz |
All Python source is Unicode internally.
Unfortunately, this meant hacking around a silly Python 2 restriction
(can't compile a Unicode string containing an encoding declaration).
Diffstat (limited to 'coverage/phystokens.py')
-rw-r--r-- | coverage/phystokens.py | 42 |
1 file changed, 39 insertions, 3 deletions
diff --git a/coverage/phystokens.py b/coverage/phystokens.py index ed6bd238..d21d401c 100644 --- a/coverage/phystokens.py +++ b/coverage/phystokens.py @@ -8,6 +8,7 @@ import tokenize from coverage import env from coverage.backward import iternext +from coverage.misc import contract def phys_tokens(toks): @@ -148,6 +149,8 @@ class CachedTokenizer(object): generate_tokens = CachedTokenizer().generate_tokens +COOKIE_RE = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", flags=re.MULTILINE) + def _source_encoding_py2(source): """Determine the encoding for `source`, according to PEP 263. @@ -165,8 +168,6 @@ def _source_encoding_py2(source): # This is mostly code adapted from Py3.2's tokenize module. - cookie_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)") - def _get_normal_name(orig_enc): """Imitates get_normal_name in tokenizer.c.""" # Only care about the first 12 characters. @@ -204,7 +205,7 @@ def _source_encoding_py2(source): except UnicodeDecodeError: return None - matches = cookie_re.findall(line_string) + matches = COOKIE_RE.findall(line_string) if not matches: return None encoding = _get_normal_name(matches[0]) @@ -265,3 +266,38 @@ if env.PY3: source_encoding = _source_encoding_py3 else: source_encoding = _source_encoding_py2 + + +@contract(source='unicode') +def compile_unicode(source, filename, mode): + """Just like the `compile` builtin, but works on any Unicode string. + + Python 2's compile() builtin has a stupid restriction: if the source string + is Unicode, then it may not have a encoding declaration in it. Why not? + Who knows! + + This function catches that exception, neuters the coding declaration, and + compiles it anyway. 
+ + """ + try: + code = compile(source, filename, mode) + except SyntaxError as synerr: + if synerr.args[0] != "encoding declaration in Unicode string": + raise + source = neuter_encoding_declaration(source) + code = compile(source, filename, mode) + + return code + + +@contract(source='unicode', returns='unicode') +def neuter_encoding_declaration(source): + """Return `source`, with any encoding declaration neutered. + + This function will only ever be called on `source` that has an encoding + declaration, so some edge cases can be ignored. + + """ + source = COOKIE_RE.sub("# (deleted declaration)", source) + return source |