diff options
-rw-r--r-- | CHANGES.txt | 5 | ||||
-rw-r--r-- | coverage/backward.py | 75 | ||||
-rw-r--r-- | coverage/html.py | 20 | ||||
-rw-r--r-- | coverage/phystokens.py | 100 | ||||
-rw-r--r-- | test/farm/html/gold_isolatin1/index.html | 89 | ||||
-rw-r--r-- | test/farm/html/gold_isolatin1/isolatin1.html | 91 | ||||
-rw-r--r-- | test/farm/html/run_isolatin1.py | 21 | ||||
-rw-r--r-- | test/farm/html/src/isolatin1.py | 5 |
8 files changed, 357 insertions, 49 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index 1925d796..ccc41abe 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -5,6 +5,10 @@ Change history for Coverage.py Version 3.5.2b1 --------------- +- Source files with custom encodings declared in a comment at the top are now + properly handled during reporting on Python 2. Python 3 always handled them + properly. This fixes `issue 157`_. + - When running a module with ``coverage run -m <modulename>``, certain details of the execution environment weren't the same as for ``python -m <modulename>``. This had the unfortunate side-effect of making @@ -18,6 +22,7 @@ Version 3.5.2b1 the C tracer function, closing `issue 166`_. .. _issue 155: https://bitbucket.org/ned/coveragepy/issue/155/cant-use-coverage-run-m-unittest-discover +.. _issue 157: https://bitbucket.org/ned/coveragepy/issue/157/chokes-on-source-files-with-non-utf-8 .. _issue 166: https://bitbucket.org/ned/coveragepy/issue/166/dont-try-to-compile-c-extension-on-pypy diff --git a/coverage/backward.py b/coverage/backward.py index 93cb793a..236bef8e 100644 --- a/coverage/backward.py +++ b/coverage/backward.py @@ -6,7 +6,7 @@ # W0611: Unused import blah # W0622: Redefining built-in blah -import os, sys +import os, re, sys # Python 2.3 doesn't have `set` try: @@ -72,30 +72,38 @@ try: except ImportError: import ConfigParser as configparser -# Python 3.2 provides `tokenize.open`, the best way to open source files. -import tokenize -try: - open_source = tokenize.open # pylint: disable=E1101 -except AttributeError: +# Reading Python source and interpreting the coding comment is a big deal. +if sys.version_info >= (3, 0): + # Python 3.2 provides `tokenize.open`, the best way to open source files. 
+ import tokenize try: - detect_encoding = tokenize.detect_encoding # pylint: disable=E1101 + open_source = tokenize.open # pylint: disable=E1101 except AttributeError: - def open_source(fname): - """Open a source file the best way.""" - return open(fname, "rU") - else: - from io import TextIOWrapper - # Copied from the 3.2 stdlib: - def open_source(fname): - """Open a file in read only mode using the encoding detected by - detect_encoding(). - """ - buffer = open(fname, 'rb') - encoding, _ = detect_encoding(buffer.readline) - buffer.seek(0) - text = TextIOWrapper(buffer, encoding, line_buffering=True) - text.mode = 'r' - return text + try: + detect_encoding = tokenize.detect_encoding # pylint: disable=E1101 + except AttributeError: + assert 3 == 4 + def open_source(fname): + """Open a source file the best way.""" + return open(fname, "rU") + else: + from io import TextIOWrapper + # Copied from the 3.2 stdlib: + def open_source(fname): + """Open a file in read only mode using the encoding detected by + detect_encoding(). + """ + buffer = open(fname, 'rb') + encoding, _ = detect_encoding(buffer.readline) + buffer.seek(0) + text = TextIOWrapper(buffer, encoding, line_buffering=True) + text.mode = 'r' + return text +else: + def open_source(fname): + """Open a source file the best way.""" + return open(fname, "rU") + # Python 3.x is picky about bytes and strings, so provide methods to # get them right, and make them no-ops in 2.x @@ -117,27 +125,6 @@ else: """Convert bytes `b` to a string (no-op in 2.x).""" return b -# A few details about writing encoded text are different in 2.x and 3.x. -if sys.version_info >= (3, 0): - def write_encoded(fname, text, encoding='utf8', errors='strict'): - '''Write string `text` to file names `fname`, with encoding.''' - # Don't use "with", so that this file is still good for old 2.x. 
- f = open(fname, 'w', encoding=encoding, errors=errors) - try: - f.write(text) - finally: - f.close() -else: - # It's not clear that using utf8 strings in 2.x is the right thing to do. - def write_encoded(fname, text, encoding='utf8', errors='strict'): - '''Write utf8 string `text` to file names `fname`, with encoding.''' - import codecs - f = codecs.open(fname, 'w', encoding=encoding, errors=errors) - try: - f.write(text.decode('utf8')) - finally: - f.close() - # Md5 is available in different places. try: import hashlib diff --git a/coverage/html.py b/coverage/html.py index 00b92587..09683ad8 100644 --- a/coverage/html.py +++ b/coverage/html.py @@ -1,11 +1,11 @@ """HTML reporting for Coverage.""" -import os, re, shutil +import os, re, shutil, sys import coverage -from coverage.backward import pickle, write_encoded +from coverage.backward import pickle from coverage.misc import CoverageException, Hasher -from coverage.phystokens import source_token_lines +from coverage.phystokens import source_token_lines, source_encoding from coverage.report import Reporter from coverage.templite import Templite @@ -100,7 +100,11 @@ class HtmlReporter(Reporter): def write_html(self, fname, html): """Write `html` to `fname`, properly encoded.""" - write_encoded(fname, html, 'ascii', 'xmlcharrefreplace') + fout = open(fname, "wb") + try: + fout.write(html.encode('ascii', 'xmlcharrefreplace')) + finally: + fout.close() def file_hash(self, source, cu): """Compute a hash that changes if the file needs to be re-reported.""" @@ -128,6 +132,12 @@ class HtmlReporter(Reporter): self.status.set_file_hash(flat_rootname, this_hash) + # If need be, determine the encoding of the source file. We use it + # later to properly write the HTML. + if sys.version_info < (3, 0): + encoding = source_encoding(source) + + # Get the numbers for this file. 
nums = analysis.numbers missing_branch_arcs = analysis.missing_branch_arcs() @@ -195,6 +205,8 @@ class HtmlReporter(Reporter): html_path = os.path.join(self.directory, html_filename) html = spaceless(self.source_tmpl.render(locals())) + if sys.version_info < (3, 0): + html = html.decode(encoding) self.write_html(html_path, html) # Save this file's information for the index file. diff --git a/coverage/phystokens.py b/coverage/phystokens.py index fc4f2c90..850f78bd 100644 --- a/coverage/phystokens.py +++ b/coverage/phystokens.py @@ -1,6 +1,6 @@ """Better tokenizing for coverage.py.""" -import keyword, re, token, tokenize +import codecs, keyword, re, sys, token, tokenize from coverage.backward import StringIO # pylint: disable=W0622 def phys_tokens(toks): @@ -106,3 +106,101 @@ def source_token_lines(source): if line: yield line + +def source_encoding(source): + """Determine the encoding for `source` (a string), according to PEP 263. + + Returns a string, the name of the encoding. + + """ + # Note: this function should never be called on Python 3, since py3 has + # built-in tools to do this. + assert sys.version_info < (3, 0) + + # This is mostly code adapted rom Py3.2's tokenize module. + + cookie_re = re.compile("coding[:=]\s*([-\w.]+)") + + # Do this so the detect_encode code we copied will work. + readline = iter(source.splitlines()).next + + def _get_normal_name(orig_enc): + """Imitates get_normal_name in tokenizer.c.""" + # Only care about the first 12 characters. + enc = orig_enc[:12].lower().replace("_", "-") + if re.match(r"^utf-8($|-)", enc): + return "utf-8" + if re.match(r"^(latin-1|iso-8859-1|iso-latin-1)($|-)", enc): + return "iso-8859-1" + return orig_enc + + # From detect_encode(): + # It detects the encoding from the presence of a utf-8 bom or an encoding + # cookie as specified in pep-0263. If both a bom and a cookie are present, + # but disagree, a SyntaxError will be raised. If the encoding cookie is an + # invalid charset, raise a SyntaxError. 
Note that if a utf-8 bom is found, + # 'utf-8-sig' is returned. + + # If no encoding is specified, then the default will be returned. The + # default varied with version. + + if sys.version_info <= (2, 4): + default = 'iso-8859-1' + else: + default = 'ascii' + + bom_found = False + encoding = None + + def read_or_stop(): + """Get the next source line, or ''.""" + try: + return readline() + except StopIteration: + return '' + + def find_cookie(line): + """Find an encoding cookie in `line`.""" + try: + line_string = line.decode('ascii') + except UnicodeDecodeError: + return None + + matches = cookie_re.findall(line_string) + if not matches: + return None + encoding = _get_normal_name(matches[0]) + try: + codec = codecs.lookup(encoding) + except LookupError: + # This behaviour mimics the Python interpreter + raise SyntaxError("unknown encoding: " + encoding) + + if bom_found: + if codec.name != 'utf-8': + # This behaviour mimics the Python interpreter + raise SyntaxError('encoding problem: utf-8') + encoding += '-sig' + return encoding + + first = read_or_stop() + if first.startswith(codecs.BOM_UTF8): + bom_found = True + first = first[3:] + default = 'utf-8-sig' + if not first: + return default + + encoding = find_cookie(first) + if encoding: + return encoding + + second = read_or_stop() + if not second: + return default + + encoding = find_cookie(second) + if encoding: + return encoding + + return default diff --git a/test/farm/html/gold_isolatin1/index.html b/test/farm/html/gold_isolatin1/index.html new file mode 100644 index 00000000..6e9f3ca7 --- /dev/null +++ b/test/farm/html/gold_isolatin1/index.html @@ -0,0 +1,89 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> + <meta http-equiv='Content-Type' content='text/html; charset=utf-8'> + <title>Coverage report</title> + <link rel='stylesheet' href='style.css' type='text/css'> + <script type='text/javascript' src='jquery-1.4.3.min.js'></script> + <script 
type='text/javascript' src='jquery.tablesorter.min.js'></script> + <script type='text/javascript' src='jquery.hotkeys.js'></script> + <script type='text/javascript' src='coverage_html.js'></script> + <script type='text/javascript' charset='utf-8'> + jQuery(document).ready(coverage.index_ready); + </script> +</head> +<body id='indexfile'> + +<div id='header'> + <div class='content'> + <h1>Coverage report: + <span class='pc_cov'>100%</span> + </h1> + <img id='keyboard_icon' src='keybd_closed.png'> + </div> +</div> + +<div class='help_panel'> + <img id='panel_icon' src='keybd_open.png'> + <p class='legend'>Hot-keys on this page</p> + <div> + <p class='keyhelp'> + <span class='key'>n</span> + <span class='key'>s</span> + <span class='key'>m</span> + <span class='key'>x</span> + + <span class='key'>c</span> change column sorting + </p> + </div> +</div> + +<div id='index'> + <table class='index'> + <thead> + + <tr class='tablehead' title='Click to sort'> + <th class='name left headerSortDown shortkey_n'>Module</th> + <th class='shortkey_s'>statements</th> + <th class='shortkey_m'>missing</th> + <th class='shortkey_x'>excluded</th> + + <th class='right shortkey_c'>coverage</th> + </tr> + </thead> + + <tfoot> + <tr class='total'> + <td class='name left'>Total</td> + <td>2</td> + <td>0</td> + <td>0</td> + + <td class='right'>100%</td> + </tr> + </tfoot> + <tbody> + + <tr class='file'> + <td class='name left'><a href='isolatin1.html'>isolatin1</a></td> + <td>2</td> + <td>0</td> + <td>0</td> + + <td class='right'>100%</td> + </tr> + + </tbody> + </table> +</div> + +<div id='footer'> + <div class='content'> + <p> + <a class='nav' href='http://nedbatchelder.com/code/coverage/3.5.2b1'>coverage.py v3.5.2b1</a> + </p> + </div> +</div> + +</body> +</html> diff --git a/test/farm/html/gold_isolatin1/isolatin1.html b/test/farm/html/gold_isolatin1/isolatin1.html new file mode 100644 index 00000000..276a6c25 --- /dev/null +++ b/test/farm/html/gold_isolatin1/isolatin1.html @@ -0,0 +1,91 
@@ +<!doctype html PUBLIC "-//W3C//DTD html 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<html> +<head> + <meta http-equiv='Content-Type' content='text/html; charset=utf-8'> + + + <meta http-equiv='X-UA-Compatible' content='IE=emulateIE7' /> + <title>Coverage for isolatin1: 100%</title> + <link rel='stylesheet' href='style.css' type='text/css'> + <script type='text/javascript' src='jquery-1.4.3.min.js'></script> + <script type='text/javascript' src='jquery.hotkeys.js'></script> + <script type='text/javascript' src='jquery.isonscreen.js'></script> + <script type='text/javascript' src='coverage_html.js'></script> + <script type='text/javascript' charset='utf-8'> + jQuery(document).ready(coverage.pyfile_ready); + </script> +</head> +<body id='pyfile'> + +<div id='header'> + <div class='content'> + <h1>Coverage for <b>isolatin1</b> : + <span class='pc_cov'>100%</span> + </h1> + <img id='keyboard_icon' src='keybd_closed.png'> + <h2 class='stats'> + 2 statements + <span class='run hide_run shortkey_r' onclick='coverage.toggle_lines(this, "run")'>2 run</span> + <span class='mis shortkey_m' onclick='coverage.toggle_lines(this, "mis")'>0 missing</span> + <span class='exc shortkey_x' onclick='coverage.toggle_lines(this, "exc")'>0 excluded</span> + + </h2> + </div> +</div> + +<div class='help_panel'> + <img id='panel_icon' src='keybd_open.png'> +<p class='legend'>Hot-keys on this page</p> + <div> +<p class='keyhelp'> + <span class='key'>r</span> + <span class='key'>m</span> + <span class='key'>x</span> + <span class='key'>p</span> toggle line displays + </p> +<p class='keyhelp'> + <span class='key'>j</span> + <span class='key'>k</span> next/prev highlighted chunk + </p> +<p class='keyhelp'> + <span class='key'>0</span> (zero) top of page + </p> +<p class='keyhelp'> + <span class='key'>1</span> (one) first highlighted chunk + </p> + </div> +</div> + +<div id='source'> + <table cellspacing='0' cellpadding='0'> + <tr> + <td class='linenos' valign='top'> +<p id='n1' 
class='pln'><a href='#n1'>1</a></p> +<p id='n2' class='pln'><a href='#n2'>2</a></p> +<p id='n3' class='pln'><a href='#n3'>3</a></p> +<p id='n4' class='stm run hide_run'><a href='#n4'>4</a></p> +<p id='n5' class='stm run hide_run'><a href='#n5'>5</a></p> + + </td> + <td class='text' valign='top'> +<p id='t1' class='pln'><span class='com'># A python source file in another encoding.</span><span class='strut'> </span></p> +<p id='t2' class='pln'><span class='com'># -*- coding: iso8859-1 -*-</span><span class='strut'> </span></p> +<p id='t3' class='pln'><span class='strut'> </span></p> +<p id='t4' class='stm run hide_run'><span class='nam'>math</span> <span class='op'>=</span> <span class='str'>"3×4 = 12, ÷2 = 6±0"</span><span class='strut'> </span></p> +<p id='t5' class='stm run hide_run'><span class='key'>assert</span> <span class='nam'>len</span><span class='op'>(</span><span class='nam'>math</span><span class='op'>)</span> <span class='op'>==</span> <span class='num'>18</span><span class='strut'> </span></p> + + </td> + </tr> + </table> +</div> + +<div id='footer'> + <div class='content'> + <p> + <a class='nav' href='index.html'>« index</a> <a class='nav' href='http://nedbatchelder.com/code/coverage/3.5.2b1'>coverage.py v3.5.2b1</a> + </p> + </div> +</div> + +</body> +</html> diff --git a/test/farm/html/run_isolatin1.py b/test/farm/html/run_isolatin1.py new file mode 100644 index 00000000..3d4b23c5 --- /dev/null +++ b/test/farm/html/run_isolatin1.py @@ -0,0 +1,21 @@ +import sys + +def html_it(): + """Run coverage and make an HTML report for unicode.py.""" + import coverage + cov = coverage.coverage() + cov.start() + import isolatin1 + cov.stop() + cov.html_report(isolatin1, directory="../html_isolatin1") + +runfunc(html_it, rundir="src") + +# HTML files will change often. Check that the sizes are reasonable, +# and check that certain key strings are in the output. 
+compare("gold_isolatin1", "html_isolatin1", size_within=10, file_pattern="*.html") +contains("html_isolatin1/isolatin1.html", + "<span class='str'>&quot;3&#215;4 = 12, &#247;2 = 6&#177;0&quot;</span>", + ) + +clean("html_isolatin1") diff --git a/test/farm/html/src/isolatin1.py b/test/farm/html/src/isolatin1.py new file mode 100644 index 00000000..057c097b --- /dev/null +++ b/test/farm/html/src/isolatin1.py @@ -0,0 +1,5 @@ +# A python source file in another encoding. +# -*- coding: iso8859-1 -*- + +math = "3×4 = 12, ÷2 = 6±0" +assert len(math) == 18 |