summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ned Batchelder <ned@nedbatchelder.com> 2012-03-21 20:59:26 -0400
committer Ned Batchelder <ned@nedbatchelder.com> 2012-03-21 20:59:26 -0400
commit 6ca614a61e82f3b2eb0cb8994f53bcb57713beb0 (patch)
tree f8695a84c243b23189c003d92b0a25eb8b47eb1f
parent d0ee1989ec56dc039ad880c1d554e66dd4caddd8 (diff)
download python-coveragepy-git-6ca614a61e82f3b2eb0cb8994f53bcb57713beb0.tar.gz
If a source file has an encoding declaration, use it when producing the HTML. Fixes issue #157.
-rw-r--r-- CHANGES.txt 5
-rw-r--r-- coverage/backward.py 75
-rw-r--r-- coverage/html.py 20
-rw-r--r-- coverage/phystokens.py 100
-rw-r--r-- test/farm/html/gold_isolatin1/index.html 89
-rw-r--r-- test/farm/html/gold_isolatin1/isolatin1.html 91
-rw-r--r-- test/farm/html/run_isolatin1.py 21
-rw-r--r-- test/farm/html/src/isolatin1.py 5
8 files changed, 357 insertions, 49 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 1925d796..ccc41abe 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -5,6 +5,10 @@ Change history for Coverage.py
Version 3.5.2b1
---------------
+- Source files with custom encodings declared in a comment at the top are now
+ properly handled during reporting on Python 2. Python 3 always handled them
+ properly. This fixes `issue 157`_.
+
- When running a module with ``coverage run -m <modulename>``, certain details
of the execution environment weren't the same as for
``python -m <modulename>``. This had the unfortunate side-effect of making
@@ -18,6 +22,7 @@ Version 3.5.2b1
the C tracer function, closing `issue 166`_.
.. _issue 155: https://bitbucket.org/ned/coveragepy/issue/155/cant-use-coverage-run-m-unittest-discover
+.. _issue 157: https://bitbucket.org/ned/coveragepy/issue/157/chokes-on-source-files-with-non-utf-8
.. _issue 166: https://bitbucket.org/ned/coveragepy/issue/166/dont-try-to-compile-c-extension-on-pypy
diff --git a/coverage/backward.py b/coverage/backward.py
index 93cb793a..236bef8e 100644
--- a/coverage/backward.py
+++ b/coverage/backward.py
@@ -6,7 +6,7 @@
# W0611: Unused import blah
# W0622: Redefining built-in blah
-import os, sys
+import os, re, sys
# Python 2.3 doesn't have `set`
try:
@@ -72,30 +72,38 @@ try:
except ImportError:
import ConfigParser as configparser
-# Python 3.2 provides `tokenize.open`, the best way to open source files.
-import tokenize
-try:
- open_source = tokenize.open # pylint: disable=E1101
-except AttributeError:
+# Reading Python source and interpreting the coding comment is a big deal.
+if sys.version_info >= (3, 0):
+ # Python 3.2 provides `tokenize.open`, the best way to open source files.
+ import tokenize
try:
- detect_encoding = tokenize.detect_encoding # pylint: disable=E1101
+ open_source = tokenize.open # pylint: disable=E1101
except AttributeError:
- def open_source(fname):
- """Open a source file the best way."""
- return open(fname, "rU")
- else:
- from io import TextIOWrapper
- # Copied from the 3.2 stdlib:
- def open_source(fname):
- """Open a file in read only mode using the encoding detected by
- detect_encoding().
- """
- buffer = open(fname, 'rb')
- encoding, _ = detect_encoding(buffer.readline)
- buffer.seek(0)
- text = TextIOWrapper(buffer, encoding, line_buffering=True)
- text.mode = 'r'
- return text
+ try:
+ detect_encoding = tokenize.detect_encoding # pylint: disable=E1101
+ except AttributeError:
+ assert 3 == 4
+ def open_source(fname):
+ """Open a source file the best way."""
+ return open(fname, "rU")
+ else:
+ from io import TextIOWrapper
+ # Copied from the 3.2 stdlib:
+ def open_source(fname):
+ """Open a file in read only mode using the encoding detected by
+ detect_encoding().
+ """
+ buffer = open(fname, 'rb')
+ encoding, _ = detect_encoding(buffer.readline)
+ buffer.seek(0)
+ text = TextIOWrapper(buffer, encoding, line_buffering=True)
+ text.mode = 'r'
+ return text
+else:
+ def open_source(fname):
+ """Open a source file the best way."""
+ return open(fname, "rU")
+
# Python 3.x is picky about bytes and strings, so provide methods to
# get them right, and make them no-ops in 2.x
@@ -117,27 +125,6 @@ else:
"""Convert bytes `b` to a string (no-op in 2.x)."""
return b
-# A few details about writing encoded text are different in 2.x and 3.x.
-if sys.version_info >= (3, 0):
- def write_encoded(fname, text, encoding='utf8', errors='strict'):
- '''Write string `text` to file names `fname`, with encoding.'''
- # Don't use "with", so that this file is still good for old 2.x.
- f = open(fname, 'w', encoding=encoding, errors=errors)
- try:
- f.write(text)
- finally:
- f.close()
-else:
- # It's not clear that using utf8 strings in 2.x is the right thing to do.
- def write_encoded(fname, text, encoding='utf8', errors='strict'):
- '''Write utf8 string `text` to file names `fname`, with encoding.'''
- import codecs
- f = codecs.open(fname, 'w', encoding=encoding, errors=errors)
- try:
- f.write(text.decode('utf8'))
- finally:
- f.close()
-
# Md5 is available in different places.
try:
import hashlib
diff --git a/coverage/html.py b/coverage/html.py
index 00b92587..09683ad8 100644
--- a/coverage/html.py
+++ b/coverage/html.py
@@ -1,11 +1,11 @@
"""HTML reporting for Coverage."""
-import os, re, shutil
+import os, re, shutil, sys
import coverage
-from coverage.backward import pickle, write_encoded
+from coverage.backward import pickle
from coverage.misc import CoverageException, Hasher
-from coverage.phystokens import source_token_lines
+from coverage.phystokens import source_token_lines, source_encoding
from coverage.report import Reporter
from coverage.templite import Templite
@@ -100,7 +100,11 @@ class HtmlReporter(Reporter):
def write_html(self, fname, html):
"""Write `html` to `fname`, properly encoded."""
- write_encoded(fname, html, 'ascii', 'xmlcharrefreplace')
+ fout = open(fname, "wb")
+ try:
+ fout.write(html.encode('ascii', 'xmlcharrefreplace'))
+ finally:
+ fout.close()
def file_hash(self, source, cu):
"""Compute a hash that changes if the file needs to be re-reported."""
@@ -128,6 +132,12 @@ class HtmlReporter(Reporter):
self.status.set_file_hash(flat_rootname, this_hash)
+ # If need be, determine the encoding of the source file. We use it
+ # later to properly write the HTML.
+ if sys.version_info < (3, 0):
+ encoding = source_encoding(source)
+
+ # Get the numbers for this file.
nums = analysis.numbers
missing_branch_arcs = analysis.missing_branch_arcs()
@@ -195,6 +205,8 @@ class HtmlReporter(Reporter):
html_path = os.path.join(self.directory, html_filename)
html = spaceless(self.source_tmpl.render(locals()))
+ if sys.version_info < (3, 0):
+ html = html.decode(encoding)
self.write_html(html_path, html)
# Save this file's information for the index file.
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index fc4f2c90..850f78bd 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -1,6 +1,6 @@
"""Better tokenizing for coverage.py."""
-import keyword, re, token, tokenize
+import codecs, keyword, re, sys, token, tokenize
from coverage.backward import StringIO # pylint: disable=W0622
def phys_tokens(toks):
@@ -106,3 +106,101 @@ def source_token_lines(source):
if line:
yield line
+
+def source_encoding(source):
+    """Determine the encoding for `source` (a string), according to PEP 263.
+
+    `source` is the undecoded text of a Python source file.  Only its first
+    two lines are examined: a UTF-8 BOM at the very start, and a
+    ``coding[:=]`` cookie on either line.
+
+    Returns a string, the name of the encoding.  If there is neither a BOM
+    nor a cookie, a default that depends on the Python version is returned.
+
+    Raises SyntaxError if the cookie names an unknown codec, or if a BOM is
+    present but the cookie names something other than utf-8.
+
+    """
+    # Note: this function should never be called on Python 3, since py3 has
+    # built-in tools to do this.
+    assert sys.version_info < (3, 0)
+
+    # This is mostly code adapted from Py3.2's tokenize module.
+
+    cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
+
+    # Do this so the detect_encoding code we copied will work.  Line endings
+    # are kept so that a blank line reads as '\n' rather than '', which is
+    # reserved to mean "no more lines".
+    readline = iter(source.splitlines(True)).next
+
+    def _get_normal_name(orig_enc):
+        """Imitates get_normal_name in tokenizer.c."""
+        # Only care about the first 12 characters.
+        enc = orig_enc[:12].lower().replace("_", "-")
+        if re.match(r"^utf-8($|-)", enc):
+            return "utf-8"
+        if re.match(r"^(latin-1|iso-8859-1|iso-latin-1)($|-)", enc):
+            return "iso-8859-1"
+        return orig_enc
+
+    # From detect_encoding():
+    # It detects the encoding from the presence of a utf-8 bom or an encoding
+    # cookie as specified in pep-0263. If both a bom and a cookie are present,
+    # but disagree, a SyntaxError will be raised. If the encoding cookie is an
+    # invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
+    # 'utf-8-sig' is returned.
+
+    # If no encoding is specified, then the default will be returned. The
+    # default varied with version.
+
+    # sys.version_info has five items, so compare against (2, 5): every
+    # 2.4.x release sorts *after* the bare tuple (2, 4).
+    if sys.version_info < (2, 5):
+        default = 'iso-8859-1'
+    else:
+        default = 'ascii'
+
+    bom_found = False
+    encoding = None
+
+    def read_or_stop():
+        """Get the next source line, or ''."""
+        try:
+            return readline()
+        except StopIteration:
+            return ''
+
+    def find_cookie(line):
+        """Find an encoding cookie in `line`."""
+        try:
+            line_string = line.decode('ascii')
+        except UnicodeDecodeError:
+            # A line with non-ASCII bytes is not searched for a cookie.
+            return None
+
+        matches = cookie_re.findall(line_string)
+        if not matches:
+            return None
+        encoding = _get_normal_name(matches[0])
+        try:
+            codec = codecs.lookup(encoding)
+        except LookupError:
+            # This behaviour mimics the Python interpreter
+            raise SyntaxError("unknown encoding: " + encoding)
+
+        if bom_found:
+            if codec.name != 'utf-8':
+                # This behaviour mimics the Python interpreter
+                raise SyntaxError('encoding problem: utf-8')
+            encoding += '-sig'
+        return encoding
+
+    first = read_or_stop()
+    if first.startswith(codecs.BOM_UTF8):
+        bom_found = True
+        # Drop the three BOM bytes before looking for a cookie.
+        first = first[3:]
+        default = 'utf-8-sig'
+    if not first:
+        return default
+
+    encoding = find_cookie(first)
+    if encoding:
+        return encoding
+
+    second = read_or_stop()
+    if not second:
+        return default
+
+    encoding = find_cookie(second)
+    if encoding:
+        return encoding
+
+    return default
diff --git a/test/farm/html/gold_isolatin1/index.html b/test/farm/html/gold_isolatin1/index.html
new file mode 100644
index 00000000..6e9f3ca7
--- /dev/null
+++ b/test/farm/html/gold_isolatin1/index.html
@@ -0,0 +1,89 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv='Content-Type' content='text/html; charset=utf-8'>
+ <title>Coverage report</title>
+ <link rel='stylesheet' href='style.css' type='text/css'>
+ <script type='text/javascript' src='jquery-1.4.3.min.js'></script>
+ <script type='text/javascript' src='jquery.tablesorter.min.js'></script>
+ <script type='text/javascript' src='jquery.hotkeys.js'></script>
+ <script type='text/javascript' src='coverage_html.js'></script>
+ <script type='text/javascript' charset='utf-8'>
+ jQuery(document).ready(coverage.index_ready);
+ </script>
+</head>
+<body id='indexfile'>
+
+<div id='header'>
+ <div class='content'>
+ <h1>Coverage report:
+ <span class='pc_cov'>100%</span>
+ </h1>
+ <img id='keyboard_icon' src='keybd_closed.png'>
+ </div>
+</div>
+
+<div class='help_panel'>
+ <img id='panel_icon' src='keybd_open.png'>
+ <p class='legend'>Hot-keys on this page</p>
+ <div>
+ <p class='keyhelp'>
+ <span class='key'>n</span>
+ <span class='key'>s</span>
+ <span class='key'>m</span>
+ <span class='key'>x</span>
+
+ <span class='key'>c</span> &nbsp; change column sorting
+ </p>
+ </div>
+</div>
+
+<div id='index'>
+ <table class='index'>
+ <thead>
+
+ <tr class='tablehead' title='Click to sort'>
+ <th class='name left headerSortDown shortkey_n'>Module</th>
+ <th class='shortkey_s'>statements</th>
+ <th class='shortkey_m'>missing</th>
+ <th class='shortkey_x'>excluded</th>
+
+ <th class='right shortkey_c'>coverage</th>
+ </tr>
+ </thead>
+
+ <tfoot>
+ <tr class='total'>
+ <td class='name left'>Total</td>
+ <td>2</td>
+ <td>0</td>
+ <td>0</td>
+
+ <td class='right'>100%</td>
+ </tr>
+ </tfoot>
+ <tbody>
+
+ <tr class='file'>
+ <td class='name left'><a href='isolatin1.html'>isolatin1</a></td>
+ <td>2</td>
+ <td>0</td>
+ <td>0</td>
+
+ <td class='right'>100%</td>
+ </tr>
+
+ </tbody>
+ </table>
+</div>
+
+<div id='footer'>
+ <div class='content'>
+ <p>
+ <a class='nav' href='http://nedbatchelder.com/code/coverage/3.5.2b1'>coverage.py v3.5.2b1</a>
+ </p>
+ </div>
+</div>
+
+</body>
+</html>
diff --git a/test/farm/html/gold_isolatin1/isolatin1.html b/test/farm/html/gold_isolatin1/isolatin1.html
new file mode 100644
index 00000000..276a6c25
--- /dev/null
+++ b/test/farm/html/gold_isolatin1/isolatin1.html
@@ -0,0 +1,91 @@
+<!doctype html PUBLIC "-//W3C//DTD html 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv='Content-Type' content='text/html; charset=utf-8'>
+
+
+ <meta http-equiv='X-UA-Compatible' content='IE=emulateIE7' />
+ <title>Coverage for isolatin1: 100%</title>
+ <link rel='stylesheet' href='style.css' type='text/css'>
+ <script type='text/javascript' src='jquery-1.4.3.min.js'></script>
+ <script type='text/javascript' src='jquery.hotkeys.js'></script>
+ <script type='text/javascript' src='jquery.isonscreen.js'></script>
+ <script type='text/javascript' src='coverage_html.js'></script>
+ <script type='text/javascript' charset='utf-8'>
+ jQuery(document).ready(coverage.pyfile_ready);
+ </script>
+</head>
+<body id='pyfile'>
+
+<div id='header'>
+ <div class='content'>
+ <h1>Coverage for <b>isolatin1</b> :
+ <span class='pc_cov'>100%</span>
+ </h1>
+ <img id='keyboard_icon' src='keybd_closed.png'>
+ <h2 class='stats'>
+ 2 statements
+ <span class='run hide_run shortkey_r' onclick='coverage.toggle_lines(this, "run")'>2 run</span>
+ <span class='mis shortkey_m' onclick='coverage.toggle_lines(this, "mis")'>0 missing</span>
+ <span class='exc shortkey_x' onclick='coverage.toggle_lines(this, "exc")'>0 excluded</span>
+
+ </h2>
+ </div>
+</div>
+
+<div class='help_panel'>
+ <img id='panel_icon' src='keybd_open.png'>
+<p class='legend'>Hot-keys on this page</p>
+ <div>
+<p class='keyhelp'>
+ <span class='key'>r</span>
+ <span class='key'>m</span>
+ <span class='key'>x</span>
+ <span class='key'>p</span> &nbsp; toggle line displays
+ </p>
+<p class='keyhelp'>
+ <span class='key'>j</span>
+ <span class='key'>k</span> &nbsp; next/prev highlighted chunk
+ </p>
+<p class='keyhelp'>
+ <span class='key'>0</span> &nbsp; (zero) top of page
+ </p>
+<p class='keyhelp'>
+ <span class='key'>1</span> &nbsp; (one) first highlighted chunk
+ </p>
+ </div>
+</div>
+
+<div id='source'>
+ <table cellspacing='0' cellpadding='0'>
+ <tr>
+ <td class='linenos' valign='top'>
+<p id='n1' class='pln'><a href='#n1'>1</a></p>
+<p id='n2' class='pln'><a href='#n2'>2</a></p>
+<p id='n3' class='pln'><a href='#n3'>3</a></p>
+<p id='n4' class='stm run hide_run'><a href='#n4'>4</a></p>
+<p id='n5' class='stm run hide_run'><a href='#n5'>5</a></p>
+
+ </td>
+ <td class='text' valign='top'>
+<p id='t1' class='pln'><span class='com'># A python source file in another encoding.</span><span class='strut'>&nbsp;</span></p>
+<p id='t2' class='pln'><span class='com'># -*- coding: iso8859-1 -*-</span><span class='strut'>&nbsp;</span></p>
+<p id='t3' class='pln'><span class='strut'>&nbsp;</span></p>
+<p id='t4' class='stm run hide_run'><span class='nam'>math</span> <span class='op'>=</span> <span class='str'>&quot;3&#215;4 = 12, &#247;2 = 6&#177;0&quot;</span><span class='strut'>&nbsp;</span></p>
+<p id='t5' class='stm run hide_run'><span class='key'>assert</span> <span class='nam'>len</span><span class='op'>(</span><span class='nam'>math</span><span class='op'>)</span> <span class='op'>==</span> <span class='num'>18</span><span class='strut'>&nbsp;</span></p>
+
+ </td>
+ </tr>
+ </table>
+</div>
+
+<div id='footer'>
+ <div class='content'>
+ <p>
+ <a class='nav' href='index.html'>&#xab; index</a> &nbsp; &nbsp; <a class='nav' href='http://nedbatchelder.com/code/coverage/3.5.2b1'>coverage.py v3.5.2b1</a>
+ </p>
+ </div>
+</div>
+
+</body>
+</html>
diff --git a/test/farm/html/run_isolatin1.py b/test/farm/html/run_isolatin1.py
new file mode 100644
index 00000000..3d4b23c5
--- /dev/null
+++ b/test/farm/html/run_isolatin1.py
@@ -0,0 +1,21 @@
+import sys
+
+def html_it():
+ """Run coverage and make an HTML report for isolatin1.py."""
+ import coverage
+ cov = coverage.coverage()
+ cov.start()
+ # Import while measurement is active, between start() and stop().
+ import isolatin1
+ cov.stop()
+ # The report goes to a sibling directory of the run directory.
+ cov.html_report(isolatin1, directory="../html_isolatin1")
+
+# NOTE(review): runfunc, compare, contains and clean are neither defined nor
+# imported in this file -- presumably the test-farm harness injects them;
+# confirm against the harness.
+runfunc(html_it, rundir="src")
+
+# HTML files will change often. Check that the sizes are reasonable,
+# and check that certain key strings are in the output.
+compare("gold_isolatin1", "html_isolatin1", size_within=10, file_pattern="*.html")
+# The expected string spells the non-ASCII characters as numeric character
+# references (&#215; &#247; &#177;).
+contains("html_isolatin1/isolatin1.html",
+ "<span class='str'>&quot;3&#215;4 = 12, &#247;2 = 6&#177;0&quot;</span>",
+ )
+
+clean("html_isolatin1")
diff --git a/test/farm/html/src/isolatin1.py b/test/farm/html/src/isolatin1.py
new file mode 100644
index 00000000..057c097b
--- /dev/null
+++ b/test/farm/html/src/isolatin1.py
@@ -0,0 +1,5 @@
+# A python source file in another encoding.
+# -*- coding: iso8859-1 -*-
+
+math = "3×4 = 12, ÷2 = 6±0"
+assert len(math) == 18