Prevent UnicodeDecodeErrors in html report. #303

author: Ned Batchelder <ned@nedbatchelder.com> 2014-06-11 22:02:32 -0400
committer: Ned Batchelder <ned@nedbatchelder.com> 2014-06-11 22:02:32 -0400
commit: be5178fda38750c0aae78061bf1b1bf3397a467d (patch)
tree: 9fecc8d7481259fd7267b208d018edb1b22daa6f
parent: 8f5aa5738644bf81c2d0547de5bbe5f77548b1a9 (diff)
download: python-coveragepy-git-be5178fda38750c0aae78061bf1b1bf3397a467d.tar.gz
3 files changed, 39 insertions, 30 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index eb110d6b..1b8ec41b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,9 @@ Change history for Coverage.py
 - The XML report will now create the output directory if need be, fixing
   `issue 285`_.  Thanks Chris Rose.
 
+- HTML reports no longer raise UnicodeDecodeError if a Python file has
+  undecodable characters, fixing `issue 303`_.
+
 - The annotate command will now annotate all files, not just ones relative to
   the current directory, fixing `issue 57`_.
 
@@ -30,6 +33,7 @@ Change history for Coverage.py
 .. _issue 94: https://bitbucket.org/ned/coveragepy/issue/94/coverage-xml-doesnt-produce-sources
 .. _issue 284: https://bitbucket.org/ned/coveragepy/issue/284/fail-under-should-show-more-precision
 .. _issue 285: https://bitbucket.org/ned/coveragepy/issue/285/xml-report-fails-if-output-file-directory
+.. _issue 303: https://bitbucket.org/ned/coveragepy/issue/303/unicodedecodeerror
 .. _issue 305: https://bitbucket.org/ned/coveragepy/issue/305/pendingdeprecationwarning-the-imp-module
 
 
diff --git a/coverage/html.py b/coverage/html.py
index 6c811107..85f47ab4 100644
--- a/coverage/html.py
+++ b/coverage/html.py
@@ -238,15 +238,9 @@ class HtmlReporter(Reporter):
         }))
 
         if sys.version_info < (3, 0):
-            try:
-                html = html.decode(encoding)
-            except UnicodeDecodeError as e:
-                sample = e.object[max([0, e.start-30]):e.start+30]
-                raise CoverageException(
-                    "Couldn't decode %r as %s: %r" % (
-                        cu.filename, e.encoding, sample
-                    )
-                )
+            # In theory, all the characters in the source can be decoded, but
+            # strange things happen, so use 'replace' to keep errors at bay.
+            html = html.decode(encoding, 'replace')
 
         html_filename = flat_rootname + ".html"
         html_path = os.path.join(self.directory, html_filename)
diff --git a/tests/test_html.py b/tests/test_html.py
index de967f86..8e43e7cf 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -42,6 +42,13 @@ class HtmlTestHelpers(CoverageTest):
         os.remove("htmlcov/helper1.html")
         os.remove("htmlcov/helper2.html")
 
+    def get_html_report_content(self, module):
+        """Return the content of the HTML report for `module`."""
+        filename = module.replace(".py", ".html").replace("/", "_")
+        filename = os.path.join("htmlcov", filename)
+        with open(filename) as f:
+            return f.read()
+
 
 class HtmlDeltaTest(HtmlTestHelpers, CoverageTest):
     """Tests of the HTML delta speed-ups."""
@@ -208,7 +215,7 @@ class HtmlTitleTest(HtmlTestHelpers, CoverageTest):
             )
 
 
-class HtmlWithUnparsableFilesTest(CoverageTest):
+class HtmlWithUnparsableFilesTest(HtmlTestHelpers, CoverageTest):
     """Test the behavior when measuring unparsable files."""
 
     def test_dotpy_not_python(self):
@@ -267,26 +274,30 @@ class HtmlWithUnparsableFilesTest(CoverageTest):
         cov.html_report()
         self.assert_exists("htmlcov/index.html")
 
-    if sys.version_info < (3, 0):
-        def test_decode_error(self):
-            # imp.load_module won't load a file with an undecodable character
-            # in a comment, though Python will run them.  So we'll change the
-            # file after running.
-            self.make_file("main.py", "import sub.not_ascii")
-            self.make_file("sub/__init__.py")
-            self.make_file("sub/not_ascii.py", """\
-                a = 1  # Isn't this great?
-                """)
-            cov = coverage.coverage()
-            self.start_import_stop(cov, "main")
-
-            # Create the undecodable version of the file.
-            self.make_file("sub/not_ascii.py", """\
-                a = 1  # Isn't this great?\xcb
-                """)
-            msg = r"Couldn't decode '.*sub/not_ascii.py' as ascii: .*\\xcb.*"
-            with self.assertRaisesRegex(CoverageException, msg):
-                cov.html_report()
+    def test_decode_error(self):
+        # imp.load_module won't load a file with an undecodable character
+        # in a comment, though Python will run them.  So we'll change the
+        # file after running.
+        self.make_file("main.py", "import sub.not_ascii")
+        self.make_file("sub/__init__.py")
+        self.make_file("sub/not_ascii.py", """\
+            a = 1  # Isn't this great?!
+            """)
+        cov = coverage.coverage()
+        self.start_import_stop(cov, "main")
+
+        # Create the undecodable version of the file.
+        self.make_file("sub/not_ascii.py", """\
+            a = 1  # Isn't this great?\xcb!
+            """)
+        cov.html_report()
+
+        html_report = self.get_html_report_content("sub/not_ascii.py")
+        if sys.version_info < (3, 0):
+            expected = "# Isn&#39;t this great?&#65533;!"
+        else:
+            expected = "# Isn&#39;t this great?&#203;!"
+        self.assertIn(expected, html_report)
 
 
 class HtmlTest(CoverageTest):
author	Ned Batchelder <ned@nedbatchelder.com>	2014-06-11 22:02:32 -0400
committer	Ned Batchelder <ned@nedbatchelder.com>	2014-06-11 22:02:32 -0400
commit	be5178fda38750c0aae78061bf1b1bf3397a467d (patch)
tree	9fecc8d7481259fd7267b208d018edb1b22daa6f
parent	8f5aa5738644bf81c2d0547de5bbe5f77548b1a9 (diff)
download	python-coveragepy-git-be5178fda38750c0aae78061bf1b1bf3397a467d.tar.gz