summary refs log tree commit diff
diff options
context:
space:
mode:
author Ned Batchelder <ned@nedbatchelder.com> 2022-12-23 09:23:55 -0500
committer Ned Batchelder <ned@nedbatchelder.com> 2022-12-23 10:07:53 -0500
commit 35e249ff74cfcbc44889107cfcca785696dc4288 (patch)
tree 69488c46c48f25c3830d33160220619bbdd9965f
parent 152cdc7a2b654b16fb572856d03097580e06e127 (diff)
download python-coveragepy-git-35e249ff74cfcbc44889107cfcca785696dc4288.tar.gz
fix: certain strange characters caused reporting to fail. #1512
It turns out that str.splitlines() will break text on some characters that file.readline() does not! Use readline() to read source files the same way that Python does.
-rw-r--r-- CHANGES.rst 4
-rw-r--r-- coverage/phystokens.py 3
-rw-r--r-- tests/test_html.py 32
3 files changed, 38 insertions, 1 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 8355fed8..026ffe07 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -23,9 +23,13 @@ Unreleased
- File pattern rules were too strict, forbidding plus signs and curly braces in
directory and file names. This is now fixed, closing `issue 1513`_.
+- Unusual Unicode or control characters in source files could prevent
+ reporting. This is now fixed, closing `issue 1512`_.
+
- The PyPy wheel now installs on PyPy 3.7, 3.8, and 3.9, closing `issue 1510`_.
.. _issue 1510: https://github.com/nedbat/coveragepy/issues/1510
+.. _issue 1512: https://github.com/nedbat/coveragepy/issues/1512
.. _issue 1513: https://github.com/nedbat/coveragepy/issues/1513
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index d1181939..2ced9de3 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -4,6 +4,7 @@
"""Better tokenizing for coverage.py."""
import ast
+import io
import keyword
import re
import token
@@ -172,7 +173,7 @@ class CachedTokenizer:
"""A stand-in for `tokenize.generate_tokens`."""
if text != self.last_text:
self.last_text = text
- readline = iter(text.splitlines(True)).__next__
+ readline = io.StringIO(text).readline
try:
self.last_tokens = list(tokenize.generate_tokens(readline))
except:
diff --git a/tests/test_html.py b/tests/test_html.py
index b49cdabb..00416769 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -469,6 +469,38 @@ class HtmlWithUnparsableFilesTest(HtmlTestHelpers, CoverageTest):
formfeed_html = self.get_html_report_content("formfeed.py")
assert "line_two" in formfeed_html
+ def test_splitlines_special_chars(self):
+ # https://github.com/nedbat/coveragepy/issues/1512
+ # See https://docs.python.org/3/library/stdtypes.html#str.splitlines for
+ # the characters splitlines treats specially that readlines does not.
+
+ # I'm not exactly sure why we need the "a" strings here, but the old
+ # code wasn't failing without them.
+ self.make_file("splitlines_is_weird.py", """\
+ test = {
+ "0b": ["\x0b0"], "a1": "this is line 2",
+ "0c": ["\x0c0"], "a2": "this is line 3",
+ "1c": ["\x1c0"], "a3": "this is line 4",
+ "1d": ["\x1d0"], "a4": "this is line 5",
+ "1e": ["\x1e0"], "a5": "this is line 6",
+ "85": ["\x850"], "a6": "this is line 7",
+ "2028": ["\u20280"], "a7": "this is line 8",
+ "2029": ["\u20290"], "a8": "this is line 9",
+ }
+ DONE = 1
+ """)
+ cov = coverage.Coverage()
+ self.start_import_stop(cov, "splitlines_is_weird")
+ cov.html_report()
+
+ the_html = self.get_html_report_content("splitlines_is_weird.py")
+ assert "DONE" in the_html
+
+ # Check that the lines are properly decoded and reported...
+ html_lines = the_html.split("\n")
+ assert any(re.search(r'id="t2".*"this is line 2"', line) for line in html_lines)
+ assert any(re.search(r'id="t9".*"this is line 9"', line) for line in html_lines)
+
class HtmlTest(HtmlTestHelpers, CoverageTest):
"""Moar HTML tests."""