diff options
author | Ned Batchelder <ned@nedbatchelder.com> | 2022-12-23 09:23:55 -0500 |
---|---|---|
committer | Ned Batchelder <ned@nedbatchelder.com> | 2022-12-23 10:07:53 -0500 |
commit | 35e249ff74cfcbc44889107cfcca785696dc4288 (patch) | |
tree | 69488c46c48f25c3830d33160220619bbdd9965f | |
parent | 152cdc7a2b654b16fb572856d03097580e06e127 (diff) | |
download | python-coveragepy-git-35e249ff74cfcbc44889107cfcca785696dc4288.tar.gz |
fix: certain strange characters caused reporting to fail. #1512
It turns out that str.splitlines() will break text on some characters
that file.readline() does not (for example \x0b, \x0c, \x1c, \x1d, \x1e,
\x85, \u2028, and \u2029)! Use readline() to read source files the
same way that Python does.
-rw-r--r-- | CHANGES.rst | 4 | ||||
-rw-r--r-- | coverage/phystokens.py | 3 | ||||
-rw-r--r-- | tests/test_html.py | 32 |
3 files changed, 38 insertions, 1 deletion
diff --git a/CHANGES.rst b/CHANGES.rst index 8355fed8..026ffe07 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -23,9 +23,13 @@ Unreleased - File pattern rules were too strict, forbidding plus signs and curly braces in directory and file names. This is now fixed, closing `issue 1513`_. +- Unusual Unicode or control characters in source files could prevent + reporting. This is now fixed, closing `issue 1512`_. + - The PyPy wheel now installs on PyPy 3.7, 3.8, and 3.9, closing `issue 1510`_. .. _issue 1510: https://github.com/nedbat/coveragepy/issues/1510 +.. _issue 1512: https://github.com/nedbat/coveragepy/issues/1512 .. _issue 1513: https://github.com/nedbat/coveragepy/issues/1513 diff --git a/coverage/phystokens.py b/coverage/phystokens.py index d1181939..2ced9de3 100644 --- a/coverage/phystokens.py +++ b/coverage/phystokens.py @@ -4,6 +4,7 @@ """Better tokenizing for coverage.py.""" import ast +import io import keyword import re import token @@ -172,7 +173,7 @@ class CachedTokenizer: """A stand-in for `tokenize.generate_tokens`.""" if text != self.last_text: self.last_text = text - readline = iter(text.splitlines(True)).__next__ + readline = io.StringIO(text).readline try: self.last_tokens = list(tokenize.generate_tokens(readline)) except: diff --git a/tests/test_html.py b/tests/test_html.py index b49cdabb..00416769 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -469,6 +469,38 @@ class HtmlWithUnparsableFilesTest(HtmlTestHelpers, CoverageTest): formfeed_html = self.get_html_report_content("formfeed.py") assert "line_two" in formfeed_html + def test_splitlines_special_chars(self): + # https://github.com/nedbat/coveragepy/issues/1512 + # See https://docs.python.org/3/library/stdtypes.html#str.splitlines for + # the characters splitlines treats specially that readlines does not. + + # I'm not exactly sure why we need the "a" strings here, but the old + # code wasn't failing without them. 
+ self.make_file("splitlines_is_weird.py", """\ + test = { + "0b": ["\x0b0"], "a1": "this is line 2", + "0c": ["\x0c0"], "a2": "this is line 3", + "1c": ["\x1c0"], "a3": "this is line 4", + "1d": ["\x1d0"], "a4": "this is line 5", + "1e": ["\x1e0"], "a5": "this is line 6", + "85": ["\x850"], "a6": "this is line 7", + "2028": ["\u20280"], "a7": "this is line 8", + "2029": ["\u20290"], "a8": "this is line 9", + } + DONE = 1 + """) + cov = coverage.Coverage() + self.start_import_stop(cov, "splitlines_is_weird") + cov.html_report() + + the_html = self.get_html_report_content("splitlines_is_weird.py") + assert "DONE" in the_html + + # Check that the lines are properly decoded and reported... + html_lines = the_html.split("\n") + assert any(re.search(r'id="t2".*"this is line 2"', line) for line in html_lines) + assert any(re.search(r'id="t9".*"this is line 9"', line) for line in html_lines) + class HtmlTest(HtmlTestHelpers, CoverageTest): """Moar HTML tests.""" |