diff options
 CHANGES.rst              |  4 ++++
 coverage/phystokens.py   |  9 ++++-----
 tests/test_phystokens.py | 10 ++++++++++
 3 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/CHANGES.rst b/CHANGES.rst
index 4bc6fad7..d51cfe30 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -23,6 +23,10 @@ Unreleased
 - A class named "test_something" no longer confuses the `test_function`
   dynamic context setting. Fixes `issue 829`_.
 
+- Fixed an unusual tokenizing issue with backslashes in comments. Fixes
+  `issue 822`_.
+
+.. _issue 822: https://github.com/nedbat/coveragepy/issues/822
 .. _issue 829: https://github.com/nedbat/coveragepy/issues/829
 
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index ccfe63b3..b6866e7d 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -27,7 +27,7 @@ def phys_tokens(toks):
     """
     last_line = None
     last_lineno = -1
-    last_ttype = None
+    last_ttext = None
     for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
         if last_lineno != elineno:
             if last_line and last_line.endswith("\\\n"):
@@ -47,9 +47,7 @@ def phys_tokens(toks):
                 # so we need to figure out if the backslash is already in the
                 # string token or not.
                 inject_backslash = True
-                if last_ttype == tokenize.COMMENT:
-                    # Comments like this \
-                    # should never result in a new token.
+                if last_ttext.endswith("\\"):
                     inject_backslash = False
                 elif ttype == token.STRING:
                     if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':
@@ -66,7 +64,8 @@ def phys_tokens(toks):
                     last_line
                 )
         last_line = ltext
-        last_ttype = ttype
+        if ttype not in (tokenize.NEWLINE, tokenize.NL):
+            last_ttext = ttext
         yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext
         last_lineno = elineno
 
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 1045225e..48f8ebb7 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -43,6 +43,13 @@ MIXED_WS_TOKENS = [
     [('ws', ' '), ('nam', 'b'), ('op', '='), ('str', '"indented"')],
 ]
 
+# https://github.com/nedbat/coveragepy/issues/822
+BUG_822 = u"""\
+print( "Message 1" )
+array = [ 1,2,3,4, # 4 numbers \\
+          5,6,7 ]  # 3 numbers
+print( "Message 2" )
+"""
 
 class PhysTokensTest(CoverageTest):
     """Tests for coverage.py's improved tokenizer."""
@@ -78,6 +85,9 @@ class PhysTokensTest(CoverageTest):
         # Mixed tabs and spaces...
         self.assertEqual(list(source_token_lines(MIXED_WS)), MIXED_WS_TOKENS)
 
+    def test_bug_822(self):
+        self.check_tokenization(BUG_822)
+
     def test_tokenize_real_file(self):
         # Check the tokenization of a real file (large, btw).
         real_file = os.path.join(TESTS_DIR, "test_coverage.py")