Fix an unusual tokenizing issue with backslashes in comments (issue #822).

author: Ned Batchelder <ned@nedbatchelder.com> 2019-07-12 10:24:35 -0400
committer: Ned Batchelder <ned@nedbatchelder.com> 2019-08-10 20:43:07 -0400
commit: 1a27df4c768e5a183ddd0f890d139996ffc52778 (patch)
tree: b2096def51238409ab6fedb6c34e9acae0a08ca2
parent: 6b3017454af6662dde107755f1ee92153e69a4d8 (diff)
download: python-coveragepy-git-1a27df4c768e5a183ddd0f890d139996ffc52778.tar.gz
3 files changed, 18 insertions, 5 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 4bc6fad7..d51cfe30 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -23,6 +23,10 @@ Unreleased
 - A class named "test_something" no longer confuses the `test_function` dynamic
   context setting.  Fixes `issue 829`_.
 
+- Fixed an unusual tokenizing issue with backslashes in comments.  Fixes
+  `issue 822`_.
+
+.. _issue 822: https://github.com/nedbat/coveragepy/issues/822
 .. _issue 829: https://github.com/nedbat/coveragepy/issues/829
 
 
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index ccfe63b3..b6866e7d 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -27,7 +27,7 @@ def phys_tokens(toks):
     """
     last_line = None
     last_lineno = -1
-    last_ttype = None
+    last_ttext = None
     for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
         if last_lineno != elineno:
             if last_line and last_line.endswith("\\\n"):
@@ -47,9 +47,7 @@ def phys_tokens(toks):
                 # so we need to figure out if the backslash is already in the
                 # string token or not.
                 inject_backslash = True
-                if last_ttype == tokenize.COMMENT:
-                    # Comments like this \
-                    # should never result in a new token.
+                if last_ttext.endswith("\\"):
                     inject_backslash = False
                 elif ttype == token.STRING:
                     if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':
@@ -66,7 +64,8 @@ def phys_tokens(toks):
                         last_line
                         )
             last_line = ltext
-            last_ttype = ttype
+        if ttype not in (tokenize.NEWLINE, tokenize.NL):
+            last_ttext = ttext
         yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext
         last_lineno = elineno
 
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 1045225e..48f8ebb7 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -43,6 +43,13 @@ MIXED_WS_TOKENS = [
     [('ws', '        '), ('nam', 'b'), ('op', '='), ('str', '"indented"')],
 ]
 
+# https://github.com/nedbat/coveragepy/issues/822
+BUG_822 = u"""\
+print( "Message 1" )
+array = [ 1,2,3,4,       # 4 numbers \\
+          5,6,7 ]        # 3 numbers
+print( "Message 2" )
+"""
 
 class PhysTokensTest(CoverageTest):
     """Tests for coverage.py's improved tokenizer."""
@@ -78,6 +85,9 @@ class PhysTokensTest(CoverageTest):
         # Mixed tabs and spaces...
         self.assertEqual(list(source_token_lines(MIXED_WS)), MIXED_WS_TOKENS)
 
+    def test_bug_822(self):
+        self.check_tokenization(BUG_822)
+
     def test_tokenize_real_file(self):
         # Check the tokenization of a real file (large, btw).
         real_file = os.path.join(TESTS_DIR, "test_coverage.py")
author	Ned Batchelder <ned@nedbatchelder.com>	2019-07-12 10:24:35 -0400
committer	Ned Batchelder <ned@nedbatchelder.com>	2019-08-10 20:43:07 -0400
commit	1a27df4c768e5a183ddd0f890d139996ffc52778 (patch)
tree	b2096def51238409ab6fedb6c34e9acae0a08ca2
parent	6b3017454af6662dde107755f1ee92153e69a4d8 (diff)
download	python-coveragepy-git-1a27df4c768e5a183ddd0f890d139996ffc52778.tar.gz