summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ned Batchelder <ned@nedbatchelder.com> 2009-11-21 09:59:43 -0500
committer Ned Batchelder <ned@nedbatchelder.com> 2009-11-21 09:59:43 -0500
commit 2c31e11ba195a336e1b037f42907cd2e1c53e096 (patch)
tree fa87019ede4432baa4a6f5487ec34bffdcffdebe
parent 6f4a40d34152b7d99be35f4127e76c8beb4d6c06 (diff)
download python-coveragepy-git-2c31e11ba195a336e1b037f42907cd2e1c53e096.tar.gz
Handle one more bizarro edge case in tokenizing source: I couldn't look at my own phystokens.py file properly!
-rw-r--r-- coverage/phystokens.py 8
-rw-r--r-- test/stress_phystoken.txt 11
-rw-r--r-- test/test_phystokens.py 5
3 files changed, 19 insertions, 5 deletions
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 2862490f..131b362a 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -15,6 +15,7 @@ def phys_tokens(toks):
"""
last_line = None
last_lineno = -1
+ last_ttype = None
for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
if last_lineno != elineno:
if last_line and last_line[-2:] == "\\\n":
@@ -34,7 +35,11 @@ def phys_tokens(toks):
# so we need to figure out if the backslash is already in the
# string token or not.
inject_backslash = True
- if ttype == token.STRING:
+ if last_ttype == tokenize.COMMENT:
+ # Comments like this \
+ # should never result in a new token.
+ inject_backslash = False
+ elif ttype == token.STRING:
if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':
# It's a multiline string and the first line ends with
# a backslash, so we don't need to inject another.
@@ -49,6 +54,7 @@ def phys_tokens(toks):
last_line
)
last_line = ltext
+ last_ttype = ttype
yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext
last_lineno = elineno
diff --git a/test/stress_phystoken.txt b/test/stress_phystoken.txt
index bd6a453a..654225c3 100644
--- a/test/stress_phystoken.txt
+++ b/test/stress_phystoken.txt
@@ -19,6 +19,17 @@ fake_back = """\
ouch
"""
+# Lots of difficulty happens with code like:
+#
+# fake_back = """\
+# ouch
+# """
+#
+# Ugh, the edge cases...
+
+# What about a comment like this\
+"what's this string doing here?"
+
class C(object):
def there():
this = 5 + \
diff --git a/test/test_phystokens.py b/test/test_phystokens.py
index fa0fa043..fb0b5535 100644
--- a/test/test_phystokens.py
+++ b/test/test_phystokens.py
@@ -38,10 +38,7 @@ class PhysTokensTest(CoverageTest):
# before comparing.
source = re.sub("(?m)[ \t]+$", "", source)
tokenized = re.sub("(?m)[ \t]+$", "", tokenized)
- #if source != tokenized:
- # open("0.py", "w").write(source)
- # open("1.py", "w").write(tokenized)
- self.assertEqual(source, tokenized)
+ self.assert_multiline_equal(source, tokenized)
def check_file_tokenization(self, fname):
"""Use the contents of `fname` for `check_tokenization`."""