author     Ned Batchelder <ned@nedbatchelder.com>  2013-10-26 22:08:47 -0400
committer  Ned Batchelder <ned@nedbatchelder.com>  2013-10-26 22:08:47 -0400
commit     b330aa88f8e461afaa455f7bdd499307678e7bd4 (patch)
tree       558dd4e2c24fcfab6a42416c1efff889964a6a2e /coverage
parent     c2e4447f84b7d5f1056e367b094f82065aaaff59 (diff)
download   python-coveragepy-git-b330aa88f8e461afaa455f7bdd499307678e7bd4.tar.gz
Cache generate_tokens to speed HTML reports.
Diffstat (limited to 'coverage')
-rw-r--r--  coverage/parser.py      30
-rw-r--r--  coverage/phystokens.py   6
2 files changed, 33 insertions(+), 3 deletions(-)
diff --git a/coverage/parser.py b/coverage/parser.py
index 7459eef9..ed8f3793 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -108,7 +108,7 @@ class CodeParser(object):
         first_line = None
         empty = True
 
-        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
+        tokgen = generate_tokens(self.text)
         for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
             if self.show_tokens:                # pragma: not covered
                 print("%10s %5s %-20r %r" % (
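For context, the line removed in this hunk called the stdlib tokenizer directly. Below is a rough, self-contained illustration of that original call; the sample text is made up, and io.StringIO stands in for the StringIO that coverage.backward provides across Python versions:

    import tokenize
    from io import StringIO

    text = "a = 1\n"
    # tokenize.generate_tokens takes a readline callable and yields
    # 5-tuples: (toktype, ttext, (srow, scol), (erow, ecol), line).
    tokgen = tokenize.generate_tokens(StringIO(text).readline)
    for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
        print(tokenize.tok_name[toktype], repr(ttext), slineno, elineno)

The patch swaps this direct call for the cached generate_tokens wrapper added in the next hunk.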
@@ -669,3 +669,31 @@ class Chunk(object):
         return "<%d+%d @%d%s %r>" % (
             self.byte, self.length, self.line, bang, list(self.exits)
         )
+
+
+class CachedTokenizer(object):
+    """A one-element cache around tokenize.generate_tokens.
+
+    When reporting, coverage.py tokenizes files twice, once to find the
+    structure of the file, and once to syntax-color it. Tokenizing is
+    expensive, and easily cached.
+
+    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
+    actually tokenize twice.
+
+    """
+    def __init__(self):
+        self.last_text = None
+        self.last_tokens = None
+
+    def generate_tokens(self, text):
+        """A stand-in for `tokenize.generate_tokens`."""
+        if text != self.last_text:
+            self.last_text = text
+            self.last_tokens = list(
+                tokenize.generate_tokens(StringIO(text).readline)
+            )
+        return self.last_tokens
+
+# Create our generate_tokens cache as a callable replacement function.
+generate_tokens = CachedTokenizer().generate_tokens
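A minimal sketch (not part of the commit) of how the cached entry point behaves once this patch is applied; the sample source strings are made up for illustration:

    from coverage.parser import generate_tokens

    src = "def hello():\n    return 'hi'\n"

    first = generate_tokens(src)        # cache miss: tokenizes the text
    second = generate_tokens(src)       # cache hit: same list object returned
    assert second is first

    other = generate_tokens("pass\n")   # different text replaces the cache
    assert other is not first

A one-element cache is enough here because the reporting code tokenizes the same text twice in a row; a dict keyed by text would also avoid re-tokenizing, but would keep every file's token list alive for the whole run.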
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index f7c099ef..99b1d5ba 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -1,7 +1,9 @@
 """Better tokenizing for coverage.py."""
 
 import codecs, keyword, re, sys, token, tokenize
-from coverage.backward import set, StringIO # pylint: disable=W0622
+from coverage.backward import set # pylint: disable=W0622
+from coverage.parser import generate_tokens
+
 
 def phys_tokens(toks):
     """Return all physical tokens, even line continuations.
@@ -78,7 +80,7 @@ def source_token_lines(source):
     line = []
     col = 0
     source = source.expandtabs(8).replace('\r\n', '\n')
-    tokgen = tokenize.generate_tokens(StringIO(source).readline)
+    tokgen = generate_tokens(source)
    for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
         mark_start = True
         for part in re.split('(\n)', ttext):
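To show where the two tokenizing passes come from, here is a rough sketch of the reporting pattern the cache targets, simplified from the real HTML-report code path (the sample source is made up; the actual call chain in coverage.py is more involved):

    from coverage.parser import generate_tokens
    from coverage.phystokens import source_token_lines

    source = "x = 1\nif x:\n    print(x)\n"

    structure_tokens = generate_tokens(source)    # pass 1: file structure
    colored = list(source_token_lines(source))    # pass 2: syntax coloring
    # Each item of `colored` is a list of (token_class, text) pairs for one
    # source line.  Both passes go through the shared one-element cache, so
    # the text is tokenized only once when it is unchanged between the calls.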