-rw-r--r--   coverage/parser.py      | 30
-rw-r--r--   coverage/phystokens.py  |  6
2 files changed, 33 insertions, 3 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index 7459eef9..ed8f3793 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -108,7 +108,7 @@ class CodeParser(object):
         first_line = None
         empty = True
 
-        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
+        tokgen = generate_tokens(self.text)
         for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
             if self.show_tokens: # pragma: not covered
                 print("%10s %5s %-20r %r" % (
@@ -669,3 +669,31 @@ class Chunk(object):
         return "<%d+%d @%d%s %r>" % (
             self.byte, self.length, self.line, bang, list(self.exits)
             )
+
+
+class CachedTokenizer(object):
+    """A one-element cache around tokenize.generate_tokens.
+
+    When reporting, coverage.py tokenizes files twice, once to find the
+    structure of the file, and once to syntax-color it.  Tokenizing is
+    expensive, and easily cached.
+
+    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
+    actually tokenize twice.
+
+    """
+    def __init__(self):
+        self.last_text = None
+        self.last_tokens = None
+
+    def generate_tokens(self, text):
+        """A stand-in for `tokenize.generate_tokens`."""
+        if text != self.last_text:
+            self.last_text = text
+            self.last_tokens = list(
+                tokenize.generate_tokens(StringIO(text).readline)
+            )
+        return self.last_tokens
+
+# Create our generate_tokens cache as a callable replacement function.
+generate_tokens = CachedTokenizer().generate_tokens
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index f7c099ef..99b1d5ba 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -1,7 +1,9 @@
 """Better tokenizing for coverage.py."""
 
 import codecs, keyword, re, sys, token, tokenize
-from coverage.backward import set, StringIO # pylint: disable=W0622
+from coverage.backward import set # pylint: disable=W0622
+from coverage.parser import generate_tokens
+
 
 def phys_tokens(toks):
     """Return all physical tokens, even line continuations.
@@ -78,7 +80,7 @@ def source_token_lines(source):
     line = []
     col = 0
     source = source.expandtabs(8).replace('\r\n', '\n')
-    tokgen = tokenize.generate_tokens(StringIO(source).readline)
+    tokgen = generate_tokens(source)
     for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
         mark_start = True
         for part in re.split('(\n)', ttext):
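
For anyone who wants to try the caching idea outside of coverage.py, below is a minimal standalone sketch of the same one-element cache. The class body mirrors the CachedTokenizer added in the patch; the use of io.StringIO (instead of coverage.backward) and the __main__ demo with its sample source strings are illustrative additions, not part of the library.

import tokenize
from io import StringIO


class CachedTokenizer(object):
    """A one-element cache around tokenize.generate_tokens."""

    def __init__(self):
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        """A stand-in for `tokenize.generate_tokens`."""
        if text != self.last_text:
            # Cache miss: remember the text, and materialize the token
            # generator into a list so it can be iterated more than once.
            self.last_text = text
            self.last_tokens = list(
                tokenize.generate_tokens(StringIO(text).readline)
            )
        return self.last_tokens


generate_tokens = CachedTokenizer().generate_tokens

if __name__ == "__main__":
    source = "def f(x):\n    return x + 1\n"
    first = generate_tokens(source)
    second = generate_tokens(source)
    assert second is first      # same text twice in a row -> cached list reused
    other = generate_tokens("y = 2\n")
    assert other is not first   # different text evicts the single cache slot

Note that the cache stores a list rather than the raw generator returned by tokenize.generate_tokens: a generator can only be consumed once, so materializing it is what lets the structure pass in parser.py and the syntax-coloring pass in phystokens.py both iterate over the same cached result.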