From e743e94e540341465175a489e82f67d3da071e77 Mon Sep 17 00:00:00 2001
From: Ned Batchelder
Date: Wed, 23 Oct 2013 22:19:26 -0400
Subject: Use sets as much as possible to speed HTML reports. Seems to be a 10% speedup.

---
 coverage/parser.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'coverage/parser.py')

diff --git a/coverage/parser.py b/coverage/parser.py
index 581c851..7459eef 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -175,16 +175,18 @@ class CodeParser(object):
             first_line = line
         return first_line
 
-    def first_lines(self, lines, ignore=None):
+    def first_lines(self, lines, *ignores):
         """Map the line numbers in `lines` to the correct first line of the
         statement.
 
-        Skip any line mentioned in `ignore`.
+        Skip any line mentioned in any of the sequences in `ignores`.
 
-        Returns a sorted list of the first lines.
+        Returns a set of the first lines.
 
         """
-        ignore = ignore or []
+        ignore = set()
+        for ign in ignores:
+            ignore.update(ign)
         lset = set()
         for l in lines:
             if l in ignore:
@@ -192,13 +194,13 @@ class CodeParser(object):
             new_l = self.first_line(l)
             if new_l not in ignore:
                 lset.add(new_l)
-        return sorted(lset)
+        return lset
 
     def parse_source(self):
         """Parse source text to find executable lines, excluded lines, etc.
 
-        Return values are 1) a sorted list of executable line numbers, and
-        2) a sorted list of excluded line numbers.
+        Return values are 1) a set of executable line numbers, and 2) a set of
+        excluded line numbers.
 
         Reported line numbers are normalized to the first line of multi-line
         statements.
@@ -215,8 +217,11 @@ class CodeParser(object):
                 )
 
         excluded_lines = self.first_lines(self.excluded)
-        ignore = excluded_lines + list(self.docstrings)
-        lines = self.first_lines(self.statement_starts, ignore)
+        lines = self.first_lines(
+            self.statement_starts,
+            excluded_lines,
+            self.docstrings
+        )
 
         return lines, excluded_lines
 
--
cgit v1.2.1


From bde6d2060bebcf7c8a3a365f9b9c01a9d801dbe9 Mon Sep 17 00:00:00 2001
From: Ned Batchelder
Date: Sat, 26 Oct 2013 22:08:47 -0400
Subject: Cache generate_tokens to speed HTML reports.

---
 coverage/parser.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'coverage/parser.py')

diff --git a/coverage/parser.py b/coverage/parser.py
index 7459eef..ed8f379 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -108,7 +108,7 @@ class CodeParser(object):
         first_line = None
         empty = True
 
-        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
+        tokgen = generate_tokens(self.text)
         for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
             if self.show_tokens: # pragma: not covered
                 print("%10s %5s %-20r %r" % (
@@ -669,3 +669,31 @@ class Chunk(object):
         return "<%d+%d @%d%s %r>" % (
             self.byte, self.length, self.line, bang, list(self.exits)
             )
+
+
+class CachedTokenizer(object):
+    """A one-element cache around tokenize.generate_tokens.
+
+    When reporting, coverage.py tokenizes files twice, once to find the
+    structure of the file, and once to syntax-color it. Tokenizing is
+    expensive, and easily cached.
+
+    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
+    actually tokenize twice.
+
+    """
+    def __init__(self):
+        self.last_text = None
+        self.last_tokens = None
+
+    def generate_tokens(self, text):
+        """A stand-in for `tokenize.generate_tokens`."""
+        if text != self.last_text:
+            self.last_text = text
+            self.last_tokens = list(
+                tokenize.generate_tokens(StringIO(text).readline)
+            )
+        return self.last_tokens
+
+# Create our generate_tokens cache as a callable replacement function.
+generate_tokens = CachedTokenizer().generate_tokens
--
cgit v1.2.1


From 7d23342011606460e5a22f189814702efa95d690 Mon Sep 17 00:00:00 2001
From: Ned Batchelder
Date: Sun, 27 Oct 2013 09:06:41 -0400
Subject: ByteCodes.__iter__ is expensive, do it once instead of twice.

---
 coverage/parser.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'coverage/parser.py')

diff --git a/coverage/parser.py b/coverage/parser.py
index ed8f379..7a145a2 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -449,14 +449,15 @@ class ByteParser(object):
 
         # Get a set of all of the jump-to points.
         jump_to = set()
-        for bc in ByteCodes(self.code.co_code):
+        bytecodes = list(ByteCodes(self.code.co_code))
+        for bc in bytecodes:
             if bc.jump_to >= 0:
                 jump_to.add(bc.jump_to)
 
         chunk_lineno = 0
 
         # Walk the byte codes building chunks.
-        for bc in ByteCodes(self.code.co_code):
+        for bc in bytecodes:
             # Maybe have to start a new chunk
             start_new_chunk = False
             first_chunk = False
--
cgit v1.2.1