diff options
Diffstat (limited to 'coverage/parser.py')
-rw-r--r-- | coverage/parser.py | 254 |
1 files changed, 154 insertions, 100 deletions
diff --git a/coverage/parser.py b/coverage/parser.py index 01b38af3..b090f02d 100644 --- a/coverage/parser.py +++ b/coverage/parser.py @@ -9,13 +9,13 @@ from coverage.misc import nice_pair, CoverageException, NoSource, expensive class CodeParser(object): """Parse code to find executable lines, excluded lines, etc.""" - + def __init__(self, text=None, filename=None, exclude=None): """ Source can be provided as `text`, the text itself, or `filename`, from - which text will be read. Excluded lines are those that match `exclude`, - a regex. - + which text will be read. Excluded lines are those that match + `exclude`, a regex. + """ assert text or filename, "CodeParser needs either text or filename" self.filename = filename or "<code>" @@ -33,7 +33,7 @@ class CodeParser(object): self.text = self.text.replace('\r\n', '\n') self.exclude = exclude - + self.show_tokens = False # The text lines of the parsed code. @@ -41,22 +41,22 @@ class CodeParser(object): # The line numbers of excluded lines of code. self.excluded = set() - + # The line numbers of docstring lines. self.docstrings = set() - + # The line numbers of class definitions. self.classdefs = set() # A dict mapping line numbers to (lo,hi) for multi-line statements. self.multiline = {} - + # The line numbers that start statements. self.statement_starts = set() # Lazily-created ByteParser self._byte_parser = None - + def _get_byte_parser(self): """Create a ByteParser on demand.""" if not self._byte_parser: @@ -67,9 +67,9 @@ class CodeParser(object): def _raw_parse(self): """Parse the source to find the interesting facts about its lines. - + A handful of member fields are updated. - + """ # Find lines which match an exclusion pattern. if self.exclude: @@ -77,7 +77,7 @@ class CodeParser(object): for i, ltext in enumerate(self.lines): if re_exclude.search(ltext): self.excluded.add(i+1) - + # Tokenize, to find excluded suites, to find docstrings, and to find # multi-line statements. indent = 0 @@ -88,7 +88,7 @@ class CodeParser(object): tokgen = tokenize.generate_tokens(StringIO(self.text).readline) for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: - if self.show_tokens: + if self.show_tokens: # pragma: no cover print("%10s %5s %-20r %r" % ( tokenize.tok_name.get(toktype, toktype), nice_pair((slineno, elineno)), ttext, ltext @@ -111,7 +111,9 @@ class CodeParser(object): excluding = True elif toktype == token.STRING and prev_toktype == token.INDENT: # Strings that are first on an indented line are docstrings. - # (a trick from trace.py in the stdlib.) + # (a trick from trace.py in the stdlib.) This works for + # 99.9999% of cases. For the rest (!) see: + # http://stackoverflow.com/questions/1769332/x/1769794#1769794 for i in range(slineno, elineno+1): self.docstrings.add(i) elif toktype == token.NEWLINE: @@ -123,7 +125,7 @@ class CodeParser(object): for l in range(first_line, elineno+1): self.multiline[l] = rng first_line = None - + if ttext.strip() and toktype != tokenize.COMMENT: # A non-whitespace token. if first_line is None: @@ -135,7 +137,7 @@ class CodeParser(object): excluding = False if excluding: self.excluded.add(elineno) - + prev_toktype = toktype # Find the starts of the executable statements. @@ -153,11 +155,11 @@ class CodeParser(object): def first_lines(self, lines, ignore=None): """Map the line numbers in `lines` to the correct first line of the statement. - + Skip any line mentioned in `ignore`. - + Returns a sorted list of the first lines. - + """ ignore = ignore or [] lset = set() @@ -168,31 +170,31 @@ class CodeParser(object): if new_l not in ignore: lset.add(new_l) return sorted(lset) - + def parse_source(self): """Parse source text to find executable lines, excluded lines, etc. Return values are 1) a sorted list of executable line numbers, and 2) a sorted list of excluded line numbers. - + Reported line numbers are normalized to the first line of multi-line statements. - + """ self._raw_parse() - + excluded_lines = self.first_lines(self.excluded) ignore = excluded_lines + list(self.docstrings) lines = self.first_lines(self.statement_starts, ignore) - + return lines, excluded_lines def arcs(self): """Get information about the arcs available in the code. - + Returns a sorted list of line number pairs. Line numbers have been normalized to the first line of multiline statements. - + """ all_arcs = [] for l1, l2 in self.byte_parser._all_arcs(): @@ -205,27 +207,32 @@ class CodeParser(object): def exit_counts(self): """Get a mapping from line numbers to count of exits from that line. - + Excluded lines are excluded. - + """ excluded_lines = self.first_lines(self.excluded) exit_counts = {} for l1, l2 in self.arcs(): - if l1 == -1: + if l1 < 0: + # Don't ever report -1 as a line number continue if l1 in excluded_lines: + # Don't report excluded lines as line numbers. + continue + if l2 in excluded_lines: + # Arcs to excluded lines shouldn't count. continue if l1 not in exit_counts: exit_counts[l1] = 0 exit_counts[l1] += 1 - + # Class definitions have one extra exit, so remove one for each: for l in self.classdefs: - # Ensure key is there - #pragma: no cover will mean its not + # Ensure key is there: classdefs can include excluded lines. if l in exit_counts: exit_counts[l] -= 1 - + return exit_counts exit_counts = expensive(exit_counts) @@ -249,6 +256,11 @@ OPS_CHUNK_END = _opcode_set( 'BREAK_LOOP', 'CONTINUE_LOOP', ) +# Opcodes that unconditionally begin a new code chunk. By starting new chunks +# with unconditional jump instructions, we neatly deal with jumps to jumps +# properly. +OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD') + # Opcodes that push a block on the block stack. OPS_PUSH_BLOCK = _opcode_set('SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY') @@ -266,6 +278,8 @@ OP_BREAK_LOOP = _opcode('BREAK_LOOP') OP_END_FINALLY = _opcode('END_FINALLY') OP_COMPARE_OP = _opcode('COMPARE_OP') COMPARE_EXCEPTION = 10 # just have to get this const from the code. +OP_LOAD_CONST = _opcode('LOAD_CONST') +OP_RETURN_VALUE = _opcode('RETURN_VALUE') class ByteParser(object): @@ -294,14 +308,14 @@ class ByteParser(object): def child_parsers(self): """Iterate over all the code objects nested within this one. - + The iteration includes `self` as its first value. - + """ return map(lambda c: ByteParser(code=c), CodeObjects(self.code)) - # Getting numbers from the lnotab value changed in Py3.0. - if sys.hexversion >= 0x03000000: + # Getting numbers from the lnotab value changed in Py3.0. + if sys.version_info >= (3, 0): def _lnotab_increments(self, lnotab): """Return a list of ints from the lnotab bytes in 3.x""" return list(lnotab) @@ -312,15 +326,15 @@ class ByteParser(object): def _bytes_lines(self): """Map byte offsets to line numbers in `code`. - + Uses co_lnotab described in Python/compile.c to map byte offsets to line numbers. Returns a list: [(b0, l0), (b1, l1), ...] - + """ # Adapted from dis.py in the standard library. byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2]) line_increments = self._lnotab_increments(self.code.co_lnotab[1::2]) - + bytes_lines = [] last_line_num = None line_num = self.code.co_firstlineno @@ -335,13 +349,13 @@ class ByteParser(object): if line_num != last_line_num: bytes_lines.append((byte_num, line_num)) return bytes_lines - + def _find_statements(self): """Find the statements in `self.code`. - + Return a set of line numbers that start statements. Recurses into all code objects reachable from `self.code`. - + """ stmts = set() for bp in self.child_parsers(): @@ -349,12 +363,12 @@ class ByteParser(object): for _, l in bp._bytes_lines(): stmts.add(l) return stmts - - def _disassemble(self): + + def _disassemble(self): # pragma: no cover """Disassemble code, for ad-hoc experimenting.""" - + import dis - + for bp in self.child_parsers(): print("\n%s: " % bp.code) dis.dis(bp.code) @@ -364,41 +378,52 @@ class ByteParser(object): def _split_into_chunks(self): """Split the code object into a list of `Chunk` objects. - + Each chunk is only entered at its first instruction, though there can be many exits from a chunk. - + Returns a list of `Chunk` objects. - + """ # The list of chunks so far, and the one we're working on. chunks = [] chunk = None bytes_lines_map = dict(self._bytes_lines()) - + # The block stack: loops and try blocks get pushed here for the # implicit jumps that can occur. # Each entry is a tuple: (block type, destination) block_stack = [] - + # Some op codes are followed by branches that should be ignored. This # is a count of how many ignores are left. ignore_branch = 0 + # We have to handle the last two bytecodes specially. + ult = penult = None + for bc in ByteCodes(self.code.co_code): - # Maybe have to start a new block + # Maybe have to start a new chunk if bc.offset in bytes_lines_map: + # Start a new chunk for each source line number. if chunk: chunk.exits.add(bc.offset) chunk = Chunk(bc.offset, bytes_lines_map[bc.offset]) chunks.append(chunk) - + elif bc.op in OPS_CHUNK_BEGIN: + # Jumps deserve their own unnumbered chunk. This fixes + # problems with jumps to jumps getting confused. + if chunk: + chunk.exits.add(bc.offset) + chunk = Chunk(bc.offset) + chunks.append(chunk) + if not chunk: chunk = Chunk(bc.offset) chunks.append(chunk) - # Look at the opcode + # Look at the opcode if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP: if ignore_branch: # Someone earlier wanted us to ignore this branch. @@ -406,10 +431,10 @@ class ByteParser(object): else: # The opcode has a jump, it's an exit for this chunk. chunk.exits.add(bc.jump_to) - + if bc.op in OPS_CODE_END: # The opcode can exit the code object. - chunk.exits.add(-1) + chunk.exits.add(-self.code.co_firstlineno) if bc.op in OPS_PUSH_BLOCK: # The opcode adds a block to the block_stack. block_stack.append((bc.op, bc.jump_to)) @@ -438,8 +463,32 @@ class ByteParser(object): # This is an except clause. We want to overlook the next # branch, so that except's don't count as branches. ignore_branch += 1 - + + penult = ult + ult = bc + + if chunks: + # The last two bytecodes could be a dummy "return None" that + # shouldn't be counted as real code. Every Python code object seems + # to end with a return, and a "return None" is inserted if there + # isn't an explicit return in the source. + if ult and penult: + if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE: + if self.code.co_consts[penult.arg] is None: + # This is "return None", but is it dummy? A real line + # would be a last chunk all by itself. + if chunks[-1].byte != penult.offset: + exit = -self.code.co_firstlineno + # Split the last chunk + last_chunk = chunks[-1] + last_chunk.exits.remove(exit) + last_chunk.exits.add(penult.offset) + chunk = Chunk(penult.offset) + chunk.exits.add(exit) + chunks.append(chunk) + + # Give all the chunks a length. chunks[-1].length = bc.next_offset - chunks[-1].byte for i in range(len(chunks)-1): chunks[i].length = chunks[i+1].byte - chunks[i].byte @@ -448,35 +497,35 @@ class ByteParser(object): def _arcs(self): """Find the executable arcs in the code. - + Returns a set of pairs, (from,to). From and to are integer line - numbers. If from is -1, then the arc is an entrance into the code - object. If to is -1, the arc is an exit from the code object. - + numbers. If from is < 0, then the arc is an entrance into the code + object. If to is < 0, the arc is an exit from the code object. + """ chunks = self._split_into_chunks() - + # A map from byte offsets to chunks jumped into. byte_chunks = dict([(c.byte, c) for c in chunks]) # Build a map from byte offsets to actual lines reached. - byte_lines = {-1:[-1]} + byte_lines = {} bytes_to_add = set([c.byte for c in chunks]) - + while bytes_to_add: byte_to_add = bytes_to_add.pop() - if byte_to_add in byte_lines or byte_to_add == -1: + if byte_to_add in byte_lines or byte_to_add < 0: continue - + # Which lines does this chunk lead to? bytes_considered = set() bytes_to_consider = [byte_to_add] lines = set() - + while bytes_to_consider: byte = bytes_to_consider.pop() bytes_considered.add(byte) - + # Find chunk for byte try: ch = byte_chunks[byte] @@ -488,89 +537,94 @@ class ByteParser(object): # No chunk for this byte! raise Exception("Couldn't find chunk @ %d" % byte) byte_chunks[byte] = ch - + if ch.line: lines.add(ch.line) else: for ex in ch.exits: - if ex == -1: - lines.add(-1) + if ex < 0: + lines.add(ex) elif ex not in bytes_considered: bytes_to_consider.append(ex) bytes_to_add.update(ch.exits) byte_lines[byte_to_add] = lines - + # Figure out for each chunk where the exits go. arcs = set() for chunk in chunks: if chunk.line: for ex in chunk.exits: - for exit_line in byte_lines[ex]: + if ex < 0: + exit_lines = [ex] + else: + exit_lines = byte_lines[ex] + for exit_line in exit_lines: if chunk.line != exit_line: arcs.add((chunk.line, exit_line)) for line in byte_lines[0]: arcs.add((-1, line)) - + return arcs - + def _all_chunks(self): """Returns a list of `Chunk` objects for this code and its children. - + See `_split_into_chunks` for details. - + """ chunks = [] for bp in self.child_parsers(): chunks.extend(bp._split_into_chunks()) - + return chunks def _all_arcs(self): """Get the set of all arcs in this code object and its children. - + See `_arcs` for details. - + """ arcs = set() for bp in self.child_parsers(): arcs.update(bp._arcs()) - + return arcs class Chunk(object): """A sequence of bytecodes with a single entrance. - + To analyze byte code, we have to divide it into chunks, sequences of byte codes such that each basic block has only one entrance, the first - instruction in the block. - + instruction in the block. + This is almost the CS concept of `basic block`_, except that we're willing to have many exits from a chunk, and "basic block" is a more cumbersome term. - + .. _basic block: http://en.wikipedia.org/wiki/Basic_block - - An exit of -1 means the chunk can leave the code (return). - + + An exit < 0 means the chunk can leave the code (return). The exit is + the negative of the starting line number of the code block. + """ def __init__(self, byte, line=0): self.byte = byte self.line = line self.length = 0 self.exits = set() - + def __repr__(self): return "<%d+%d @%d %r>" % ( self.byte, self.length, self.line, list(self.exits) ) -class AdHocMain(object): +class AdHocMain(object): # pragma: no cover """An ad-hoc main for code parsing experiments.""" - + def main(self, args): """A main function for trying the code from the command line.""" @@ -597,7 +651,7 @@ class AdHocMain(object): "-t", action="store_true", dest="tokens", help="Show tokens" ) - + options, args = parser.parse_args() if options.recursive: if args: @@ -612,12 +666,12 @@ class AdHocMain(object): def adhoc_one_file(self, options, filename): """Process just one file.""" - + if options.dis or options.chunks: try: bp = ByteParser(filename=filename) except CoverageException: - _, err, _ = sys.exc_info() + _, err, _ = sys.exc_info() print("%s" % (err,)) return @@ -644,7 +698,7 @@ class AdHocMain(object): arc_width, arc_chars = self.arc_ascii_art(arcs) else: arc_width, arc_chars = 0, {} - + exit_counts = cp.exit_counts() for i, ltext in enumerate(cp.lines): @@ -668,19 +722,19 @@ class AdHocMain(object): def arc_ascii_art(self, arcs): """Draw arcs as ascii art. - + Returns a width of characters needed to draw all the arcs, and a dictionary mapping line numbers to ascii strings to draw for that line. - + """ arc_chars = {} for lfrom, lto in sorted(arcs): - if lfrom == -1: + if lfrom < 0: arc_chars[lto] = arc_chars.get(lto, '') + 'v' - elif lto == -1: + elif lto < 0: arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^' else: - if lfrom == lto-1: + if lfrom == lto - 1: # Don't show obvious arcs. continue if lfrom < lto: |