summaryrefslogtreecommitdiff
path: root/coverage/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'coverage/parser.py')
-rw-r--r--  coverage/parser.py  254
1 files changed, 154 insertions, 100 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index 01b38af3..b090f02d 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -9,13 +9,13 @@ from coverage.misc import nice_pair, CoverageException, NoSource, expensive
class CodeParser(object):
"""Parse code to find executable lines, excluded lines, etc."""
-
+
def __init__(self, text=None, filename=None, exclude=None):
"""
Source can be provided as `text`, the text itself, or `filename`, from
- which text will be read. Excluded lines are those that match `exclude`,
- a regex.
-
+ which text will be read. Excluded lines are those that match
+ `exclude`, a regex.
+
"""
assert text or filename, "CodeParser needs either text or filename"
self.filename = filename or "<code>"
@@ -33,7 +33,7 @@ class CodeParser(object):
self.text = self.text.replace('\r\n', '\n')
self.exclude = exclude
-
+
self.show_tokens = False
# The text lines of the parsed code.
@@ -41,22 +41,22 @@ class CodeParser(object):
# The line numbers of excluded lines of code.
self.excluded = set()
-
+
# The line numbers of docstring lines.
self.docstrings = set()
-
+
# The line numbers of class definitions.
self.classdefs = set()
# A dict mapping line numbers to (lo,hi) for multi-line statements.
self.multiline = {}
-
+
# The line numbers that start statements.
self.statement_starts = set()
# Lazily-created ByteParser
self._byte_parser = None
-
+
def _get_byte_parser(self):
"""Create a ByteParser on demand."""
if not self._byte_parser:
@@ -67,9 +67,9 @@ class CodeParser(object):
def _raw_parse(self):
"""Parse the source to find the interesting facts about its lines.
-
+
A handful of member fields are updated.
-
+
"""
# Find lines which match an exclusion pattern.
if self.exclude:
@@ -77,7 +77,7 @@ class CodeParser(object):
for i, ltext in enumerate(self.lines):
if re_exclude.search(ltext):
self.excluded.add(i+1)
-
+
# Tokenize, to find excluded suites, to find docstrings, and to find
# multi-line statements.
indent = 0
@@ -88,7 +88,7 @@ class CodeParser(object):
tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
- if self.show_tokens:
+ if self.show_tokens: # pragma: no cover
print("%10s %5s %-20r %r" % (
tokenize.tok_name.get(toktype, toktype),
nice_pair((slineno, elineno)), ttext, ltext
@@ -111,7 +111,9 @@ class CodeParser(object):
excluding = True
elif toktype == token.STRING and prev_toktype == token.INDENT:
# Strings that are first on an indented line are docstrings.
- # (a trick from trace.py in the stdlib.)
+ # (a trick from trace.py in the stdlib.) This works for
+ # 99.9999% of cases. For the rest (!) see:
+ # http://stackoverflow.com/questions/1769332/x/1769794#1769794
for i in range(slineno, elineno+1):
self.docstrings.add(i)
elif toktype == token.NEWLINE:
@@ -123,7 +125,7 @@ class CodeParser(object):
for l in range(first_line, elineno+1):
self.multiline[l] = rng
first_line = None
-
+
if ttext.strip() and toktype != tokenize.COMMENT:
# A non-whitespace token.
if first_line is None:
@@ -135,7 +137,7 @@ class CodeParser(object):
excluding = False
if excluding:
self.excluded.add(elineno)
-
+
prev_toktype = toktype
# Find the starts of the executable statements.
@@ -153,11 +155,11 @@ class CodeParser(object):
def first_lines(self, lines, ignore=None):
"""Map the line numbers in `lines` to the correct first line of the
statement.
-
+
Skip any line mentioned in `ignore`.
-
+
Returns a sorted list of the first lines.
-
+
"""
ignore = ignore or []
lset = set()
@@ -168,31 +170,31 @@ class CodeParser(object):
if new_l not in ignore:
lset.add(new_l)
return sorted(lset)
-
+
def parse_source(self):
"""Parse source text to find executable lines, excluded lines, etc.
Return values are 1) a sorted list of executable line numbers, and
2) a sorted list of excluded line numbers.
-
+
Reported line numbers are normalized to the first line of multi-line
statements.
-
+
"""
self._raw_parse()
-
+
excluded_lines = self.first_lines(self.excluded)
ignore = excluded_lines + list(self.docstrings)
lines = self.first_lines(self.statement_starts, ignore)
-
+
return lines, excluded_lines
def arcs(self):
"""Get information about the arcs available in the code.
-
+
Returns a sorted list of line number pairs. Line numbers have been
normalized to the first line of multiline statements.
-
+
"""
all_arcs = []
for l1, l2 in self.byte_parser._all_arcs():
@@ -205,27 +207,32 @@ class CodeParser(object):
def exit_counts(self):
"""Get a mapping from line numbers to count of exits from that line.
-
+
Excluded lines are excluded.
-
+
"""
excluded_lines = self.first_lines(self.excluded)
exit_counts = {}
for l1, l2 in self.arcs():
- if l1 == -1:
+ if l1 < 0:
+ # Don't ever report -1 as a line number
continue
if l1 in excluded_lines:
+ # Don't report excluded lines as line numbers.
+ continue
+ if l2 in excluded_lines:
+ # Arcs to excluded lines shouldn't count.
continue
if l1 not in exit_counts:
exit_counts[l1] = 0
exit_counts[l1] += 1
-
+
# Class definitions have one extra exit, so remove one for each:
for l in self.classdefs:
- # Ensure key is there - #pragma: no cover will mean its not
+ # Ensure key is there: classdefs can include excluded lines.
if l in exit_counts:
exit_counts[l] -= 1
-
+
return exit_counts
exit_counts = expensive(exit_counts)
@@ -249,6 +256,11 @@ OPS_CHUNK_END = _opcode_set(
'BREAK_LOOP', 'CONTINUE_LOOP',
)
+# Opcodes that unconditionally begin a new code chunk. By starting new chunks
+# with unconditional jump instructions, we neatly deal with jumps to jumps
+# properly.
+OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')
+
# Opcodes that push a block on the block stack.
OPS_PUSH_BLOCK = _opcode_set('SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY')
@@ -266,6 +278,8 @@ OP_BREAK_LOOP = _opcode('BREAK_LOOP')
OP_END_FINALLY = _opcode('END_FINALLY')
OP_COMPARE_OP = _opcode('COMPARE_OP')
COMPARE_EXCEPTION = 10 # just have to get this const from the code.
+OP_LOAD_CONST = _opcode('LOAD_CONST')
+OP_RETURN_VALUE = _opcode('RETURN_VALUE')
class ByteParser(object):
@@ -294,14 +308,14 @@ class ByteParser(object):
def child_parsers(self):
"""Iterate over all the code objects nested within this one.
-
+
The iteration includes `self` as its first value.
-
+
"""
return map(lambda c: ByteParser(code=c), CodeObjects(self.code))
- # Getting numbers from the lnotab value changed in Py3.0.
- if sys.hexversion >= 0x03000000:
+ # Getting numbers from the lnotab value changed in Py3.0.
+ if sys.version_info >= (3, 0):
def _lnotab_increments(self, lnotab):
"""Return a list of ints from the lnotab bytes in 3.x"""
return list(lnotab)
@@ -312,15 +326,15 @@ class ByteParser(object):
def _bytes_lines(self):
"""Map byte offsets to line numbers in `code`.
-
+
Uses co_lnotab described in Python/compile.c to map byte offsets to
line numbers. Returns a list: [(b0, l0), (b1, l1), ...]
-
+
"""
# Adapted from dis.py in the standard library.
byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
-
+
bytes_lines = []
last_line_num = None
line_num = self.code.co_firstlineno
@@ -335,13 +349,13 @@ class ByteParser(object):
if line_num != last_line_num:
bytes_lines.append((byte_num, line_num))
return bytes_lines
-
+
def _find_statements(self):
"""Find the statements in `self.code`.
-
+
Return a set of line numbers that start statements. Recurses into all
code objects reachable from `self.code`.
-
+
"""
stmts = set()
for bp in self.child_parsers():
@@ -349,12 +363,12 @@ class ByteParser(object):
for _, l in bp._bytes_lines():
stmts.add(l)
return stmts
-
- def _disassemble(self):
+
+ def _disassemble(self): # pragma: no cover
"""Disassemble code, for ad-hoc experimenting."""
-
+
import dis
-
+
for bp in self.child_parsers():
print("\n%s: " % bp.code)
dis.dis(bp.code)
@@ -364,41 +378,52 @@ class ByteParser(object):
def _split_into_chunks(self):
"""Split the code object into a list of `Chunk` objects.
-
+
Each chunk is only entered at its first instruction, though there can
be many exits from a chunk.
-
+
Returns a list of `Chunk` objects.
-
+
"""
# The list of chunks so far, and the one we're working on.
chunks = []
chunk = None
bytes_lines_map = dict(self._bytes_lines())
-
+
# The block stack: loops and try blocks get pushed here for the
# implicit jumps that can occur.
# Each entry is a tuple: (block type, destination)
block_stack = []
-
+
# Some op codes are followed by branches that should be ignored. This
# is a count of how many ignores are left.
ignore_branch = 0
+ # We have to handle the last two bytecodes specially.
+ ult = penult = None
+
for bc in ByteCodes(self.code.co_code):
- # Maybe have to start a new block
+ # Maybe have to start a new chunk
if bc.offset in bytes_lines_map:
+ # Start a new chunk for each source line number.
if chunk:
chunk.exits.add(bc.offset)
chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
chunks.append(chunk)
-
+ elif bc.op in OPS_CHUNK_BEGIN:
+ # Jumps deserve their own unnumbered chunk. This fixes
+ # problems with jumps to jumps getting confused.
+ if chunk:
+ chunk.exits.add(bc.offset)
+ chunk = Chunk(bc.offset)
+ chunks.append(chunk)
+
if not chunk:
chunk = Chunk(bc.offset)
chunks.append(chunk)
- # Look at the opcode
+ # Look at the opcode
if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
if ignore_branch:
# Someone earlier wanted us to ignore this branch.
@@ -406,10 +431,10 @@ class ByteParser(object):
else:
# The opcode has a jump, it's an exit for this chunk.
chunk.exits.add(bc.jump_to)
-
+
if bc.op in OPS_CODE_END:
# The opcode can exit the code object.
- chunk.exits.add(-1)
+ chunk.exits.add(-self.code.co_firstlineno)
if bc.op in OPS_PUSH_BLOCK:
# The opcode adds a block to the block_stack.
block_stack.append((bc.op, bc.jump_to))
@@ -438,8 +463,32 @@ class ByteParser(object):
# This is an except clause. We want to overlook the next
# branch, so that except's don't count as branches.
ignore_branch += 1
-
+
+ penult = ult
+ ult = bc
+
+
if chunks:
+ # The last two bytecodes could be a dummy "return None" that
+ # shouldn't be counted as real code. Every Python code object seems
+ # to end with a return, and a "return None" is inserted if there
+ # isn't an explicit return in the source.
+ if ult and penult:
+ if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
+ if self.code.co_consts[penult.arg] is None:
+ # This is "return None", but is it dummy? A real line
+ # would be a last chunk all by itself.
+ if chunks[-1].byte != penult.offset:
+ exit = -self.code.co_firstlineno
+ # Split the last chunk
+ last_chunk = chunks[-1]
+ last_chunk.exits.remove(exit)
+ last_chunk.exits.add(penult.offset)
+ chunk = Chunk(penult.offset)
+ chunk.exits.add(exit)
+ chunks.append(chunk)
+
+ # Give all the chunks a length.
chunks[-1].length = bc.next_offset - chunks[-1].byte
for i in range(len(chunks)-1):
chunks[i].length = chunks[i+1].byte - chunks[i].byte
@@ -448,35 +497,35 @@ class ByteParser(object):
def _arcs(self):
"""Find the executable arcs in the code.
-
+
Returns a set of pairs, (from,to). From and to are integer line
- numbers. If from is -1, then the arc is an entrance into the code
- object. If to is -1, the arc is an exit from the code object.
-
+ numbers. If from is < 0, then the arc is an entrance into the code
+ object. If to is < 0, the arc is an exit from the code object.
+
"""
chunks = self._split_into_chunks()
-
+
# A map from byte offsets to chunks jumped into.
byte_chunks = dict([(c.byte, c) for c in chunks])
# Build a map from byte offsets to actual lines reached.
- byte_lines = {-1:[-1]}
+ byte_lines = {}
bytes_to_add = set([c.byte for c in chunks])
-
+
while bytes_to_add:
byte_to_add = bytes_to_add.pop()
- if byte_to_add in byte_lines or byte_to_add == -1:
+ if byte_to_add in byte_lines or byte_to_add < 0:
continue
-
+
# Which lines does this chunk lead to?
bytes_considered = set()
bytes_to_consider = [byte_to_add]
lines = set()
-
+
while bytes_to_consider:
byte = bytes_to_consider.pop()
bytes_considered.add(byte)
-
+
# Find chunk for byte
try:
ch = byte_chunks[byte]
@@ -488,89 +537,94 @@ class ByteParser(object):
# No chunk for this byte!
raise Exception("Couldn't find chunk @ %d" % byte)
byte_chunks[byte] = ch
-
+
if ch.line:
lines.add(ch.line)
else:
for ex in ch.exits:
- if ex == -1:
- lines.add(-1)
+ if ex < 0:
+ lines.add(ex)
elif ex not in bytes_considered:
bytes_to_consider.append(ex)
bytes_to_add.update(ch.exits)
byte_lines[byte_to_add] = lines
-
+
# Figure out for each chunk where the exits go.
arcs = set()
for chunk in chunks:
if chunk.line:
for ex in chunk.exits:
- for exit_line in byte_lines[ex]:
+ if ex < 0:
+ exit_lines = [ex]
+ else:
+ exit_lines = byte_lines[ex]
+ for exit_line in exit_lines:
if chunk.line != exit_line:
arcs.add((chunk.line, exit_line))
for line in byte_lines[0]:
arcs.add((-1, line))
-
+
return arcs
-
+
def _all_chunks(self):
"""Returns a list of `Chunk` objects for this code and its children.
-
+
See `_split_into_chunks` for details.
-
+
"""
chunks = []
for bp in self.child_parsers():
chunks.extend(bp._split_into_chunks())
-
+
return chunks
def _all_arcs(self):
"""Get the set of all arcs in this code object and its children.
-
+
See `_arcs` for details.
-
+
"""
arcs = set()
for bp in self.child_parsers():
arcs.update(bp._arcs())
-
+
return arcs
class Chunk(object):
"""A sequence of bytecodes with a single entrance.
-
+
To analyze byte code, we have to divide it into chunks, sequences of byte
codes such that each basic block has only one entrance, the first
- instruction in the block.
-
+ instruction in the block.
+
This is almost the CS concept of `basic block`_, except that we're willing
to have many exits from a chunk, and "basic block" is a more cumbersome
term.
-
+
.. _basic block: http://en.wikipedia.org/wiki/Basic_block
-
- An exit of -1 means the chunk can leave the code (return).
-
+
+ An exit < 0 means the chunk can leave the code (return). The exit is
+ the negative of the starting line number of the code block.
+
"""
def __init__(self, byte, line=0):
self.byte = byte
self.line = line
self.length = 0
self.exits = set()
-
+
def __repr__(self):
return "<%d+%d @%d %r>" % (
self.byte, self.length, self.line, list(self.exits)
)
-class AdHocMain(object):
+class AdHocMain(object): # pragma: no cover
"""An ad-hoc main for code parsing experiments."""
-
+
def main(self, args):
"""A main function for trying the code from the command line."""
@@ -597,7 +651,7 @@ class AdHocMain(object):
"-t", action="store_true", dest="tokens",
help="Show tokens"
)
-
+
options, args = parser.parse_args()
if options.recursive:
if args:
@@ -612,12 +666,12 @@ class AdHocMain(object):
def adhoc_one_file(self, options, filename):
"""Process just one file."""
-
+
if options.dis or options.chunks:
try:
bp = ByteParser(filename=filename)
except CoverageException:
- _, err, _ = sys.exc_info()
+ _, err, _ = sys.exc_info()
print("%s" % (err,))
return
@@ -644,7 +698,7 @@ class AdHocMain(object):
arc_width, arc_chars = self.arc_ascii_art(arcs)
else:
arc_width, arc_chars = 0, {}
-
+
exit_counts = cp.exit_counts()
for i, ltext in enumerate(cp.lines):
@@ -668,19 +722,19 @@ class AdHocMain(object):
def arc_ascii_art(self, arcs):
"""Draw arcs as ascii art.
-
+
Returns a width of characters needed to draw all the arcs, and a
dictionary mapping line numbers to ascii strings to draw for that line.
-
+
"""
arc_chars = {}
for lfrom, lto in sorted(arcs):
- if lfrom == -1:
+ if lfrom < 0:
arc_chars[lto] = arc_chars.get(lto, '') + 'v'
- elif lto == -1:
+ elif lto < 0:
arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^'
else:
- if lfrom == lto-1:
+ if lfrom == lto - 1:
# Don't show obvious arcs.
continue
if lfrom < lto: