summary | refs | log | tree | commit | diff
path: root/coverage/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'coverage/parser.py')
-rw-r--r-- coverage/parser.py 376
1 files changed, 231 insertions, 145 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index 0747d4a..ff066e8 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -10,11 +10,28 @@ from coverage.misc import nice_pair, CoverageException
class CodeParser:
"""Parse code to find executable lines, excluded lines, etc."""
- def __init__(self):
+ def __init__(self, text=None, filename=None, exclude=None):
+ """
+ Source can be provided as `text`, the text itself, or `filename`, from
+ which text will be read. Excluded lines are those that match `exclude`,
+ a regex.
+
+ """
+
+ assert text or filename, "CodeParser needs either text or filename"
+ self.filename = filename or "<code>"
+ if not text:
+ sourcef = open(self.filename, 'rU')
+ self.text = sourcef.read()
+ sourcef.close()
+ self.text = self.text.replace('\r\n', '\n')
+
+ self.exclude = exclude
+
self.show_tokens = False
# The text lines of the parsed code.
- self.lines = None
+ self.lines = self.text.split('\n')
# The line numbers of excluded lines of code.
self.excluded = set()
@@ -28,72 +45,15 @@ class CodeParser:
# The line numbers that start statements.
self.statement_starts = set()
- # Getting numbers from the lnotab value changed in Py3.0.
- if sys.hexversion >= 0x03000000:
- def _lnotab_increments(self, lnotab):
- """Return a list of ints from the lnotab bytes in 3.x"""
- return list(lnotab)
- else:
- def _lnotab_increments(self, lnotab):
- """Return a list of ints from the lnotab string in 2.x"""
- return [ord(c) for c in lnotab]
-
- def _bytes_lines(self, code):
- """Map byte offsets to line numbers in `code`.
-
- Uses co_lnotab described in Python/compile.c to map byte offsets to
- line numbers. Returns a list: [(b0, l0), (b1, l1), ...]
-
- """
- # Adapted from dis.py in the standard library.
- byte_increments = self._lnotab_increments(code.co_lnotab[0::2])
- line_increments = self._lnotab_increments(code.co_lnotab[1::2])
-
- bytes_lines = []
- last_line_num = None
- line_num = code.co_firstlineno
- byte_num = 0
- for byte_incr, line_incr in zip(byte_increments, line_increments):
- if byte_incr:
- if line_num != last_line_num:
- bytes_lines.append((byte_num, line_num))
- last_line_num = line_num
- byte_num += byte_incr
- line_num += line_incr
- if line_num != last_line_num:
- bytes_lines.append((byte_num, line_num))
- return bytes_lines
-
- def _find_statements(self, code):
- """Find the statements in `code`.
-
- Update `self.statement_starts`, a set of line numbers that start
- statements. Recurses into all code objects reachable from `code`.
-
- """
- # Adapted from trace.py in the standard library.
- for co in CodeObjects(code):
- # Get all of the lineno information from this code.
- bytes_lines = self._bytes_lines(co)
- for b, l in bytes_lines:
- self.statement_starts.add(l)
-
- def _raw_parse(self, text=None, filename=None, exclude=None):
- """Parse `text` to find the interesting facts about its lines.
+ def _raw_parse(self):
+ """Parse the source to find the interesting facts about its lines.
A handful of member fields are updated.
"""
- if not text:
- sourcef = open(filename, 'rU')
- text = sourcef.read()
- sourcef.close()
- text = text.replace('\r\n', '\n')
- self.lines = text.split('\n')
-
# Find lines which match an exclusion pattern.
- if exclude:
- re_exclude = re.compile(exclude)
+ if self.exclude:
+ re_exclude = re.compile(self.exclude)
for i, ltext in enumerate(self.lines):
if re_exclude.search(ltext):
self.excluded.add(i+1)
@@ -106,7 +66,7 @@ class CodeParser:
prev_toktype = token.INDENT
first_line = None
- tokgen = tokenize.generate_tokens(StringIO(text).readline)
+ tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
if self.show_tokens:
print("%10s %5s %-20r %r" % (
@@ -154,20 +114,8 @@ class CodeParser:
prev_toktype = toktype
# Find the starts of the executable statements.
- filename = filename or "<code>"
- try:
- # Python 2.3 and 2.4 don't like partial last lines, so be sure the
- # text ends nicely for them.
- text += '\n'
- code = compile(text, filename, "exec")
- except SyntaxError:
- _, synerr, _ = sys.exc_info()
- raise CoverageException(
- "Couldn't parse '%s' as Python source: '%s' at line %d" %
- (filename, synerr.msg, synerr.lineno)
- )
-
- self._find_statements(code)
+ byte_parser = ByteParser(text=self.text, filename=self.filename)
+ self.statement_starts.update(byte_parser._find_statements())
def _map_to_first_line(self, lines, ignore=None):
"""Map the line numbers in `lines` to the correct first line of the
@@ -194,19 +142,15 @@ class CodeParser:
lines.sort()
return lines
- def parse_source(self, text=None, filename=None, exclude=None):
+ def parse_source(self):
"""Parse source text to find executable lines, excluded lines, etc.
-
- Source can be provided as `text`, the text itself, or `filename`, from
- which text will be read. Excluded lines are those that match `exclude`,
- a regex.
-
+
Return values are 1) a sorted list of executable line numbers,
2) a sorted list of excluded line numbers, and 3) a dict mapping line
numbers to pairs (lo,hi) for multi-line statements.
"""
- self._raw_parse(text, filename, exclude)
+ self._raw_parse()
excluded_lines = self._map_to_first_line(self.excluded)
ignore = excluded_lines + list(self.docstrings)
@@ -214,19 +158,94 @@ class CodeParser:
return lines, excluded_lines, self.multiline
- def _disassemble(self, code):
+
+class ByteParser:
+
+ def __init__(self, code=None, text=None, filename=None):
+
+ if code:
+ self.code = code
+ else:
+ if not text:
+ assert filename, "If no code or text, need a filename"
+ sourcef = open(filename, 'rU')
+ text = sourcef.read()
+ sourcef.close()
+
+ try:
+ # Python 2.3 and 2.4 don't like partial last lines, so be sure
+ # the text ends nicely for them.
+ self.code = compile(text + '\n', filename, "exec")
+ except SyntaxError:
+ _, synerr, _ = sys.exc_info()
+ raise CoverageException(
+ "Couldn't parse '%s' as Python source: '%s' at line %d" %
+ (filename, synerr.msg, synerr.lineno)
+ )
+
+ def child_parsers(self):
+ """Iterate over all the code objects nested within this one, starting with self."""
+ return map(lambda c: ByteParser(code=c), CodeObjects(self.code))
+
+ # Getting numbers from the lnotab value changed in Py3.0.
+ if sys.hexversion >= 0x03000000:
+ def _lnotab_increments(self, lnotab):
+ """Return a list of ints from the lnotab bytes in 3.x"""
+ return list(lnotab)
+ else:
+ def _lnotab_increments(self, lnotab):
+ """Return a list of ints from the lnotab string in 2.x"""
+ return [ord(c) for c in lnotab]
+
+ def _bytes_lines(self):
+ """Map byte offsets to line numbers in `code`.
+
+ Uses co_lnotab described in Python/compile.c to map byte offsets to
+ line numbers. Returns a list: [(b0, l0), (b1, l1), ...]
+
+ """
+ # Adapted from dis.py in the standard library.
+ byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
+ line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
+
+ bytes_lines = []
+ last_line_num = None
+ line_num = self.code.co_firstlineno
+ byte_num = 0
+ for byte_incr, line_incr in zip(byte_increments, line_increments):
+ if byte_incr:
+ if line_num != last_line_num:
+ bytes_lines.append((byte_num, line_num))
+ last_line_num = line_num
+ byte_num += byte_incr
+ line_num += line_incr
+ if line_num != last_line_num:
+ bytes_lines.append((byte_num, line_num))
+ return bytes_lines
+
+ def _find_statements(self):
+ """Find the statements in `self.code`.
+
+ Return a set of line numbers that start statements. Recurses into all
+ code objects reachable from `self.code`.
+
+ """
+ stmts = set()
+ for bp in self.child_parsers():
+ # Get all of the lineno information from this code.
+ for _, l in bp._bytes_lines():
+ stmts.add(l)
+ return stmts
+
+ def _disassemble(self):
"""Disassemble code, for ad-hoc experimenting."""
import dis
- for codeobj in CodeObjects(code):
- print("\n%s: " % codeobj)
- dis.dis(codeobj)
- print("Bytes lines: %r" % self._bytes_lines(codeobj))
- print("Jumps: %r %r" % self._find_byte_jumps(codeobj))
- warnings, chunks = self._split_into_chunks(codeobj)
- if warnings:
- print("WARNING: %s" % "\n".join(warnings))
+ for bp in self.child_parsers():
+ print("\n%s: " % bp.code)
+ dis.dis(bp.code)
+ print("Bytes lines: %r" % bp._bytes_lines())
print("")
@@ -241,30 +260,31 @@ class CodeParser:
last_line = l
return last_line
- def _find_byte_jumps(self, code):
- byte_jumps = [(bc.offset, bc.jump_to) for bc in ByteCodes(code.co_code) if bc.jump_to >= 0]
-
- bytes_lines = self._bytes_lines(code)
- line_jumps = [(self._line_for_byte(bytes_lines, b0), self._line_for_byte(bytes_lines, b1)) for b0, b1 in byte_jumps]
- return byte_jumps, line_jumps
-
- _chunk_enders = set([opcode.opmap[name] for name in ['JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE']])
+ _code_enders = set([opcode.opmap[name] for name in ['RETURN_VALUE']])
+ _chunk_enders = set([opcode.opmap[name] for name in ['JUMP_ABSOLUTE', 'JUMP_FORWARD']])
+ _chunk_enders |= _code_enders
- def _split_into_chunks(self, code):
+ def _split_into_chunks(self):
class Chunk(object):
+ """A sequence of bytecodes with exits to other bytecodes.
+
+ An exit of None means the chunk can leave the code block (return).
+
+ """
def __init__(self, byte, line=0):
self.byte = byte
self.line = line
+ self.length = 0
self.exits = set()
def __repr__(self):
- return "<%d:%d %r>" % (self.byte, self.line, list(self.exits))
+ return "<%d:%d(%d) %r>" % (self.byte, self.line, self.length, list(self.exits))
chunks = []
chunk = None
- bytes_lines_map = dict(self._bytes_lines(code))
-
- for bc in ByteCodes(code.co_code):
+ bytes_lines_map = dict(self._bytes_lines())
+
+ for bc in ByteCodes(self.code.co_code):
# Maybe have to start a new block
if bc.offset in bytes_lines_map:
if chunk:
@@ -279,34 +299,99 @@ class CodeParser:
if bc.jump_to >= 0:
chunk.exits.add(bc.jump_to)
+ if bc.op in self._code_enders:
+ chunk.exits.add(None)
+
if bc.op in self._chunk_enders:
chunk = None
+
+ if chunks:
+ chunks[-1].length = bc.next_offset - chunks[-1].byte
+ for i in range(len(chunks)-1):
+ chunks[i].length = chunks[i+1].byte - chunks[i].byte
+
+ return chunks
+
+ def _arcs(self):
+ chunks = self._split_into_chunks()
- warnings = []
- # Find anonymous chunks (not associated with a line number), and find
- # the numbered chunks that jump to them.
- for ch in chunks:
- if not ch.line:
- jumpers = [c for c in chunks if ch.byte in c.exits]
- if len(jumpers) == 1:
- ch.line = jumpers[0].line
- if len(jumpers) > 1:
- warnings.append("Anon chunk at %d has %d jumpers" % (ch.byte, len(jumpers)))
- #if len(ch.exits) > 1:
- # warnings.append("Anon chunk at %d has %d exits" % (ch.byte, len(ch.exits)))
- return warnings, chunks
-
- def _all_chunks(self, code):
- warnings = []
- chunks = []
- for co in CodeObjects(code):
- warns, chs = self._split_into_chunks(co)
- warnings.extend(warns)
- chunks.extend(chs)
+ # A map from byte offsets to chunks jumped into.
+ byte_chunks = dict([(c.byte, c) for c in chunks])
+
+ # Build a map from byte offsets to actual lines reached.
+ byte_lines = {None:[None]}
+ bytes_to_add = set([c.byte for c in chunks])
- return warnings, chunks
+ while bytes_to_add:
+ byte_to_add = bytes_to_add.pop()
+ if byte_to_add in byte_lines or byte_to_add is None:
+ continue
- def adhoc_main(self, args):
+ # Which lines does this chunk lead to?
+ bytes_considered = set()
+ bytes_to_consider = [byte_to_add]
+ lines = set()
+
+ while bytes_to_consider:
+ byte = bytes_to_consider.pop()
+ bytes_considered.add(byte)
+
+ # Find chunk for byte
+ try:
+ ch = byte_chunks[byte]
+ except KeyError:
+ for ch in chunks:
+ if ch.byte <= byte < ch.byte+ch.length:
+ break
+ else:
+ # No chunk for this byte!
+ raise Exception("Couldn't find a chunk for byte %d" % byte)
+ byte_chunks[byte] = ch
+
+ if ch.line:
+ lines.add(ch.line)
+ else:
+ for ex in ch.exits:
+ if ex is None:
+ lines.add(None)
+ elif ex not in bytes_considered:
+ bytes_to_consider.append(ex)
+
+ bytes_to_add.update(ch.exits)
+
+ byte_lines[byte_to_add] = lines
+
+ # Figure out for each chunk where the exits go.
+ arcs = set()
+ for chunk in chunks:
+ if chunk.line:
+ for ex in chunk.exits:
+ for exit_line in byte_lines[ex]:
+ arcs.add((chunk.line, exit_line))
+ for line in byte_lines[0]:
+ arcs.add((None, line))
+
+ return arcs
+
+ def _all_chunks(self):
+ chunks = []
+ for bp in self.child_parsers():
+ chunks.extend(bp._split_into_chunks())
+
+ return [], chunks
+
+ def _all_arcs(self):
+ arcs = []
+ for bp in self.child_parsers():
+ arcs.extend(bp._arcs())
+
+ return arcs
+
+
+class AdHocMain(object):
+ """An ad-hoc main for code parsing experiments."""
+
+ def main(self, args):
"""A main function for trying the code from the command line."""
from optparse import OptionParser
@@ -341,21 +426,20 @@ class CodeParser:
self.adhoc_one_file(options, args[0])
def adhoc_one_file(self, options, filename):
- if options.dis or options.chunks:
- source = open(filename, "rU").read() + "\n\n"
+ if options.dis or options.chunks:
try:
- code = compile(source, filename, "exec")
- except SyntaxError:
+ bp = ByteParser(filename=filename)
+ except CoverageException:
_, err, _ = sys.exc_info()
- print("** Couldn't compile %s: %s" % (filename, err))
+ print("%s" % (err,))
return
if options.dis:
print("Main code:")
- self._disassemble(code)
+ bp._disassemble()
if options.chunks:
- warnings, chunks = self._all_chunks(code)
+ warnings, chunks = bp._all_chunks()
if options.recursive:
print("%6d: %s" % (len(chunks), filename))
if warnings:
@@ -363,22 +447,24 @@ class CodeParser:
else:
print(warnings)
print(chunks)
+ print(bp._all_arcs())
- self.show_tokens = options.tokens
- self._raw_parse(filename=filename, exclude=r"no\s*cover")
+ if options.source or options.tokens:
+ cp = CodeParser(filename=filename, exclude=r"no\s*cover")
+ cp.show_tokens = options.tokens
+ cp._raw_parse()
- if options.source:
- for i, ltext in enumerate(self.lines):
+ for i, ltext in enumerate(cp.lines):
lineno = i+1
m0 = m1 = m2 = ' '
- if lineno in self.statement_starts:
+ if lineno in cp.statement_starts:
m0 = '-'
- if lineno in self.docstrings:
+ if lineno in cp.docstrings:
m1 = '"'
- if lineno in self.excluded:
+ if lineno in cp.excluded:
m2 = 'x'
print("%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext))
if __name__ == '__main__':
- CodeParser().adhoc_main(sys.argv[1:])
+ AdHocMain().main(sys.argv[1:])