diff options
-rw-r--r-- | coverage/parser.py | 160 | ||||
-rw-r--r-- | lab/disgen.py | 260 | ||||
-rw-r--r-- | lab/parser.py | 177 |
3 files changed, 442 insertions, 155 deletions
diff --git a/coverage/parser.py b/coverage/parser.py index b65689c4..d380eda1 100644 --- a/coverage/parser.py +++ b/coverage/parser.py @@ -1,6 +1,6 @@ """Code parsing for Coverage.""" -import glob, opcode, os, re, sys, token, tokenize +import opcode, re, sys, token, tokenize from coverage.backward import set, sorted, StringIO # pylint: disable=W0622 from coverage.backward import open_source @@ -314,6 +314,7 @@ class ByteParser(object): def __init__(self, code=None, text=None, filename=None): if code: self.code = code + self.text = text else: if not text: assert filename, "If no code or text, need a filename" @@ -322,6 +323,7 @@ class ByteParser(object): text = sourcef.read() finally: sourcef.close() + self.text = text try: # Python 2.3 and 2.4 don't like partial last lines, so be sure @@ -350,7 +352,8 @@ class ByteParser(object): The iteration includes `self` as its first value. """ - return map(lambda c: ByteParser(code=c), CodeObjects(self.code)) + children = CodeObjects(self.code) + return [ByteParser(code=c, text=self.text) for c in children] # Getting numbers from the lnotab value changed in Py3.0. if sys.version_info >= (3, 0): @@ -402,18 +405,6 @@ class ByteParser(object): stmts.add(l) return stmts - def _disassemble(self): # pragma: no cover - """Disassemble code, for ad-hoc experimenting.""" - - import dis - - for bp in self.child_parsers(): - print("\n%s: " % bp.code) - dis.dis(bp.code) - print("Bytes lines: %r" % bp._bytes_lines()) - - print("") - def _split_into_chunks(self): """Split the code object into a list of `Chunk` objects. @@ -657,144 +648,3 @@ class Chunk(object): return "<%d+%d @%d %r>" % ( self.byte, self.length, self.line, list(self.exits) ) - - -class AdHocMain(object): # pragma: no cover - """An ad-hoc main for code parsing experiments.""" - - def main(self, args): - """A main function for trying the code from the command line.""" - - from optparse import OptionParser - - parser = OptionParser() - parser.add_option( - "-c", action="store_true", dest="chunks", - help="Show basic block chunks" - ) - parser.add_option( - "-d", action="store_true", dest="dis", - help="Disassemble" - ) - parser.add_option( - "-R", action="store_true", dest="recursive", - help="Recurse to find source files" - ) - parser.add_option( - "-s", action="store_true", dest="source", - help="Show analyzed source" - ) - parser.add_option( - "-t", action="store_true", dest="tokens", - help="Show tokens" - ) - - options, args = parser.parse_args() - if options.recursive: - if args: - root = args[0] - else: - root = "." - for root, _, _ in os.walk(root): - for f in glob.glob(root + "/*.py"): - self.adhoc_one_file(options, f) - else: - self.adhoc_one_file(options, args[0]) - - def adhoc_one_file(self, options, filename): - """Process just one file.""" - - if options.dis or options.chunks: - try: - bp = ByteParser(filename=filename) - except CoverageException: - _, err, _ = sys.exc_info() - print("%s" % (err,)) - return - - if options.dis: - print("Main code:") - bp._disassemble() - - if options.chunks: - chunks = bp._all_chunks() - if options.recursive: - print("%6d: %s" % (len(chunks), filename)) - else: - print("Chunks: %r" % chunks) - arcs = bp._all_arcs() - print("Arcs: %r" % sorted(arcs)) - - if options.source or options.tokens: - cp = CodeParser(filename=filename, exclude=r"no\s*cover") - cp.show_tokens = options.tokens - cp._raw_parse() - - if options.source: - if options.chunks: - arc_width, arc_chars = self.arc_ascii_art(arcs) - else: - arc_width, arc_chars = 0, {} - - exit_counts = cp.exit_counts() - - for i, ltext in enumerate(cp.lines): - lineno = i+1 - m0 = m1 = m2 = m3 = a = ' ' - if lineno in cp.statement_starts: - m0 = '-' - exits = exit_counts.get(lineno, 0) - if exits > 1: - m1 = str(exits) - if lineno in cp.docstrings: - m2 = '"' - if lineno in cp.classdefs: - m2 = 'C' - if lineno in cp.excluded: - m3 = 'x' - a = arc_chars.get(lineno, '').ljust(arc_width) - print("%4d %s%s%s%s%s %s" % - (lineno, m0, m1, m2, m3, a, ltext) - ) - - def arc_ascii_art(self, arcs): - """Draw arcs as ascii art. - - Returns a width of characters needed to draw all the arcs, and a - dictionary mapping line numbers to ascii strings to draw for that line. - - """ - arc_chars = {} - for lfrom, lto in sorted(arcs): - if lfrom < 0: - arc_chars[lto] = arc_chars.get(lto, '') + 'v' - elif lto < 0: - arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^' - else: - if lfrom == lto - 1: - # Don't show obvious arcs. - continue - if lfrom < lto: - l1, l2 = lfrom, lto - else: - l1, l2 = lto, lfrom - w = max([len(arc_chars.get(l, '')) for l in range(l1, l2+1)]) - for l in range(l1, l2+1): - if l == lfrom: - ch = '<' - elif l == lto: - ch = '>' - else: - ch = '|' - arc_chars[l] = arc_chars.get(l, '').ljust(w) + ch - arc_width = 0 - - if arc_chars: - arc_width = max([len(a) for a in arc_chars.values()]) - else: - arc_width = 0 - - return arc_width, arc_chars - -if __name__ == '__main__': - AdHocMain().main(sys.argv[1:]) diff --git a/lab/disgen.py b/lab/disgen.py new file mode 100644 index 00000000..82844eac --- /dev/null +++ b/lab/disgen.py @@ -0,0 +1,260 @@ +"""Disassembler of Python byte code into mnemonics.""" + +# Adapted from stdlib dis.py, but returns structured information +# instead of printing to stdout. + +import sys +import types +import collections + +from opcode import * +from opcode import __all__ as _opcodes_all + +__all__ = ["dis", "disassemble", "distb", "disco", + "findlinestarts", "findlabels"] + _opcodes_all +del _opcodes_all + +_have_code = (types.MethodType, types.FunctionType, types.CodeType, + types.ClassType, type) + +def dis(x=None): + for disline in disgen(x): + if disline.first and disline.offset > 0: + print + print format_dis_line(disline) + +def format_dis_line(disline): + if disline.first: + lineno = "%3d" % disline.lineno + else: + lineno = " " + if disline.target: + label = ">>" + else: + label = " " + if disline.oparg is not None: + oparg = repr(disline.oparg) + else: + oparg = "" + return "%s %s %4r %-20s %5s %s" % (lineno, label, disline.offset, disline.opcode, oparg, disline.argstr) + +def disgen(x=None): + """Disassemble classes, methods, functions, or code. + + With no argument, disassemble the last traceback. + + """ + if x is None: + return distb() + if isinstance(x, types.InstanceType): + x = x.__class__ + if hasattr(x, 'im_func'): + x = x.im_func + if hasattr(x, 'func_code'): + x = x.func_code + if hasattr(x, '__dict__'): + assert False + items = x.__dict__.items() + items.sort() + for name, x1 in items: + if isinstance(x1, _have_code): + print "Disassembly of %s:" % name + try: + dis(x1) + except TypeError, msg: + print "Sorry:", msg + print + elif hasattr(x, 'co_code'): + return disassemble(x) + elif isinstance(x, str): + assert False + disassemble_string(x) + else: + raise TypeError, \ + "don't know how to disassemble %s objects" % \ + type(x).__name__ + +def distb(tb=None): + """Disassemble a traceback (default: last traceback).""" + if tb is None: + try: + tb = sys.last_traceback + except AttributeError: + raise RuntimeError, "no last traceback to disassemble" + while tb.tb_next: tb = tb.tb_next + return disassemble(tb.tb_frame.f_code, tb.tb_lasti) + +DisLine = collections.namedtuple( + 'DisLine', + "lineno first target offset opcode oparg argstr" + ) + +def disassemble(co, lasti=-1): + """Disassemble a code object.""" + code = co.co_code + labels = findlabels(code) + linestarts = dict(findlinestarts(co)) + n = len(code) + i = 0 + extended_arg = 0 + free = None + + dislines = [] + lineno = linestarts[0] + + while i < n: + c = code[i] + op = ord(c) + first = i in linestarts + if first: + lineno = linestarts[i] + + #if i == lasti: print '-->', + #else: print ' ', + target = i in labels + offset = i + opcode = opname[op] + i = i+1 + if op >= HAVE_ARGUMENT: + oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg + extended_arg = 0 + i = i+2 + if op == EXTENDED_ARG: + extended_arg = oparg*65536L + if op in hasconst: + argstr = '(' + repr(co.co_consts[oparg]) + ')' + elif op in hasname: + argstr = '(' + co.co_names[oparg] + ')' + elif op in hasjrel: + argstr = '(to ' + repr(i + oparg) + ')' + elif op in haslocal: + argstr = '(' + co.co_varnames[oparg] + ')' + elif op in hascompare: + argstr = '(' + cmp_op[oparg] + ')' + elif op in hasfree: + if free is None: + free = co.co_cellvars + co.co_freevars + argstr = '(' + free[oparg] + ')' + else: + argstr = "" + else: + oparg = None + argstr = "" + yield DisLine(lineno=lineno, first=first, target=target, offset=offset, opcode=opcode, oparg=oparg, argstr=argstr) + + +def disassemble_string(code, lasti=-1, varnames=None, names=None, + constants=None): + labels = findlabels(code) + n = len(code) + i = 0 + while i < n: + c = code[i] + op = ord(c) + if i == lasti: print '-->', + else: print ' ', + if i in labels: print '>>', + else: print ' ', + print repr(i).rjust(4), + print opname[op].ljust(15), + i = i+1 + if op >= HAVE_ARGUMENT: + oparg = ord(code[i]) + ord(code[i+1])*256 + i = i+2 + print repr(oparg).rjust(5), + if op in hasconst: + if constants: + print '(' + repr(constants[oparg]) + ')', + else: + print '(%d)'%oparg, + elif op in hasname: + if names is not None: + print '(' + names[oparg] + ')', + else: + print '(%d)'%oparg, + elif op in hasjrel: + print '(to ' + repr(i + oparg) + ')', + elif op in haslocal: + if varnames: + print '(' + varnames[oparg] + ')', + else: + print '(%d)' % oparg, + elif op in hascompare: + print '(' + cmp_op[oparg] + ')', + print + +disco = disassemble # XXX For backwards compatibility + +def findlabels(code): + """Detect all offsets in a byte code which are jump targets. + + Return the list of offsets. + + """ + labels = [] + n = len(code) + i = 0 + while i < n: + c = code[i] + op = ord(c) + i = i+1 + if op >= HAVE_ARGUMENT: + oparg = ord(code[i]) + ord(code[i+1])*256 + i = i+2 + label = -1 + if op in hasjrel: + label = i+oparg + elif op in hasjabs: + label = oparg + if label >= 0: + if label not in labels: + labels.append(label) + return labels + +def findlinestarts(code): + """Find the offsets in a byte code which are start of lines in the source. + + Generate pairs (offset, lineno) as described in Python/compile.c. + + """ + byte_increments = [ord(c) for c in code.co_lnotab[0::2]] + line_increments = [ord(c) for c in code.co_lnotab[1::2]] + + lastlineno = None + lineno = code.co_firstlineno + addr = 0 + for byte_incr, line_incr in zip(byte_increments, line_increments): + if byte_incr: + if lineno != lastlineno: + yield (addr, lineno) + lastlineno = lineno + addr += byte_incr + lineno += line_incr + if lineno != lastlineno: + yield (addr, lineno) + +def _test(): + """Simple test program to disassemble a file.""" + if sys.argv[1:]: + if sys.argv[2:]: + sys.stderr.write("usage: python dis.py [-|file]\n") + sys.exit(2) + fn = sys.argv[1] + if not fn or fn == "-": + fn = None + else: + fn = None + if fn is None: + f = sys.stdin + else: + f = open(fn) + source = f.read() + if fn is not None: + f.close() + else: + fn = "<stdin>" + code = compile(source, fn, "exec") + dis(code) + +if __name__ == "__main__": + _test() diff --git a/lab/parser.py b/lab/parser.py new file mode 100644 index 00000000..67c41e80 --- /dev/null +++ b/lab/parser.py @@ -0,0 +1,177 @@ +"""Parser.py: a main for invoking code in coverage/parser.py""" + +import glob, os, sys +from optparse import OptionParser + +import disgen + +from coverage.misc import CoverageException +from coverage.parser import ByteParser, CodeParser + + +class AdHocMain(object): + """An ad-hoc main for code parsing experiments.""" + + def main(self, args): + """A main function for trying the code from the command line.""" + + parser = OptionParser() + parser.add_option( + "-c", action="store_true", dest="chunks", + help="Show basic block chunks" + ) + parser.add_option( + "-d", action="store_true", dest="dis", + help="Disassemble" + ) + parser.add_option( + "-R", action="store_true", dest="recursive", + help="Recurse to find source files" + ) + parser.add_option( + "-s", action="store_true", dest="source", + help="Show analyzed source" + ) + parser.add_option( + "-t", action="store_true", dest="tokens", + help="Show tokens" + ) + + options, args = parser.parse_args() + if options.recursive: + if args: + root = args[0] + else: + root = "." + for root, _, _ in os.walk(root): + for f in glob.glob(root + "/*.py"): + self.adhoc_one_file(options, f) + else: + self.adhoc_one_file(options, args[0]) + + def adhoc_one_file(self, options, filename): + """Process just one file.""" + + if options.dis or options.chunks: + try: + bp = ByteParser(filename=filename) + except CoverageException: + _, err, _ = sys.exc_info() + print("%s" % (err,)) + return + + if options.dis: + print("Main code:") + self.disassemble(bp) + + if options.chunks: + chunks = bp._all_chunks() + if options.recursive: + print("%6d: %s" % (len(chunks), filename)) + else: + print("Chunks: %r" % chunks) + arcs = bp._all_arcs() + print("Arcs: %r" % sorted(arcs)) + + if options.source or options.tokens: + cp = CodeParser(filename=filename, exclude=r"no\s*cover") + cp.show_tokens = options.tokens + cp._raw_parse() + + if options.source: + if options.chunks: + arc_width, arc_chars = self.arc_ascii_art(arcs) + else: + arc_width, arc_chars = 0, {} + + exit_counts = cp.exit_counts() + + for i, ltext in enumerate(cp.lines): + lineno = i+1 + m0 = m1 = m2 = m3 = a = ' ' + if lineno in cp.statement_starts: + m0 = '-' + exits = exit_counts.get(lineno, 0) + if exits > 1: + m1 = str(exits) + if lineno in cp.docstrings: + m2 = '"' + if lineno in cp.classdefs: + m2 = 'C' + if lineno in cp.excluded: + m3 = 'x' + a = arc_chars.get(lineno, '').ljust(arc_width) + print("%4d %s%s%s%s%s %s" % + (lineno, m0, m1, m2, m3, a, ltext) + ) + + def disassemble(self, byte_parser): + """Disassemble code, for ad-hoc experimenting.""" + + for bp in byte_parser.child_parsers(): + chunks = bp._split_into_chunks() + chunkd = dict((chunk.byte, chunk) for chunk in chunks) + if bp.text: + srclines = bp.text.splitlines() + else: + srclines = None + print("\n%s: " % bp.code) + for disline in disgen.disgen(bp.code): + if disline.first: + if srclines: + print("%80s%s" % ("", srclines[disline.lineno-1])) + elif disline.offset > 0: + print("") + line = disgen.format_dis_line(disline) + chunk = chunkd.get(disline.offset) + if chunk: + exits = " ".join(str(e) for e in sorted(chunk.exits)) + chunkstr = ": %s" % exits + else: + chunkstr = "" + print("%-70s%s" % (line, chunkstr)) + + print("") + + def arc_ascii_art(self, arcs): + """Draw arcs as ascii art. + + Returns a width of characters needed to draw all the arcs, and a + dictionary mapping line numbers to ascii strings to draw for that line. + + """ + arc_chars = {} + for lfrom, lto in sorted(arcs): + if lfrom < 0: + arc_chars[lto] = arc_chars.get(lto, '') + 'v' + elif lto < 0: + arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^' + else: + if lfrom == lto - 1: + # Don't show obvious arcs. + continue + if lfrom < lto: + l1, l2 = lfrom, lto + else: + l1, l2 = lto, lfrom + w = max([len(arc_chars.get(l, '')) for l in range(l1, l2+1)]) + for l in range(l1, l2+1): + if l == lfrom: + ch = '<' + elif l == lto: + ch = '>' + else: + ch = '|' + arc_chars[l] = arc_chars.get(l, '').ljust(w) + ch + arc_width = 0 + + if arc_chars: + arc_width = max([len(a) for a in arc_chars.values()]) + else: + arc_width = 0 + + return arc_width, arc_chars + +if __name__ == '__main__': + AdHocMain().main(sys.argv[1:]) + |