"""Code parsing for Coverage.""" import glob, opcode, os, re, sys, token, tokenize from coverage.backward import set, StringIO # pylint: disable-msg=W0622 from coverage.bytecode import ByteCodes, CodeObjects from coverage.misc import nice_pair, CoverageException class CodeParser: """Parse code to find executable lines, excluded lines, etc.""" def __init__(self): self.show_tokens = False # The text lines of the parsed code. self.lines = None # The line numbers of excluded lines of code. self.excluded = set() # The line numbers of docstring lines. self.docstrings = set() # A dict mapping line numbers to (lo,hi) for multi-line statements. self.multiline = {} # The line numbers that start statements. self.statement_starts = set() # Getting numbers from the lnotab value changed in Py3.0. if sys.hexversion >= 0x03000000: def _lnotab_increments(self, lnotab): """Return a list of ints from the lnotab bytes in 3.x""" return list(lnotab) else: def _lnotab_increments(self, lnotab): """Return a list of ints from the lnotab string in 2.x""" return [ord(c) for c in lnotab] def _bytes_lines(self, code): """Map byte offsets to line numbers in `code`. Uses co_lnotab described in Python/compile.c to map byte offsets to line numbers. Returns a list: [(b0, l0), (b1, l1), ...] """ # Adapted from dis.py in the standard library. byte_increments = self._lnotab_increments(code.co_lnotab[0::2]) line_increments = self._lnotab_increments(code.co_lnotab[1::2]) bytes_lines = [] last_line_num = None line_num = code.co_firstlineno byte_num = 0 for byte_incr, line_incr in zip(byte_increments, line_increments): if byte_incr: if line_num != last_line_num: bytes_lines.append((byte_num, line_num)) last_line_num = line_num byte_num += byte_incr line_num += line_incr if line_num != last_line_num: bytes_lines.append((byte_num, line_num)) return bytes_lines def _find_statements(self, code): """Find the statements in `code`. Update `self.statement_starts`, a set of line numbers that start statements. Recurses into all code objects reachable from `code`. """ # Adapted from trace.py in the standard library. for co in CodeObjects(code): # Get all of the lineno information from this code. bytes_lines = self._bytes_lines(co) for b, l in bytes_lines: self.statement_starts.add(l) def _raw_parse(self, text=None, filename=None, exclude=None): """Parse `text` to find the interesting facts about its lines. A handful of member fields are updated. """ if not text: sourcef = open(filename, 'rU') text = sourcef.read() sourcef.close() text = text.replace('\r\n', '\n') self.lines = text.split('\n') # Find lines which match an exclusion pattern. if exclude: re_exclude = re.compile(exclude) for i, ltext in enumerate(self.lines): if re_exclude.search(ltext): self.excluded.add(i+1) # Tokenize, to find excluded suites, to find docstrings, and to find # multi-line statements. indent = 0 exclude_indent = 0 excluding = False prev_toktype = token.INDENT first_line = None tokgen = tokenize.generate_tokens(StringIO(text).readline) for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: if self.show_tokens: print("%10s %5s %-20r %r" % ( tokenize.tok_name.get(toktype, toktype), nice_pair((slineno, elineno)), ttext, ltext )) if toktype == token.INDENT: indent += 1 elif toktype == token.DEDENT: indent -= 1 elif toktype == token.OP and ttext == ':': if not excluding and elineno in self.excluded: # Start excluding a suite. We trigger off of the colon # token so that the #pragma comment will be recognized on # the same line as the colon. exclude_indent = indent excluding = True elif toktype == token.STRING and prev_toktype == token.INDENT: # Strings that are first on an indented line are docstrings. # (a trick from trace.py in the stdlib.) for i in range(slineno, elineno+1): self.docstrings.add(i) elif toktype == token.NEWLINE: if first_line is not None and elineno != first_line: # We're at the end of a line, and we've ended on a # different line than the first line of the statement, # so record a multi-line range. rng = (first_line, elineno) for l in range(first_line, elineno+1): self.multiline[l] = rng first_line = None if ttext.strip() and toktype != tokenize.COMMENT: # A non-whitespace token. if first_line is None: # The token is not whitespace, and is the first in a # statement. first_line = slineno # Check whether to end an excluded suite. if excluding and indent <= exclude_indent: excluding = False if excluding: self.excluded.add(elineno) prev_toktype = toktype # Find the starts of the executable statements. filename = filename or "" try: # Python 2.3 and 2.4 don't like partial last lines, so be sure the # text ends nicely for them. text += '\n' code = compile(text, filename, "exec") except SyntaxError: _, synerr, _ = sys.exc_info() raise CoverageException( "Couldn't parse '%s' as Python source: '%s' at line %d" % (filename, synerr.msg, synerr.lineno) ) self._find_statements(code) def _map_to_first_line(self, lines, ignore=None): """Map the line numbers in `lines` to the correct first line of the statement. Skip any line mentioned in `ignore`. Returns a sorted list of the first lines. """ ignore = ignore or [] lset = set() for l in lines: if l in ignore: continue rng = self.multiline.get(l) if rng: new_l = rng[0] else: new_l = l if new_l not in ignore: lset.add(new_l) lines = list(lset) lines.sort() return lines def parse_source(self, text=None, filename=None, exclude=None): """Parse source text to find executable lines, excluded lines, etc. Source can be provided as `text`, the text itself, or `filename`, from which text will be read. Excluded lines are those that match `exclude`, a regex. Return values are 1) a sorted list of executable line numbers, 2) a sorted list of excluded line numbers, and 3) a dict mapping line numbers to pairs (lo,hi) for multi-line statements. """ self._raw_parse(text, filename, exclude) excluded_lines = self._map_to_first_line(self.excluded) ignore = excluded_lines + list(self.docstrings) lines = self._map_to_first_line(self.statement_starts, ignore) return lines, excluded_lines, self.multiline def _disassemble(self, code): """Disassemble code, for ad-hoc experimenting.""" import dis for codeobj in CodeObjects(code): print("\n%s: " % codeobj) dis.dis(codeobj) print("Bytes lines: %r" % self._bytes_lines(codeobj)) print("Jumps: %r %r" % self._find_byte_jumps(codeobj)) warnings, chunks = self._split_into_chunks(codeobj) if warnings: print("WARNING: %s" % "\n".join(warnings)) print("") def _line_for_byte(self, bytes_lines, byte): last_line = 0 for b, l in bytes_lines: if b == byte: return l elif b > byte: return last_line else: last_line = l return last_line def _find_byte_jumps(self, code): byte_jumps = [(bc.offset, bc.jump_to) for bc in ByteCodes(code.co_code) if bc.jump_to >= 0] bytes_lines = self._bytes_lines(code) line_jumps = [(self._line_for_byte(bytes_lines, b0), self._line_for_byte(bytes_lines, b1)) for b0, b1 in byte_jumps] return byte_jumps, line_jumps _chunk_enders = set([opcode.opmap[name] for name in ['JUMP_ABSOLUTE', 'RETURN_VALUE']]) def _split_into_chunks(self, code): class Chunk(object): def __init__(self, byte, line=0): self.byte = byte self.line = line self.exits = set() def __repr__(self): return "<%d:%d %r>" % (self.byte, self.line, list(self.exits)) chunks = [] chunk = None bytes_lines_map = dict(self._bytes_lines(code)) for bc in ByteCodes(code.co_code): # Maybe have to start a new block if bc.offset in bytes_lines_map: if chunk: chunk.exits.add(bc.offset) chunk = Chunk(bc.offset, bytes_lines_map[bc.offset]) chunks.append(chunk) if not chunk: chunk = Chunk(bc.offset) chunks.append(chunk) if bc.jump_to >= 0: chunk.exits.add(bc.jump_to) if bc.op in self._chunk_enders: chunk = None warnings = [] # Find anonymous chunks (not associated with a line number), and find # the numbered chunks that jump to them. for ch in chunks: if not ch.line: jumpers = [c for c in chunks if ch.line in c.exits] if len(jumpers) > 1: warnings.append("Chunk at %d has %d jumpers" % (ch.byte, len(jumpers))) return warnings, chunks def _all_chunks(self, code): warnings = [] chunks = [] for co in CodeObjects(code): warns, chs = self._split_into_chunks(co) warnings.extend(warns) chunks.extend(chs) return warnings, chunks def adhoc_main(self, args): """A main function for trying the code from the command line.""" from optparse import OptionParser parser = OptionParser() parser.add_option( "-c", action="store_true", dest="chunks", help="Check byte chunks" ) parser.add_option( "-d", action="store_true", dest="dis", help="Disassemble" ) parser.add_option( "-R", action="store_true", dest="recursive", help="Recurse to find source files" ) parser.add_option( "-s", action="store_true", dest="source", help="Show analyzed source" ) parser.add_option( "-t", action="store_true", dest="tokens", help="Show tokens" ) options, args = parser.parse_args() if options.recursive: if args: root = args[0] else: root = "." for root, _, _ in os.walk(root): for f in glob.glob(root + "/*.py"): self.adhoc_one_file(options, f) else: self.adhoc_one_file(options, args[0]) def adhoc_one_file(self, options, filename): if options.dis or options.chunks: source = open(filename, "rU").read() + "\n\n" try: code = compile(source, filename, "exec") except SyntaxError: _, err, _ = sys.exc_info() print("** Couldn't compile %s: %s" % (filename, err)) return if options.dis: print("Main code:") self._disassemble(code) if options.chunks: warnings, chunks = self._all_chunks(code) if options.recursive: print("%6d: %s" % (len(chunks), filename)) if warnings: print("\t%r" % (warnings,)) else: print(warnings) print(chunks) self.show_tokens = options.tokens self._raw_parse(filename=filename, exclude=r"no\s*cover") if options.source: for i, ltext in enumerate(self.lines): lineno = i+1 m0 = m1 = m2 = ' ' if lineno in self.statement_starts: m0 = '-' if lineno in self.docstrings: m1 = '"' if lineno in self.excluded: m2 = 'x' print("%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext)) if __name__ == '__main__': CodeParser().adhoc_main(sys.argv[1:])