diff options
Diffstat (limited to 'coverage/parser.py')
-rw-r--r-- | coverage/parser.py | 227 |
1 files changed, 53 insertions, 174 deletions
diff --git a/coverage/parser.py b/coverage/parser.py index ae618ce5..d894e61c 100644 --- a/coverage/parser.py +++ b/coverage/parser.py @@ -1,10 +1,12 @@ """Code parsing for Coverage.""" -import glob, opcode, os, re, sys, token, tokenize +import opcode, re, sys, token, tokenize -from coverage.backward import set, sorted, StringIO # pylint: disable-msg=W0622 +from coverage.backward import set, sorted, StringIO # pylint: disable=W0622 +from coverage.backward import open_source from coverage.bytecode import ByteCodes, CodeObjects -from coverage.misc import nice_pair, CoverageException, NoSource, expensive +from coverage.misc import nice_pair, expensive, join_regex +from coverage.misc import CoverageException, NoSource, NotPython class CodeParser(object): @@ -13,7 +15,7 @@ class CodeParser(object): def __init__(self, text=None, filename=None, exclude=None): """ Source can be provided as `text`, the text itself, or `filename`, from - which text will be read. Excluded lines are those that match + which the text will be read. Excluded lines are those that match `exclude`, a regex. """ @@ -22,15 +24,20 @@ class CodeParser(object): self.text = text if not self.text: try: - sourcef = open(self.filename, 'rU') - self.text = sourcef.read() - sourcef.close() + sourcef = open_source(self.filename) + try: + self.text = sourcef.read() + finally: + sourcef.close() except IOError: _, err, _ = sys.exc_info() raise NoSource( "No source for code: %r: %s" % (self.filename, err) ) - self.text = self.text.replace('\r\n', '\n') + + # Scrap the BOM if it exists. + if self.text and ord(self.text[0]) == 0xfeff: + self.text = self.text[1:] self.exclude = exclude @@ -65,6 +72,21 @@ class CodeParser(object): return self._byte_parser byte_parser = property(_get_byte_parser) + def lines_matching(self, *regexes): + """Find the lines matching one of a list of regexes. + + Returns a set of line numbers, the lines that contain a match for one + of the regexes in `regexes`. The entire line needn't match, just a + part of it. + + """ + regex_c = re.compile(join_regex(regexes)) + matches = set() + for i, ltext in enumerate(self.lines): + if regex_c.search(ltext): + matches.add(i+1) + return matches + def _raw_parse(self): """Parse the source to find the interesting facts about its lines. @@ -73,10 +95,7 @@ class CodeParser(object): """ # Find lines which match an exclusion pattern. if self.exclude: - re_exclude = re.compile(self.exclude) - for i, ltext in enumerate(self.lines): - if re_exclude.search(ltext): - self.excluded.add(i+1) + self.excluded = self.lines_matching(self.exclude) # Tokenize, to find excluded suites, to find docstrings, and to find # multi-line statements. @@ -184,7 +203,15 @@ class CodeParser(object): statements. """ - self._raw_parse() + try: + self._raw_parse() + except (tokenize.TokenError, IndentationError): + _, tokerr, _ = sys.exc_info() + msg, lineno = tokerr.args + raise NotPython( + "Couldn't parse '%s' as Python source: '%s' at %s" % + (self.filename, msg, lineno) + ) excluded_lines = self.first_lines(self.excluded) ignore = excluded_lines + list(self.docstrings) @@ -282,7 +309,7 @@ OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY') OPS_POP_BLOCK = _opcode_set('POP_BLOCK') # Opcodes that have a jump destination, but aren't really a jump. -OPS_NO_JUMP = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY') +OPS_NO_JUMP = OPS_PUSH_BLOCK # Individual opcodes we need below. OP_BREAK_LOOP = _opcode('BREAK_LOOP') @@ -299,12 +326,16 @@ class ByteParser(object): def __init__(self, code=None, text=None, filename=None): if code: self.code = code + self.text = text else: if not text: assert filename, "If no code or text, need a filename" - sourcef = open(filename, 'rU') - text = sourcef.read() - sourcef.close() + sourcef = open_source(filename) + try: + text = sourcef.read() + finally: + sourcef.close() + self.text = text try: # Python 2.3 and 2.4 don't like partial last lines, so be sure @@ -312,7 +343,7 @@ class ByteParser(object): self.code = compile(text + '\n', filename, "exec") except SyntaxError: _, synerr, _ = sys.exc_info() - raise CoverageException( + raise NotPython( "Couldn't parse '%s' as Python source: '%s' at line %d" % (filename, synerr.msg, synerr.lineno) ) @@ -333,7 +364,8 @@ class ByteParser(object): The iteration includes `self` as its first value. """ - return map(lambda c: ByteParser(code=c), CodeObjects(self.code)) + children = CodeObjects(self.code) + return [ByteParser(code=c, text=self.text) for c in children] # Getting numbers from the lnotab value changed in Py3.0. if sys.version_info >= (3, 0): @@ -385,18 +417,6 @@ class ByteParser(object): stmts.add(l) return stmts - def _disassemble(self): # pragma: no cover - """Disassemble code, for ad-hoc experimenting.""" - - import dis - - for bp in self.child_parsers(): - print("\n%s: " % bp.code) - dis.dis(bp.code) - print("Bytes lines: %r" % bp._bytes_lines()) - - print("") - def _split_into_chunks(self): """Split the code object into a list of `Chunk` objects. @@ -509,7 +529,7 @@ class ByteParser(object): chunks.append(chunk) # Give all the chunks a length. - chunks[-1].length = bc.next_offset - chunks[-1].byte + chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301 for i in range(len(chunks)-1): chunks[i].length = chunks[i+1].byte - chunks[i].byte @@ -556,7 +576,7 @@ class ByteParser(object): else: # No chunk for this byte! raise Exception("Couldn't find chunk @ %d" % byte) - byte_chunks[byte] = ch + byte_chunks[byte] = ch # pylint: disable=W0631 if ch.line: lines.add(ch.line) @@ -640,144 +660,3 @@ class Chunk(object): return "<%d+%d @%d %r>" % ( self.byte, self.length, self.line, list(self.exits) ) - - -class AdHocMain(object): # pragma: no cover - """An ad-hoc main for code parsing experiments.""" - - def main(self, args): - """A main function for trying the code from the command line.""" - - from optparse import OptionParser - - parser = OptionParser() - parser.add_option( - "-c", action="store_true", dest="chunks", - help="Show basic block chunks" - ) - parser.add_option( - "-d", action="store_true", dest="dis", - help="Disassemble" - ) - parser.add_option( - "-R", action="store_true", dest="recursive", - help="Recurse to find source files" - ) - parser.add_option( - "-s", action="store_true", dest="source", - help="Show analyzed source" - ) - parser.add_option( - "-t", action="store_true", dest="tokens", - help="Show tokens" - ) - - options, args = parser.parse_args() - if options.recursive: - if args: - root = args[0] - else: - root = "." - for root, _, _ in os.walk(root): - for f in glob.glob(root + "/*.py"): - self.adhoc_one_file(options, f) - else: - self.adhoc_one_file(options, args[0]) - - def adhoc_one_file(self, options, filename): - """Process just one file.""" - - if options.dis or options.chunks: - try: - bp = ByteParser(filename=filename) - except CoverageException: - _, err, _ = sys.exc_info() - print("%s" % (err,)) - return - - if options.dis: - print("Main code:") - bp._disassemble() - - if options.chunks: - chunks = bp._all_chunks() - if options.recursive: - print("%6d: %s" % (len(chunks), filename)) - else: - print("Chunks: %r" % chunks) - arcs = bp._all_arcs() - print("Arcs: %r" % sorted(arcs)) - - if options.source or options.tokens: - cp = CodeParser(filename=filename, exclude=r"no\s*cover") - cp.show_tokens = options.tokens - cp._raw_parse() - - if options.source: - if options.chunks: - arc_width, arc_chars = self.arc_ascii_art(arcs) - else: - arc_width, arc_chars = 0, {} - - exit_counts = cp.exit_counts() - - for i, ltext in enumerate(cp.lines): - lineno = i+1 - m0 = m1 = m2 = m3 = a = ' ' - if lineno in cp.statement_starts: - m0 = '-' - exits = exit_counts.get(lineno, 0) - if exits > 1: - m1 = str(exits) - if lineno in cp.docstrings: - m2 = '"' - if lineno in cp.classdefs: - m2 = 'C' - if lineno in cp.excluded: - m3 = 'x' - a = arc_chars.get(lineno, '').ljust(arc_width) - print("%4d %s%s%s%s%s %s" % - (lineno, m0, m1, m2, m3, a, ltext) - ) - - def arc_ascii_art(self, arcs): - """Draw arcs as ascii art. - - Returns a width of characters needed to draw all the arcs, and a - dictionary mapping line numbers to ascii strings to draw for that line. - - """ - arc_chars = {} - for lfrom, lto in sorted(arcs): - if lfrom < 0: - arc_chars[lto] = arc_chars.get(lto, '') + 'v' - elif lto < 0: - arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^' - else: - if lfrom == lto - 1: - # Don't show obvious arcs. - continue - if lfrom < lto: - l1, l2 = lfrom, lto - else: - l1, l2 = lto, lfrom - w = max([len(arc_chars.get(l, '')) for l in range(l1, l2+1)]) - for l in range(l1, l2+1): - if l == lfrom: - ch = '<' - elif l == lto: - ch = '>' - else: - ch = '|' - arc_chars[l] = arc_chars.get(l, '').ljust(w) + ch - arc_width = 0 - - if arc_chars: - arc_width = max([len(a) for a in arc_chars.values()]) - else: - arc_width = 0 - - return arc_width, arc_chars - -if __name__ == '__main__': - AdHocMain().main(sys.argv[1:]) |