summary | refs | log | tree | commit | diff
path: root/coverage/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'coverage/parser.py')
-rw-r--r-- coverage/parser.py 227
1 files changed, 53 insertions, 174 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index ae618ce5..d894e61c 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -1,10 +1,12 @@
"""Code parsing for Coverage."""
-import glob, opcode, os, re, sys, token, tokenize
+import opcode, re, sys, token, tokenize
-from coverage.backward import set, sorted, StringIO # pylint: disable-msg=W0622
+from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
+from coverage.backward import open_source
from coverage.bytecode import ByteCodes, CodeObjects
-from coverage.misc import nice_pair, CoverageException, NoSource, expensive
+from coverage.misc import nice_pair, expensive, join_regex
+from coverage.misc import CoverageException, NoSource, NotPython
class CodeParser(object):
@@ -13,7 +15,7 @@ class CodeParser(object):
def __init__(self, text=None, filename=None, exclude=None):
"""
Source can be provided as `text`, the text itself, or `filename`, from
- which text will be read. Excluded lines are those that match
+ which the text will be read. Excluded lines are those that match
`exclude`, a regex.
"""
@@ -22,15 +24,20 @@ class CodeParser(object):
self.text = text
if not self.text:
try:
- sourcef = open(self.filename, 'rU')
- self.text = sourcef.read()
- sourcef.close()
+ sourcef = open_source(self.filename)
+ try:
+ self.text = sourcef.read()
+ finally:
+ sourcef.close()
except IOError:
_, err, _ = sys.exc_info()
raise NoSource(
"No source for code: %r: %s" % (self.filename, err)
)
- self.text = self.text.replace('\r\n', '\n')
+
+ # Scrap the BOM if it exists.
+ if self.text and ord(self.text[0]) == 0xfeff:
+ self.text = self.text[1:]
self.exclude = exclude
@@ -65,6 +72,21 @@ class CodeParser(object):
return self._byte_parser
byte_parser = property(_get_byte_parser)
+ def lines_matching(self, *regexes):
+ """Find the lines matching one of a list of regexes.
+
+ Returns a set of line numbers, the lines that contain a match for one
+ of the regexes in `regexes`. The entire line needn't match, just a
+ part of it.
+
+ """
+ regex_c = re.compile(join_regex(regexes))
+ matches = set()
+ for i, ltext in enumerate(self.lines):
+ if regex_c.search(ltext):
+ matches.add(i+1)
+ return matches
+
def _raw_parse(self):
"""Parse the source to find the interesting facts about its lines.
@@ -73,10 +95,7 @@ class CodeParser(object):
"""
# Find lines which match an exclusion pattern.
if self.exclude:
- re_exclude = re.compile(self.exclude)
- for i, ltext in enumerate(self.lines):
- if re_exclude.search(ltext):
- self.excluded.add(i+1)
+ self.excluded = self.lines_matching(self.exclude)
# Tokenize, to find excluded suites, to find docstrings, and to find
# multi-line statements.
@@ -184,7 +203,15 @@ class CodeParser(object):
statements.
"""
- self._raw_parse()
+ try:
+ self._raw_parse()
+ except (tokenize.TokenError, IndentationError):
+ _, tokerr, _ = sys.exc_info()
+ msg, lineno = tokerr.args
+ raise NotPython(
+ "Couldn't parse '%s' as Python source: '%s' at %s" %
+ (self.filename, msg, lineno)
+ )
excluded_lines = self.first_lines(self.excluded)
ignore = excluded_lines + list(self.docstrings)
@@ -282,7 +309,7 @@ OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
OPS_POP_BLOCK = _opcode_set('POP_BLOCK')
# Opcodes that have a jump destination, but aren't really a jump.
-OPS_NO_JUMP = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
+OPS_NO_JUMP = OPS_PUSH_BLOCK
# Individual opcodes we need below.
OP_BREAK_LOOP = _opcode('BREAK_LOOP')
@@ -299,12 +326,16 @@ class ByteParser(object):
def __init__(self, code=None, text=None, filename=None):
if code:
self.code = code
+ self.text = text
else:
if not text:
assert filename, "If no code or text, need a filename"
- sourcef = open(filename, 'rU')
- text = sourcef.read()
- sourcef.close()
+ sourcef = open_source(filename)
+ try:
+ text = sourcef.read()
+ finally:
+ sourcef.close()
+ self.text = text
try:
# Python 2.3 and 2.4 don't like partial last lines, so be sure
@@ -312,7 +343,7 @@ class ByteParser(object):
self.code = compile(text + '\n', filename, "exec")
except SyntaxError:
_, synerr, _ = sys.exc_info()
- raise CoverageException(
+ raise NotPython(
"Couldn't parse '%s' as Python source: '%s' at line %d" %
(filename, synerr.msg, synerr.lineno)
)
@@ -333,7 +364,8 @@ class ByteParser(object):
The iteration includes `self` as its first value.
"""
- return map(lambda c: ByteParser(code=c), CodeObjects(self.code))
+ children = CodeObjects(self.code)
+ return [ByteParser(code=c, text=self.text) for c in children]
# Getting numbers from the lnotab value changed in Py3.0.
if sys.version_info >= (3, 0):
@@ -385,18 +417,6 @@ class ByteParser(object):
stmts.add(l)
return stmts
- def _disassemble(self): # pragma: no cover
- """Disassemble code, for ad-hoc experimenting."""
-
- import dis
-
- for bp in self.child_parsers():
- print("\n%s: " % bp.code)
- dis.dis(bp.code)
- print("Bytes lines: %r" % bp._bytes_lines())
-
- print("")
-
def _split_into_chunks(self):
"""Split the code object into a list of `Chunk` objects.
@@ -509,7 +529,7 @@ class ByteParser(object):
chunks.append(chunk)
# Give all the chunks a length.
- chunks[-1].length = bc.next_offset - chunks[-1].byte
+ chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
for i in range(len(chunks)-1):
chunks[i].length = chunks[i+1].byte - chunks[i].byte
@@ -556,7 +576,7 @@ class ByteParser(object):
else:
# No chunk for this byte!
raise Exception("Couldn't find chunk @ %d" % byte)
- byte_chunks[byte] = ch
+ byte_chunks[byte] = ch # pylint: disable=W0631
if ch.line:
lines.add(ch.line)
@@ -640,144 +660,3 @@ class Chunk(object):
return "<%d+%d @%d %r>" % (
self.byte, self.length, self.line, list(self.exits)
)
-
-
-class AdHocMain(object): # pragma: no cover
- """An ad-hoc main for code parsing experiments."""
-
- def main(self, args):
- """A main function for trying the code from the command line."""
-
- from optparse import OptionParser
-
- parser = OptionParser()
- parser.add_option(
- "-c", action="store_true", dest="chunks",
- help="Show basic block chunks"
- )
- parser.add_option(
- "-d", action="store_true", dest="dis",
- help="Disassemble"
- )
- parser.add_option(
- "-R", action="store_true", dest="recursive",
- help="Recurse to find source files"
- )
- parser.add_option(
- "-s", action="store_true", dest="source",
- help="Show analyzed source"
- )
- parser.add_option(
- "-t", action="store_true", dest="tokens",
- help="Show tokens"
- )
-
- options, args = parser.parse_args()
- if options.recursive:
- if args:
- root = args[0]
- else:
- root = "."
- for root, _, _ in os.walk(root):
- for f in glob.glob(root + "/*.py"):
- self.adhoc_one_file(options, f)
- else:
- self.adhoc_one_file(options, args[0])
-
- def adhoc_one_file(self, options, filename):
- """Process just one file."""
-
- if options.dis or options.chunks:
- try:
- bp = ByteParser(filename=filename)
- except CoverageException:
- _, err, _ = sys.exc_info()
- print("%s" % (err,))
- return
-
- if options.dis:
- print("Main code:")
- bp._disassemble()
-
- if options.chunks:
- chunks = bp._all_chunks()
- if options.recursive:
- print("%6d: %s" % (len(chunks), filename))
- else:
- print("Chunks: %r" % chunks)
- arcs = bp._all_arcs()
- print("Arcs: %r" % sorted(arcs))
-
- if options.source or options.tokens:
- cp = CodeParser(filename=filename, exclude=r"no\s*cover")
- cp.show_tokens = options.tokens
- cp._raw_parse()
-
- if options.source:
- if options.chunks:
- arc_width, arc_chars = self.arc_ascii_art(arcs)
- else:
- arc_width, arc_chars = 0, {}
-
- exit_counts = cp.exit_counts()
-
- for i, ltext in enumerate(cp.lines):
- lineno = i+1
- m0 = m1 = m2 = m3 = a = ' '
- if lineno in cp.statement_starts:
- m0 = '-'
- exits = exit_counts.get(lineno, 0)
- if exits > 1:
- m1 = str(exits)
- if lineno in cp.docstrings:
- m2 = '"'
- if lineno in cp.classdefs:
- m2 = 'C'
- if lineno in cp.excluded:
- m3 = 'x'
- a = arc_chars.get(lineno, '').ljust(arc_width)
- print("%4d %s%s%s%s%s %s" %
- (lineno, m0, m1, m2, m3, a, ltext)
- )
-
- def arc_ascii_art(self, arcs):
- """Draw arcs as ascii art.
-
- Returns a width of characters needed to draw all the arcs, and a
- dictionary mapping line numbers to ascii strings to draw for that line.
-
- """
- arc_chars = {}
- for lfrom, lto in sorted(arcs):
- if lfrom < 0:
- arc_chars[lto] = arc_chars.get(lto, '') + 'v'
- elif lto < 0:
- arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^'
- else:
- if lfrom == lto - 1:
- # Don't show obvious arcs.
- continue
- if lfrom < lto:
- l1, l2 = lfrom, lto
- else:
- l1, l2 = lto, lfrom
- w = max([len(arc_chars.get(l, '')) for l in range(l1, l2+1)])
- for l in range(l1, l2+1):
- if l == lfrom:
- ch = '<'
- elif l == lto:
- ch = '>'
- else:
- ch = '|'
- arc_chars[l] = arc_chars.get(l, '').ljust(w) + ch
- arc_width = 0
-
- if arc_chars:
- arc_width = max([len(a) for a in arc_chars.values()])
- else:
- arc_width = 0
-
- return arc_width, arc_chars
-
-if __name__ == '__main__':
- AdHocMain().main(sys.argv[1:])