1 files changed, 53 insertions, 174 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index ae618ce5..d894e61c 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -1,10 +1,12 @@
 """Code parsing for Coverage."""
 
-import glob, opcode, os, re, sys, token, tokenize
+import opcode, re, sys, token, tokenize
 
-from coverage.backward import set, sorted, StringIO # pylint: disable-msg=W0622
+from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
+from coverage.backward import open_source
 from coverage.bytecode import ByteCodes, CodeObjects
-from coverage.misc import nice_pair, CoverageException, NoSource, expensive
+from coverage.misc import nice_pair, expensive, join_regex
+from coverage.misc import CoverageException, NoSource, NotPython
 
 
 class CodeParser(object):
@@ -13,7 +15,7 @@ class CodeParser(object):
     def __init__(self, text=None, filename=None, exclude=None):
         """
         Source can be provided as `text`, the text itself, or `filename`, from
-        which text will be read.  Excluded lines are those that match
+        which the text will be read.  Excluded lines are those that match
         `exclude`, a regex.
 
         """
@@ -22,15 +24,20 @@ class CodeParser(object):
         self.text = text
         if not self.text:
             try:
-                sourcef = open(self.filename, 'rU')
-                self.text = sourcef.read()
-                sourcef.close()
+                sourcef = open_source(self.filename)
+                try:
+                    self.text = sourcef.read()
+                finally:
+                    sourcef.close()
             except IOError:
                 _, err, _ = sys.exc_info()
                 raise NoSource(
                     "No source for code: %r: %s" % (self.filename, err)
                     )
-        self.text = self.text.replace('\r\n', '\n')
+
+        # Discard a leading byte-order mark (U+FEFF), if there is one.
+        if self.text and ord(self.text[0]) == 0xfeff:
+            self.text = self.text[1:]
 
         self.exclude = exclude
 
@@ -65,6 +72,21 @@ class CodeParser(object):
         return self._byte_parser
     byte_parser = property(_get_byte_parser)
 
+    def lines_matching(self, *regexes):
+        """Collect the numbers of the lines that match any of `regexes`.
+
+        A line counts as matching when any one of the regexes is found
+        anywhere within it; the regex doesn't have to cover the whole line.
+        The result is a set of line numbers, counting the first line as 1.
+
+        """
+        any_regex = re.compile(join_regex(regexes))
+        return set([
+            index + 1
+            for index, line in enumerate(self.lines)
+            if any_regex.search(line)
+            ])
+
     def _raw_parse(self):
         """Parse the source to find the interesting facts about its lines.
 
@@ -73,10 +95,7 @@ class CodeParser(object):
         """
         # Find lines which match an exclusion pattern.
         if self.exclude:
-            re_exclude = re.compile(self.exclude)
-            for i, ltext in enumerate(self.lines):
-                if re_exclude.search(ltext):
-                    self.excluded.add(i+1)
+            self.excluded = self.lines_matching(self.exclude)
 
         # Tokenize, to find excluded suites, to find docstrings, and to find
         # multi-line statements.
@@ -184,7 +203,15 @@ class CodeParser(object):
         statements.
 
         """
-        self._raw_parse()
+        try:
+            self._raw_parse()
+        except (tokenize.TokenError, IndentationError):
+            _, tokerr, _ = sys.exc_info()
+            msg, lineno = tokerr.args
+            raise NotPython(
+                "Couldn't parse '%s' as Python source: '%s' at %s" %
+                    (self.filename, msg, lineno)
+                )
 
         excluded_lines = self.first_lines(self.excluded)
         ignore = excluded_lines + list(self.docstrings)
@@ -282,7 +309,7 @@ OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
 OPS_POP_BLOCK = _opcode_set('POP_BLOCK')
 
 # Opcodes that have a jump destination, but aren't really a jump.
-OPS_NO_JUMP = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
+OPS_NO_JUMP = OPS_PUSH_BLOCK
 
 # Individual opcodes we need below.
 OP_BREAK_LOOP = _opcode('BREAK_LOOP')
@@ -299,12 +326,16 @@ class ByteParser(object):
     def __init__(self, code=None, text=None, filename=None):
         if code:
             self.code = code
+            self.text = text
         else:
             if not text:
                 assert filename, "If no code or text, need a filename"
-                sourcef = open(filename, 'rU')
-                text = sourcef.read()
-                sourcef.close()
+                sourcef = open_source(filename)
+                try:
+                    text = sourcef.read()
+                finally:
+                    sourcef.close()
+            self.text = text
 
             try:
                 # Python 2.3 and 2.4 don't like partial last lines, so be sure
@@ -312,7 +343,7 @@ class ByteParser(object):
                 self.code = compile(text + '\n', filename, "exec")
             except SyntaxError:
                 _, synerr, _ = sys.exc_info()
-                raise CoverageException(
+                raise NotPython(
                     "Couldn't parse '%s' as Python source: '%s' at line %d" %
                         (filename, synerr.msg, synerr.lineno)
                     )
@@ -333,7 +364,8 @@ class ByteParser(object):
         The iteration includes `self` as its first value.
 
         """
-        return map(lambda c: ByteParser(code=c), CodeObjects(self.code))
+        children = CodeObjects(self.code)
+        return [ByteParser(code=c, text=self.text) for c in children]
 
     # Getting numbers from the lnotab value changed in Py3.0.
     if sys.version_info >= (3, 0):
@@ -385,18 +417,6 @@ class ByteParser(object):
                 stmts.add(l)
         return stmts
 
-    def _disassemble(self):     # pragma: no cover
-        """Disassemble code, for ad-hoc experimenting."""
-
-        import dis
-
-        for bp in self.child_parsers():
-            print("\n%s: " % bp.code)
-            dis.dis(bp.code)
-            print("Bytes lines: %r" % bp._bytes_lines())
-
-        print("")
-
     def _split_into_chunks(self):
         """Split the code object into a list of `Chunk` objects.
 
@@ -509,7 +529,7 @@ class ByteParser(object):
                             chunks.append(chunk)
 
             # Give all the chunks a length.
-            chunks[-1].length = bc.next_offset - chunks[-1].byte
+            chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
             for i in range(len(chunks)-1):
                 chunks[i].length = chunks[i+1].byte - chunks[i].byte
 
@@ -556,7 +576,7 @@ class ByteParser(object):
                     else:
                         # No chunk for this byte!
                         raise Exception("Couldn't find chunk @ %d" % byte)
-                    byte_chunks[byte] = ch
+                    byte_chunks[byte] = ch          # pylint: disable=W0631
 
                 if ch.line:
                     lines.add(ch.line)
@@ -640,144 +660,3 @@ class Chunk(object):
         return "<%d+%d @%d %r>" % (
             self.byte, self.length, self.line, list(self.exits)
             )
-
-
-class AdHocMain(object):        # pragma: no cover
-    """An ad-hoc main for code parsing experiments."""
-
-    def main(self, args):
-        """A main function for trying the code from the command line."""
-
-        from optparse import OptionParser
-
-        parser = OptionParser()
-        parser.add_option(
-            "-c", action="store_true", dest="chunks",
-            help="Show basic block chunks"
-            )
-        parser.add_option(
-            "-d", action="store_true", dest="dis",
-            help="Disassemble"
-            )
-        parser.add_option(
-            "-R", action="store_true", dest="recursive",
-            help="Recurse to find source files"
-            )
-        parser.add_option(
-            "-s", action="store_true", dest="source",
-            help="Show analyzed source"
-            )
-        parser.add_option(
-            "-t", action="store_true", dest="tokens",
-            help="Show tokens"
-            )
-
-        options, args = parser.parse_args()
-        if options.recursive:
-            if args:
-                root = args[0]
-            else:
-                root = "."
-            for root, _, _ in os.walk(root):
-                for f in glob.glob(root + "/*.py"):
-                    self.adhoc_one_file(options, f)
-        else:
-            self.adhoc_one_file(options, args[0])
-
-    def adhoc_one_file(self, options, filename):
-        """Process just one file."""
-
-        if options.dis or options.chunks:
-            try:
-                bp = ByteParser(filename=filename)
-            except CoverageException:
-                _, err, _ = sys.exc_info()
-                print("%s" % (err,))
-                return
-
-        if options.dis:
-            print("Main code:")
-            bp._disassemble()
-
-        if options.chunks:
-            chunks = bp._all_chunks()
-            if options.recursive:
-                print("%6d: %s" % (len(chunks), filename))
-            else:
-                print("Chunks: %r" % chunks)
-                arcs = bp._all_arcs()
-                print("Arcs: %r" % sorted(arcs))
-
-        if options.source or options.tokens:
-            cp = CodeParser(filename=filename, exclude=r"no\s*cover")
-            cp.show_tokens = options.tokens
-            cp._raw_parse()
-
-            if options.source:
-                if options.chunks:
-                    arc_width, arc_chars = self.arc_ascii_art(arcs)
-                else:
-                    arc_width, arc_chars = 0, {}
-
-                exit_counts = cp.exit_counts()
-
-                for i, ltext in enumerate(cp.lines):
-                    lineno = i+1
-                    m0 = m1 = m2 = m3 = a = ' '
-                    if lineno in cp.statement_starts:
-                        m0 = '-'
-                    exits = exit_counts.get(lineno, 0)
-                    if exits > 1:
-                        m1 = str(exits)
-                    if lineno in cp.docstrings:
-                        m2 = '"'
-                    if lineno in cp.classdefs:
-                        m2 = 'C'
-                    if lineno in cp.excluded:
-                        m3 = 'x'
-                    a = arc_chars.get(lineno, '').ljust(arc_width)
-                    print("%4d %s%s%s%s%s %s" %
-                                (lineno, m0, m1, m2, m3, a, ltext)
-                        )
-
-    def arc_ascii_art(self, arcs):
-        """Draw arcs as ascii art.
-
-        Returns a width of characters needed to draw all the arcs, and a
-        dictionary mapping line numbers to ascii strings to draw for that line.
-
-        """
-        arc_chars = {}
-        for lfrom, lto in sorted(arcs):
-            if lfrom < 0:
-                arc_chars[lto] = arc_chars.get(lto, '') + 'v'
-            elif lto < 0:
-                arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^'
-            else:
-                if lfrom == lto - 1:
-                    # Don't show obvious arcs.
-                    continue
-                if lfrom < lto:
-                    l1, l2 = lfrom, lto
-                else:
-                    l1, l2 = lto, lfrom
-                w = max([len(arc_chars.get(l, '')) for l in range(l1, l2+1)])
-                for l in range(l1, l2+1):
-                    if l == lfrom:
-                        ch = '<'
-                    elif l == lto:
-                        ch = '>'
-                    else:
-                        ch = '|'
-                    arc_chars[l] = arc_chars.get(l, '').ljust(w) + ch
-                arc_width = 0
-
-        if arc_chars:
-            arc_width = max([len(a) for a in arc_chars.values()])
-        else:
-            arc_width = 0
-
-        return arc_width, arc_chars
-
-if __name__ == '__main__':
-    AdHocMain().main(sys.argv[1:])