1 files changed, 231 insertions, 145 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index 0747d4a..ff066e8 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -10,11 +10,28 @@ from coverage.misc import nice_pair, CoverageException
 class CodeParser:
     """Parse code to find executable lines, excluded lines, etc."""
     
-    def __init__(self):
+    def __init__(self, text=None, filename=None, exclude=None):
+        """
+        Source can be provided as `text`, the text itself, or `filename`, from
+        which text will be read.  Excluded lines are those that match `exclude`,
+        a regex.
+        
+        """
+
+        assert text or filename, "CodeParser needs either text or filename"
+        self.filename = filename or "<code>"
+        if not text:
+            sourcef = open(self.filename, 'rU')
+            self.text = sourcef.read()
+            sourcef.close()
+        self.text = self.text.replace('\r\n', '\n')
+
+        self.exclude = exclude
+        
         self.show_tokens = False
 
         # The text lines of the parsed code.
-        self.lines = None
+        self.lines = self.text.split('\n')
 
         # The line numbers of excluded lines of code.
         self.excluded = set()
@@ -28,72 +45,15 @@ class CodeParser:
         # The line numbers that start statements.
         self.statement_starts = set()
 
-    # Getting numbers from the lnotab value changed in Py3.0.    
-    if sys.hexversion >= 0x03000000:
-        def _lnotab_increments(self, lnotab):
-            """Return a list of ints from the lnotab bytes in 3.x"""
-            return list(lnotab)
-    else:
-        def _lnotab_increments(self, lnotab):
-            """Return a list of ints from the lnotab string in 2.x"""
-            return [ord(c) for c in lnotab]
-
-    def _bytes_lines(self, code):
-        """Map byte offsets to line numbers in `code`.
-    
-        Uses co_lnotab described in Python/compile.c to map byte offsets to
-        line numbers.  Returns a list: [(b0, l0), (b1, l1), ...]
-    
-        """
-        # Adapted from dis.py in the standard library.
-        byte_increments = self._lnotab_increments(code.co_lnotab[0::2])
-        line_increments = self._lnotab_increments(code.co_lnotab[1::2])
-    
-        bytes_lines = []
-        last_line_num = None
-        line_num = code.co_firstlineno
-        byte_num = 0
-        for byte_incr, line_incr in zip(byte_increments, line_increments):
-            if byte_incr:
-                if line_num != last_line_num:
-                    bytes_lines.append((byte_num, line_num))
-                    last_line_num = line_num
-                byte_num += byte_incr
-            line_num += line_incr
-        if line_num != last_line_num:
-            bytes_lines.append((byte_num, line_num))
-        return bytes_lines
-    
-    def _find_statements(self, code):
-        """Find the statements in `code`.
-        
-        Update `self.statement_starts`, a set of line numbers that start
-        statements.  Recurses into all code objects reachable from `code`.
-        
-        """
-        # Adapted from trace.py in the standard library.
-        for co in CodeObjects(code):
-            # Get all of the lineno information from this code.
-            bytes_lines = self._bytes_lines(co)
-            for b, l in bytes_lines:
-                self.statement_starts.add(l)
-    
-    def _raw_parse(self, text=None, filename=None, exclude=None):
-        """Parse `text` to find the interesting facts about its lines.
+    def _raw_parse(self):
+        """Parse the source to find the interesting facts about its lines.
         
         A handful of member fields are updated.
         
         """
-        if not text:
-            sourcef = open(filename, 'rU')
-            text = sourcef.read()
-            sourcef.close()
-        text = text.replace('\r\n', '\n')
-        self.lines = text.split('\n')
-
         # Find lines which match an exclusion pattern.
-        if exclude:
-            re_exclude = re.compile(exclude)
+        if self.exclude:
+            re_exclude = re.compile(self.exclude)
             for i, ltext in enumerate(self.lines):
                 if re_exclude.search(ltext):
                     self.excluded.add(i+1)
@@ -106,7 +66,7 @@ class CodeParser:
         prev_toktype = token.INDENT
         first_line = None
 
-        tokgen = tokenize.generate_tokens(StringIO(text).readline)
+        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
         for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
             if self.show_tokens:
                 print("%10s %5s %-20r %r" % (
@@ -154,20 +114,8 @@ class CodeParser:
             prev_toktype = toktype
 
         # Find the starts of the executable statements.
-        filename = filename or "<code>"
-        try:
-            # Python 2.3 and 2.4 don't like partial last lines, so be sure the
-            # text ends nicely for them.
-            text += '\n'
-            code = compile(text, filename, "exec")
-        except SyntaxError:
-            _, synerr, _ = sys.exc_info()
-            raise CoverageException(
-                "Couldn't parse '%s' as Python source: '%s' at line %d" %
-                    (filename, synerr.msg, synerr.lineno)
-                )
-
-        self._find_statements(code)
+        byte_parser = ByteParser(text=self.text, filename=self.filename)
+        self.statement_starts.update(byte_parser._find_statements())
 
     def _map_to_first_line(self, lines, ignore=None):
         """Map the line numbers in `lines` to the correct first line of the
@@ -194,19 +142,15 @@ class CodeParser:
         lines.sort()
         return lines
     
-    def parse_source(self, text=None, filename=None, exclude=None):
+    def parse_source(self):
         """Parse source text to find executable lines, excluded lines, etc.
-        
-        Source can be provided as `text`, the text itself, or `filename`, from
-        which text will be read.  Excluded lines are those that match `exclude`,
-        a regex.
-        
+
         Return values are 1) a sorted list of executable line numbers,
         2) a sorted list of excluded line numbers, and 3) a dict mapping line
         numbers to pairs (lo,hi) for multi-line statements.
         
         """
-        self._raw_parse(text, filename, exclude)
+        self._raw_parse()
         
         excluded_lines = self._map_to_first_line(self.excluded)
         ignore = excluded_lines + list(self.docstrings)
@@ -214,19 +158,94 @@ class CodeParser:
     
         return lines, excluded_lines, self.multiline
 
-    def _disassemble(self, code):
+
+class ByteParser:
+
+    def __init__(self, code=None, text=None, filename=None):
+
+        if code:
+            self.code = code
+        else:
+            if not text:
+                assert filename, "If no code or text, need a filename"
+                sourcef = open(filename, 'rU')
+                text = sourcef.read()
+                sourcef.close()
+
+            try:
+                # Python 2.3 and 2.4 don't like partial last lines, so be sure
+                # the text ends nicely for them.
+                self.code = compile(text + '\n', filename, "exec")
+            except SyntaxError:
+                _, synerr, _ = sys.exc_info()
+                raise CoverageException(
+                    "Couldn't parse '%s' as Python source: '%s' at line %d" %
+                        (filename, synerr.msg, synerr.lineno)
+                    )
+
+    def child_parsers(self):
+        """Iterate over all the code objects nested within this one, starting with self."""
+        return map(lambda c: ByteParser(code=c), CodeObjects(self.code))
+
+    # Getting numbers from the lnotab value changed in Py3.0.    
+    if sys.hexversion >= 0x03000000:
+        def _lnotab_increments(self, lnotab):
+            """Return a list of ints from the lnotab bytes in 3.x"""
+            return list(lnotab)
+    else:
+        def _lnotab_increments(self, lnotab):
+            """Return a list of ints from the lnotab string in 2.x"""
+            return [ord(c) for c in lnotab]
+
+    def _bytes_lines(self):
+        """Map byte offsets to line numbers in `code`.
+    
+        Uses co_lnotab described in Python/compile.c to map byte offsets to
+        line numbers.  Returns a list: [(b0, l0), (b1, l1), ...]
+    
+        """
+        # Adapted from dis.py in the standard library.
+        byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
+        line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
+    
+        bytes_lines = []
+        last_line_num = None
+        line_num = self.code.co_firstlineno
+        byte_num = 0
+        for byte_incr, line_incr in zip(byte_increments, line_increments):
+            if byte_incr:
+                if line_num != last_line_num:
+                    bytes_lines.append((byte_num, line_num))
+                    last_line_num = line_num
+                byte_num += byte_incr
+            line_num += line_incr
+        if line_num != last_line_num:
+            bytes_lines.append((byte_num, line_num))
+        return bytes_lines
+    
+    def _find_statements(self):
+        """Find the statements in `self.code`.
+        
+        Return a set of line numbers that start statements.  Recurses into all
+        code objects reachable from `self.code`.
+        
+        """
+        stmts = set()
+        for bp in self.child_parsers():
+            # Get all of the lineno information from this code.
+            for _, l in bp._bytes_lines():
+                stmts.add(l)
+        return stmts
+    
+    def _disassemble(self):
         """Disassemble code, for ad-hoc experimenting."""
         
         import dis
         
-        for codeobj in CodeObjects(code):
-            print("\n%s: " % codeobj)
-            dis.dis(codeobj)
-            print("Bytes lines: %r" % self._bytes_lines(codeobj))
-            print("Jumps: %r %r" % self._find_byte_jumps(codeobj))
-            warnings, chunks = self._split_into_chunks(codeobj)
-            if warnings:
-                print("WARNING: %s" % "\n".join(warnings))
+        for bp in self.child_parsers():
+            print("\n%s: " % bp.code)
+            dis.dis(bp.code)
+            print("Bytes lines: %r" % bp._bytes_lines())
 
         print("")
 
@@ -241,30 +260,31 @@ class CodeParser:
                 last_line = l
         return last_line
 
-    def _find_byte_jumps(self, code):
-        byte_jumps = [(bc.offset, bc.jump_to) for bc in ByteCodes(code.co_code) if bc.jump_to >= 0]
-        
-        bytes_lines = self._bytes_lines(code)
-        line_jumps = [(self._line_for_byte(bytes_lines, b0), self._line_for_byte(bytes_lines, b1)) for b0, b1 in byte_jumps]
-        return byte_jumps, line_jumps
-
-    _chunk_enders = set([opcode.opmap[name] for name in ['JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE']])
+    _code_enders = set([opcode.opmap[name] for name in ['RETURN_VALUE']])
+    _chunk_enders = set([opcode.opmap[name] for name in ['JUMP_ABSOLUTE', 'JUMP_FORWARD']])
+    _chunk_enders |= _code_enders
     
-    def _split_into_chunks(self, code):
+    def _split_into_chunks(self):
         class Chunk(object):
+            """A sequence of bytecodes with exits to other bytecodes.
+            
+            An exit of None means the chunk can leave the code block (return).
+            
+            """
             def __init__(self, byte, line=0):
                 self.byte = byte
                 self.line = line
+                self.length = 0
                 self.exits = set()
                 
             def __repr__(self):
-                return "<%d:%d %r>" % (self.byte, self.line, list(self.exits))
+                return "<%d:%d(%d) %r>" % (self.byte, self.line, self.length, list(self.exits))
 
         chunks = []
         chunk = None
-        bytes_lines_map = dict(self._bytes_lines(code))
-        
-        for bc in ByteCodes(code.co_code):
+        bytes_lines_map = dict(self._bytes_lines())
+
+        for bc in ByteCodes(self.code.co_code):
             # Maybe have to start a new block
             if bc.offset in bytes_lines_map:
                 if chunk:
@@ -279,34 +299,99 @@ class CodeParser:
             if bc.jump_to >= 0:
                 chunk.exits.add(bc.jump_to)
             
+            if bc.op in self._code_enders:
+                chunk.exits.add(None)
+
             if bc.op in self._chunk_enders:
                 chunk = None
+                
+        if chunks:
+            chunks[-1].length = bc.next_offset - chunks[-1].byte
+        for i in range(len(chunks)-1):
+            chunks[i].length = chunks[i+1].byte - chunks[i].byte
+
+        return chunks
+
+    def _arcs(self):
+        chunks = self._split_into_chunks()
         
-        warnings = []
-        # Find anonymous chunks (not associated with a line number), and find
-        # the numbered chunks that jump to them.
-        for ch in chunks:
-            if not ch.line:
-                jumpers = [c for c in chunks if ch.byte in c.exits]
-                if len(jumpers) == 1:
-                    ch.line = jumpers[0].line
-                if len(jumpers) > 1:
-                    warnings.append("Anon chunk at %d has %d jumpers" % (ch.byte, len(jumpers)))
-                #if len(ch.exits) > 1:
-                #    warnings.append("Anon chunk at %d has %d exits" % (ch.byte, len(ch.exits)))
-        return warnings, chunks
-
-    def _all_chunks(self, code):
-        warnings = []
-        chunks = []
-        for co in CodeObjects(code):
-            warns, chs = self._split_into_chunks(co)
-            warnings.extend(warns)
-            chunks.extend(chs)
+        # A map from byte offsets to chunks jumped into.
+        byte_chunks = dict([(c.byte, c) for c in chunks])
+
+        # Build a map from byte offsets to actual lines reached.
+        byte_lines = {None:[None]}
+        bytes_to_add = set([c.byte for c in chunks])
         
-        return warnings, chunks
+        while bytes_to_add:
+            byte_to_add = bytes_to_add.pop()
+            if byte_to_add in byte_lines or byte_to_add is None:
+                continue
             
-    def adhoc_main(self, args):
+            # Which lines does this chunk lead to?
+            bytes_considered = set()
+            bytes_to_consider = [byte_to_add]
+            lines = set()
+            
+            while bytes_to_consider:
+                byte = bytes_to_consider.pop()
+                bytes_considered.add(byte)
+                
+                # Find chunk for byte
+                try:
+                    ch = byte_chunks[byte]
+                except KeyError:
+                    for ch in chunks:
+                        if ch.byte <= byte < ch.byte+ch.length:
+                            break
+                    else:
+                        # No chunk for this byte!
+                        raise Exception("Couldn't find a chunk for byte %d" % byte)
+                    byte_chunks[byte] = ch
+                    
+                if ch.line:
+                    lines.add(ch.line)
+                else:
+                    for ex in ch.exits:
+                        if ex is None:
+                            lines.add(None)
+                        elif ex not in bytes_considered:
+                            bytes_to_consider.append(ex)
+
+                bytes_to_add.update(ch.exits)
+
+            byte_lines[byte_to_add] = lines
+        
+        # Figure out for each chunk where the exits go.
+        arcs = set()
+        for chunk in chunks:
+            if chunk.line:
+                for ex in chunk.exits:
+                    for exit_line in byte_lines[ex]:
+                        arcs.add((chunk.line, exit_line))
+        for line in byte_lines[0]:
+            arcs.add((None, line))
+        
+        return arcs
+        
+    def _all_chunks(self):
+        chunks = []
+        for bp in self.child_parsers():
+            chunks.extend(bp._split_into_chunks())
+        
+        return [], chunks
+
+    def _all_arcs(self):
+        arcs = []
+        for bp in self.child_parsers():
+            arcs.extend(bp._arcs())
+        
+        return arcs
+
+
+class AdHocMain(object):
+    """An ad-hoc main for code parsing experiments."""
+    
+    def main(self, args):
         """A main function for trying the code from the command line."""
 
         from optparse import OptionParser
@@ -341,21 +426,20 @@ class CodeParser:
             self.adhoc_one_file(options, args[0])
 
     def adhoc_one_file(self, options, filename):
-        if options.dis or options.chunks:        
-            source = open(filename, "rU").read() + "\n\n"
+        if options.dis or options.chunks:
             try:
-                code = compile(source, filename, "exec")
-            except SyntaxError:
+                bp = ByteParser(filename=filename)
+            except CoverageException:
                 _, err, _ = sys.exc_info()                
-                print("** Couldn't compile %s: %s" % (filename, err))
+                print("%s" % (err,))
                 return
 
         if options.dis:
             print("Main code:")
-            self._disassemble(code)
+            bp._disassemble()
 
         if options.chunks:
-            warnings, chunks = self._all_chunks(code)
+            warnings, chunks = bp._all_chunks()
             if options.recursive:
                 print("%6d: %s" % (len(chunks), filename))
                 if warnings:
@@ -363,22 +447,24 @@ class CodeParser:
             else:
                 print(warnings)
                 print(chunks)
+                print(bp._all_arcs())
 
-        self.show_tokens = options.tokens
-        self._raw_parse(filename=filename, exclude=r"no\s*cover")
+        if options.source or options.tokens:
+            cp = CodeParser(filename=filename, exclude=r"no\s*cover")
+            cp.show_tokens = options.tokens
+            cp._raw_parse()
 
-        if options.source:
-            for i, ltext in enumerate(self.lines):
+            for i, ltext in enumerate(cp.lines):
                 lineno = i+1
                 m0 = m1 = m2 = ' '
-                if lineno in self.statement_starts:
+                if lineno in cp.statement_starts:
                     m0 = '-'
-                if lineno in self.docstrings:
+                if lineno in cp.docstrings:
                     m1 = '"'
-                if lineno in self.excluded:
+                if lineno in cp.excluded:
                     m2 = 'x'
                 print("%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext))
 
 
 if __name__ == '__main__':
-    CodeParser().adhoc_main(sys.argv[1:])
+    AdHocMain().main(sys.argv[1:])