1 files changed, 86 insertions, 47 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index 905596be..7f50a9bc 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -1,12 +1,12 @@
 """Code parsing for Coverage."""
 
-import re, sys, token, tokenize, types
+import opcode, re, sys, token, tokenize, types
 
-from coverage.misc import nice_pair, CoverageException
 from coverage.backward import set, StringIO   # pylint: disable-msg=W0622
+from coverage.bytecode import ByteCodes, CodeObjects
+from coverage.misc import nice_pair, CoverageException
 
 
-    
 class CodeParser:
     """Parse code to find executable lines, excluded lines, etc."""
     
@@ -28,8 +28,6 @@ class CodeParser:
         # The line numbers that start statements.
         self.statement_starts = set()
 
-        self.bytes_lines = []
-        
     # Getting numbers from the lnotab value changed in Py3.0.    
     if sys.hexversion >= 0x03000000:
         def _lnotab_increments(self, lnotab):
@@ -40,32 +38,32 @@ class CodeParser:
             """Return a list of ints from the lnotab string in 2.x"""
             return [ord(c) for c in lnotab]
 
-    def _find_statement_starts(self, code):
-        """Find the starts of statements in compiled code.
+    def _bytes_lines(self, code):
+        """Map byte offsets to line numbers in `code`.
     
-        Uses co_lnotab described in Python/compile.c to find line numbers that
-        start statements, adding them to `self.statement_starts`.
+        Uses co_lnotab described in Python/compile.c to map byte offsets to
+        line numbers.  Returns a list: [(b0, l0), (b1, l1), ...]
     
         """
         # Adapted from dis.py in the standard library.
         byte_increments = self._lnotab_increments(code.co_lnotab[0::2])
         line_increments = self._lnotab_increments(code.co_lnotab[1::2])
     
+        bytes_lines = []
         last_line_num = None
         line_num = code.co_firstlineno
         byte_num = 0
         for byte_incr, line_incr in zip(byte_increments, line_increments):
             if byte_incr:
                 if line_num != last_line_num:
-                    self.bytes_lines.append((byte_num, line_num))
-                    self.statement_starts.add(line_num)
+                    bytes_lines.append((byte_num, line_num))
                     last_line_num = line_num
                 byte_num += byte_incr
             line_num += line_incr
         if line_num != last_line_num:
-            self.bytes_lines.append((byte_num, line_num))
-            self.statement_starts.add(line_num)
-
+            bytes_lines.append((byte_num, line_num))
+        return bytes_lines
+    
     def _find_statements(self, code):
         """Find the statements in `code`.
         
@@ -76,7 +74,9 @@ class CodeParser:
         # Adapted from trace.py in the standard library.
 
         # Get all of the lineno information from this code.
-        self._find_statement_starts(code)
+        bytes_lines = self._bytes_lines(code)
+        for b, l in bytes_lines:
+            self.statement_starts.add(l)
     
         # Check the constants for references to other code objects.
         for c in code.co_consts:
@@ -220,43 +220,83 @@ class CodeParser:
     
         return lines, excluded_lines, self.multiline
 
-    def _find_byte_chunks(self, code):
-        import opcode
-        
-        code = code.co_code
-        #labels = findlabels(code)
-        #linestarts = dict(findlinestarts(co))
-        n = len(code)
-        i = 0
-        extended_arg = 0
-        free = None
-        while i < n:
-            c = code[i]
-            op = ord(c)
-            print repr(i).rjust(4),
-            print opcode.opname[op].ljust(20)
-            i = i+1
-            if op >= opcode.HAVE_ARGUMENT:
-                oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
-                extended_arg = 0
-                i = i+2
-                if op == opcode.EXTENDED_ARG:
-                    extended_arg = oparg*65536L
-        
     def _disassemble(self, code):
         """Disassemble code, for ad-hoc experimenting."""
         
         import dis
-        dis.dis(code)
-
-        for c in code.co_consts:
-            if isinstance(c, types.CodeType):
-                # Found another code object, so recurse into it.
-                print("\n%s:" % c)
-                self._disassemble(c)
+        
+        for codeobj in CodeObjects(code):
+            print("\n%s: " % codeobj)
+            dis.dis(codeobj)
+            print("Bytes lines: %r" % self._bytes_lines(codeobj))
+            print("Jumps: %r %r" % self._find_byte_jumps(codeobj))
+            print(self._split_into_chunks(codeobj))
 
         print("")
+
+    def _line_for_byte(self, bytes_lines, byte):
+        last_line = 0
+        for b, l in bytes_lines:
+            if b == byte:
+                return l
+            elif b > byte:
+                return last_line
+            else:
+                last_line = l
+        return last_line
+
+    def _find_byte_jumps(self, code):
+        byte_jumps = [(bc.offset, bc.jump_to) for bc in ByteCodes(code.co_code) if bc.jump_to >= 0]
         
+        bytes_lines = self._bytes_lines(code)
+        line_jumps = [(self._line_for_byte(bytes_lines, b0), self._line_for_byte(bytes_lines, b1)) for b0, b1 in byte_jumps]
+        return byte_jumps, line_jumps
+
+    _chunk_enders = set([opcode.opmap[name] for name in ['JUMP_ABSOLUTE', 'RETURN_VALUE']])
+    
+    def _split_into_chunks(self, code):
+        class Chunk(object):
+            def __init__(self, byte, line=0):
+                self.byte = byte
+                self.line = line
+                self.exits = set()
+                
+            def __repr__(self):
+                return "<%d:%d %r>" % (self.byte, self.line, list(self.exits))
+
+        chunks = []
+        chunk = None
+        bytes_lines_map = dict(self._bytes_lines(code))
+        
+        for bc in ByteCodes(code.co_code):
+            # Maybe have to start a new block
+            if bc.offset in bytes_lines_map:
+                if chunk:
+                    chunk.exits.add(bc.offset)
+                chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
+                chunks.append(chunk)
+                
+            if not chunk:
+                chunk = Chunk(bc.offset)
+                chunks.append(chunk)
+                
+            if bc.jump_to >= 0:
+                chunk.exits.add(bc.jump_to)
+            
+            if bc.op in self._chunk_enders:
+                chunk = None
+        
+        warnings = []
+        # Find anonymous chunks (not associated with a line number), and find
+        # the numbered chunks that jump to them.
+        for ch in chunks:
+            if not ch.line:
+                jumpers = [c for c in chunks if ch.line in c.exits]
+                if len(jumpers) > 1:
+                    warnings.append("Chunk at %d has %d jumpers" % (ch.byte, len(jumpers)))
+                
+        return warnings, chunks
+
     def adhoc_main(self, args):
         """A main function for trying the code from the command line."""
 
@@ -285,12 +325,11 @@ class CodeParser:
             self._disassemble(code)
 
         if options.chunks:
-            self._find_byte_chunks(code)
+            self._split_into_chunks(code)
 
         self.show_tokens = options.tokens
         self._raw_parse(filename=filename, exclude=r"no\s*cover")
 
-        print self.bytes_lines
         for i, ltext in enumerate(self.lines):
             lineno = i+1
             m0 = m1 = m2 = ' '