1 files changed, 154 insertions, 100 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index 01b38af3..b090f02d 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -9,13 +9,13 @@ from coverage.misc import nice_pair, CoverageException, NoSource, expensive
 
 class CodeParser(object):
     """Parse code to find executable lines, excluded lines, etc."""
-    
+
     def __init__(self, text=None, filename=None, exclude=None):
         """
         Source can be provided as `text`, the text itself, or `filename`, from
-        which text will be read.  Excluded lines are those that match `exclude`,
-        a regex.
-        
+        which text will be read.  Excluded lines are those that match
+        `exclude`, a regex.
+
         """
         assert text or filename, "CodeParser needs either text or filename"
         self.filename = filename or "<code>"
@@ -33,7 +33,7 @@ class CodeParser(object):
         self.text = self.text.replace('\r\n', '\n')
 
         self.exclude = exclude
-        
+
         self.show_tokens = False
 
         # The text lines of the parsed code.
@@ -41,22 +41,22 @@ class CodeParser(object):
 
         # The line numbers of excluded lines of code.
         self.excluded = set()
-        
+
         # The line numbers of docstring lines.
         self.docstrings = set()
-        
+
         # The line numbers of class definitions.
         self.classdefs = set()
 
         # A dict mapping line numbers to (lo,hi) for multi-line statements.
         self.multiline = {}
-        
+
         # The line numbers that start statements.
         self.statement_starts = set()
 
         # Lazily-created ByteParser
         self._byte_parser = None
-        
+
     def _get_byte_parser(self):
         """Create a ByteParser on demand."""
         if not self._byte_parser:
@@ -67,9 +67,9 @@ class CodeParser(object):
 
     def _raw_parse(self):
         """Parse the source to find the interesting facts about its lines.
-        
+
         A handful of member fields are updated.
-        
+
         """
         # Find lines which match an exclusion pattern.
         if self.exclude:
@@ -77,7 +77,7 @@ class CodeParser(object):
             for i, ltext in enumerate(self.lines):
                 if re_exclude.search(ltext):
                     self.excluded.add(i+1)
-    
+
         # Tokenize, to find excluded suites, to find docstrings, and to find
         # multi-line statements.
         indent = 0
@@ -88,7 +88,7 @@ class CodeParser(object):
 
         tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
         for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
-            if self.show_tokens:
+            if self.show_tokens:                # pragma: no cover
                 print("%10s %5s %-20r %r" % (
                     tokenize.tok_name.get(toktype, toktype),
                     nice_pair((slineno, elineno)), ttext, ltext
@@ -111,7 +111,9 @@ class CodeParser(object):
                     excluding = True
             elif toktype == token.STRING and prev_toktype == token.INDENT:
                 # Strings that are first on an indented line are docstrings.
-                # (a trick from trace.py in the stdlib.)
+                # (a trick from trace.py in the stdlib.) This works for
+                # 99.9999% of cases.  For the rest (!) see:
+                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                 for i in range(slineno, elineno+1):
                     self.docstrings.add(i)
             elif toktype == token.NEWLINE:
@@ -123,7 +125,7 @@ class CodeParser(object):
                     for l in range(first_line, elineno+1):
                         self.multiline[l] = rng
                 first_line = None
-                
+
             if ttext.strip() and toktype != tokenize.COMMENT:
                 # A non-whitespace token.
                 if first_line is None:
@@ -135,7 +137,7 @@ class CodeParser(object):
                         excluding = False
                     if excluding:
                         self.excluded.add(elineno)
-                        
+
             prev_toktype = toktype
 
         # Find the starts of the executable statements.
@@ -153,11 +155,11 @@ class CodeParser(object):
     def first_lines(self, lines, ignore=None):
         """Map the line numbers in `lines` to the correct first line of the
         statement.
-        
+
         Skip any line mentioned in `ignore`.
-        
+
         Returns a sorted list of the first lines.
-        
+
         """
         ignore = ignore or []
         lset = set()
@@ -168,31 +170,31 @@ class CodeParser(object):
             if new_l not in ignore:
                 lset.add(new_l)
         return sorted(lset)
-    
+
     def parse_source(self):
         """Parse source text to find executable lines, excluded lines, etc.
 
         Return values are 1) a sorted list of executable line numbers, and
         2) a sorted list of excluded line numbers.
-        
+
         Reported line numbers are normalized to the first line of multi-line
         statements.
-        
+
         """
         self._raw_parse()
-        
+
         excluded_lines = self.first_lines(self.excluded)
         ignore = excluded_lines + list(self.docstrings)
         lines = self.first_lines(self.statement_starts, ignore)
-    
+
         return lines, excluded_lines
 
     def arcs(self):
         """Get information about the arcs available in the code.
-        
+
         Returns a sorted list of line number pairs.  Line numbers have been
         normalized to the first line of multiline statements.
-        
+
         """
         all_arcs = []
         for l1, l2 in self.byte_parser._all_arcs():
@@ -205,27 +207,32 @@ class CodeParser(object):
 
     def exit_counts(self):
         """Get a mapping from line numbers to count of exits from that line.
-        
+
         Excluded lines are excluded.
-        
+
         """
         excluded_lines = self.first_lines(self.excluded)
         exit_counts = {}
         for l1, l2 in self.arcs():
-            if l1 == -1:
+            if l1 < 0:
+                # Don't ever report a negative number as a line number.
                 continue
             if l1 in excluded_lines:
+                # Don't report excluded lines as line numbers.
+                continue
+            if l2 in excluded_lines:
+                # Arcs to excluded lines shouldn't count.
                 continue
             if l1 not in exit_counts:
                 exit_counts[l1] = 0
             exit_counts[l1] += 1
-        
+
         # Class definitions have one extra exit, so remove one for each:
         for l in self.classdefs:
-            # Ensure key is there - #pragma: no cover will mean its not
+            # Ensure key is there: classdefs can include excluded lines.
             if l in exit_counts:
                 exit_counts[l] -= 1
-                
+
         return exit_counts
     exit_counts = expensive(exit_counts)
 
@@ -249,6 +256,11 @@ OPS_CHUNK_END = _opcode_set(
     'BREAK_LOOP', 'CONTINUE_LOOP',
     )
 
+# Opcodes that unconditionally begin a new code chunk.  Giving each
+# unconditional jump instruction its own chunk means that jumps to jumps are
+# followed correctly.
+OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')
+
 # Opcodes that push a block on the block stack.
 OPS_PUSH_BLOCK = _opcode_set('SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY')
 
@@ -266,6 +278,8 @@ OP_BREAK_LOOP = _opcode('BREAK_LOOP')
 OP_END_FINALLY = _opcode('END_FINALLY')
 OP_COMPARE_OP = _opcode('COMPARE_OP')
 COMPARE_EXCEPTION = 10  # just have to get this const from the code.
+OP_LOAD_CONST = _opcode('LOAD_CONST')
+OP_RETURN_VALUE = _opcode('RETURN_VALUE')
 
 
 class ByteParser(object):
@@ -294,14 +308,14 @@ class ByteParser(object):
 
     def child_parsers(self):
         """Iterate over all the code objects nested within this one.
-        
+
         The iteration includes `self` as its first value.
-        
+
         """
         return map(lambda c: ByteParser(code=c), CodeObjects(self.code))
 
-    # Getting numbers from the lnotab value changed in Py3.0.    
-    if sys.hexversion >= 0x03000000:
+    # Getting numbers from the lnotab value changed in Py3.0.
+    if sys.version_info >= (3, 0):
         def _lnotab_increments(self, lnotab):
             """Return a list of ints from the lnotab bytes in 3.x"""
             return list(lnotab)
@@ -312,15 +326,15 @@ class ByteParser(object):
 
     def _bytes_lines(self):
         """Map byte offsets to line numbers in `code`.
-    
+
         Uses co_lnotab described in Python/compile.c to map byte offsets to
         line numbers.  Returns a list: [(b0, l0), (b1, l1), ...]
-    
+
         """
         # Adapted from dis.py in the standard library.
         byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
         line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
-    
+
         bytes_lines = []
         last_line_num = None
         line_num = self.code.co_firstlineno
@@ -335,13 +349,13 @@ class ByteParser(object):
         if line_num != last_line_num:
             bytes_lines.append((byte_num, line_num))
         return bytes_lines
-    
+
     def _find_statements(self):
         """Find the statements in `self.code`.
-        
+
         Return a set of line numbers that start statements.  Recurses into all
         code objects reachable from `self.code`.
-        
+
         """
         stmts = set()
         for bp in self.child_parsers():
@@ -349,12 +363,12 @@ class ByteParser(object):
             for _, l in bp._bytes_lines():
                 stmts.add(l)
         return stmts
-    
-    def _disassemble(self):
+
+    def _disassemble(self):     # pragma: no cover
         """Disassemble code, for ad-hoc experimenting."""
-        
+
         import dis
-        
+
         for bp in self.child_parsers():
             print("\n%s: " % bp.code)
             dis.dis(bp.code)
@@ -364,41 +378,52 @@ class ByteParser(object):
 
     def _split_into_chunks(self):
         """Split the code object into a list of `Chunk` objects.
-        
+
         Each chunk is only entered at its first instruction, though there can
         be many exits from a chunk.
-        
+
         Returns a list of `Chunk` objects.
-        
+
         """
 
         # The list of chunks so far, and the one we're working on.
         chunks = []
         chunk = None
         bytes_lines_map = dict(self._bytes_lines())
-        
+
         # The block stack: loops and try blocks get pushed here for the
         # implicit jumps that can occur.
         # Each entry is a tuple: (block type, destination)
         block_stack = []
-        
+
         # Some op codes are followed by branches that should be ignored.  This
         # is a count of how many ignores are left.
         ignore_branch = 0
 
+        # We have to handle the last two bytecodes specially.
+        ult = penult = None
+
         for bc in ByteCodes(self.code.co_code):
-            # Maybe have to start a new block
+            # Maybe have to start a new chunk
             if bc.offset in bytes_lines_map:
+                # Start a new chunk for each source line number.
                 if chunk:
                     chunk.exits.add(bc.offset)
                 chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
                 chunks.append(chunk)
-                
+            elif bc.op in OPS_CHUNK_BEGIN:
+                # Jumps get their own chunk with no line number.  This fixes
+                # problems with jumps to jumps getting confused.
+                if chunk:
+                    chunk.exits.add(bc.offset)
+                chunk = Chunk(bc.offset)
+                chunks.append(chunk)
+
             if not chunk:
                 chunk = Chunk(bc.offset)
                 chunks.append(chunk)
 
-            # Look at the opcode                
+            # Look at the opcode
             if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
                 if ignore_branch:
                     # Someone earlier wanted us to ignore this branch.
@@ -406,10 +431,10 @@ class ByteParser(object):
                 else:
                     # The opcode has a jump, it's an exit for this chunk.
                     chunk.exits.add(bc.jump_to)
-            
+
             if bc.op in OPS_CODE_END:
                 # The opcode can exit the code object.
-                chunk.exits.add(-1)
+                chunk.exits.add(-self.code.co_firstlineno)
             if bc.op in OPS_PUSH_BLOCK:
                 # The opcode adds a block to the block_stack.
                 block_stack.append((bc.op, bc.jump_to))
@@ -438,8 +463,32 @@ class ByteParser(object):
                 # This is an except clause.  We want to overlook the next
                 # branch, so that except's don't count as branches.
                 ignore_branch += 1
-            
+
+            penult = ult
+            ult = bc
+
+
         if chunks:
+            # The last two bytecodes could be a dummy "return None" that
+            # shouldn't be counted as real code. Every Python code object seems
+            # to end with a return, and a "return None" is inserted if there
+            # isn't an explicit return in the source.
+            if ult and penult:
+                if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
+                    if self.code.co_consts[penult.arg] is None:
+                        # This is "return None", but is it dummy?  A real line
+                        # would be a last chunk all by itself.
+                        if chunks[-1].byte != penult.offset:
+                            exit = -self.code.co_firstlineno
+                            # Split the last chunk
+                            last_chunk = chunks[-1]
+                            last_chunk.exits.remove(exit)
+                            last_chunk.exits.add(penult.offset)
+                            chunk = Chunk(penult.offset)
+                            chunk.exits.add(exit)
+                            chunks.append(chunk)
+
+            # Give all the chunks a length.
             chunks[-1].length = bc.next_offset - chunks[-1].byte
             for i in range(len(chunks)-1):
                 chunks[i].length = chunks[i+1].byte - chunks[i].byte
@@ -448,35 +497,35 @@ class ByteParser(object):
 
     def _arcs(self):
         """Find the executable arcs in the code.
-        
+
         Returns a set of pairs, (from,to).  From and to are integer line
-        numbers.  If from is -1, then the arc is an entrance into the code
-        object.  If to is -1, the arc is an exit from the code object.
-        
+        numbers.  If `from` is negative, the arc is an entrance into the code
+        object.  If `to` is negative, the arc is an exit from the code object.
+
         """
         chunks = self._split_into_chunks()
-        
+
         # A map from byte offsets to chunks jumped into.
         byte_chunks = dict([(c.byte, c) for c in chunks])
 
         # Build a map from byte offsets to actual lines reached.
-        byte_lines = {-1:[-1]}
+        byte_lines = {}
         bytes_to_add = set([c.byte for c in chunks])
-        
+
         while bytes_to_add:
             byte_to_add = bytes_to_add.pop()
-            if byte_to_add in byte_lines or byte_to_add == -1:
+            if byte_to_add in byte_lines or byte_to_add < 0:
                 continue
-            
+
             # Which lines does this chunk lead to?
             bytes_considered = set()
             bytes_to_consider = [byte_to_add]
             lines = set()
-            
+
             while bytes_to_consider:
                 byte = bytes_to_consider.pop()
                 bytes_considered.add(byte)
-                
+
                 # Find chunk for byte
                 try:
                     ch = byte_chunks[byte]
@@ -488,89 +537,94 @@ class ByteParser(object):
                         # No chunk for this byte!
                         raise Exception("Couldn't find chunk @ %d" % byte)
                     byte_chunks[byte] = ch
-                    
+
                 if ch.line:
                     lines.add(ch.line)
                 else:
                     for ex in ch.exits:
-                        if ex == -1:
-                            lines.add(-1)
+                        if ex < 0:
+                            lines.add(ex)
                         elif ex not in bytes_considered:
                             bytes_to_consider.append(ex)
 
                 bytes_to_add.update(ch.exits)
 
             byte_lines[byte_to_add] = lines
-        
+
         # Figure out for each chunk where the exits go.
         arcs = set()
         for chunk in chunks:
             if chunk.line:
                 for ex in chunk.exits:
-                    for exit_line in byte_lines[ex]:
+                    if ex < 0:
+                        exit_lines = [ex]
+                    else:
+                        exit_lines = byte_lines[ex]
+                    for exit_line in exit_lines:
                         if chunk.line != exit_line:
                             arcs.add((chunk.line, exit_line))
         for line in byte_lines[0]:
             arcs.add((-1, line))
-        
+
         return arcs
-        
+
     def _all_chunks(self):
         """Returns a list of `Chunk` objects for this code and its children.
-        
+
         See `_split_into_chunks` for details.
-        
+
         """
         chunks = []
         for bp in self.child_parsers():
             chunks.extend(bp._split_into_chunks())
-        
+
         return chunks
 
     def _all_arcs(self):
         """Get the set of all arcs in this code object and its children.
-        
+
         See `_arcs` for details.
-        
+
         """
         arcs = set()
         for bp in self.child_parsers():
             arcs.update(bp._arcs())
-        
+
         return arcs
 
 
 class Chunk(object):
     """A sequence of bytecodes with a single entrance.
-    
+
     To analyze byte code, we have to divide it into chunks, sequences of byte
     codes such that each basic block has only one entrance, the first
-    instruction in the block. 
-    
+    instruction in the block.
+
     This is almost the CS concept of `basic block`_, except that we're willing
     to have many exits from a chunk, and "basic block" is a more cumbersome
     term.
-    
+
     .. _basic block: http://en.wikipedia.org/wiki/Basic_block
-    
-    An exit of -1 means the chunk can leave the code (return).
-    
+
+    A negative exit means the chunk can leave the code (return).  Its value
+    is the negative of the first line number of the enclosing code object.
+
     """
     def __init__(self, byte, line=0):
         self.byte = byte
         self.line = line
         self.length = 0
         self.exits = set()
-        
+
     def __repr__(self):
         return "<%d+%d @%d %r>" % (
             self.byte, self.length, self.line, list(self.exits)
             )
 
 
-class AdHocMain(object):
+class AdHocMain(object):        # pragma: no cover
     """An ad-hoc main for code parsing experiments."""
-    
+
     def main(self, args):
         """A main function for trying the code from the command line."""
 
@@ -597,7 +651,7 @@ class AdHocMain(object):
             "-t", action="store_true", dest="tokens",
             help="Show tokens"
             )
-        
+
         options, args = parser.parse_args()
         if options.recursive:
             if args:
@@ -612,12 +666,12 @@ class AdHocMain(object):
 
     def adhoc_one_file(self, options, filename):
         """Process just one file."""
-        
+
         if options.dis or options.chunks:
             try:
                 bp = ByteParser(filename=filename)
             except CoverageException:
-                _, err, _ = sys.exc_info()                
+                _, err, _ = sys.exc_info()
                 print("%s" % (err,))
                 return
 
@@ -644,7 +698,7 @@ class AdHocMain(object):
                     arc_width, arc_chars = self.arc_ascii_art(arcs)
                 else:
                     arc_width, arc_chars = 0, {}
-                    
+
                 exit_counts = cp.exit_counts()
 
                 for i, ltext in enumerate(cp.lines):
@@ -668,19 +722,19 @@ class AdHocMain(object):
 
     def arc_ascii_art(self, arcs):
         """Draw arcs as ascii art.
-        
+
         Returns a width of characters needed to draw all the arcs, and a
         dictionary mapping line numbers to ascii strings to draw for that line.
-        
+
         """
         arc_chars = {}
         for lfrom, lto in sorted(arcs):
-            if lfrom == -1:
+            if lfrom < 0:
                 arc_chars[lto] = arc_chars.get(lto, '') + 'v'
-            elif lto == -1:
+            elif lto < 0:
                 arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^'
             else:
-                if lfrom == lto-1:
+                if lfrom == lto - 1:
                     # Don't show obvious arcs.
                     continue
                 if lfrom < lto: