1 files changed, 34 insertions, 26 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
index f488367d..a2b1b610 100644
--- a/coverage/parser.py
+++ b/coverage/parser.py
@@ -1,4 +1,7 @@
-"""Code parsing for Coverage."""
+# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
+# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
+
+"""Code parsing for coverage.py."""
 
 import collections
 import dis
@@ -9,9 +12,9 @@ import tokenize
 from coverage.backward import range    # pylint: disable=redefined-builtin
 from coverage.backward import bytes_to_ints
 from coverage.bytecode import ByteCodes, CodeObjects
-from coverage.misc import nice_pair, expensive, join_regex
+from coverage.misc import contract, nice_pair, expensive, join_regex
 from coverage.misc import CoverageException, NoSource, NotPython
-from coverage.phystokens import generate_tokens
+from coverage.phystokens import compile_unicode, generate_tokens
 
 
 class CodeParser(object):
@@ -34,6 +37,7 @@ class CodeParser(object):
 class PythonParser(CodeParser):
     """Parse code to find executable lines, excluded lines, etc."""
 
+    @contract(text='unicode|None')
     def __init__(self, text=None, filename=None, exclude=None):
         """
         Source can be provided as `text`, the text itself, or `filename`, from
@@ -53,14 +57,6 @@ class PythonParser(CodeParser):
                     "No source for code: '%s': %s" % (self.filename, err)
                     )
 
-        if self.text:
-            assert isinstance(self.text, str)
-            # Scrap the BOM if it exists.
-            # (Used to do this, but no longer.  Not sure what bad will happen
-            # if we don't do it.)
-            #   if ord(self.text[0]) == 0xfeff:
-            #       self.text = self.text[1:]
-
         self.exclude = exclude
 
         self.show_tokens = False
@@ -334,7 +330,7 @@ OPS_NO_JUMP = OPS_PUSH_BLOCK
 OP_BREAK_LOOP = _opcode('BREAK_LOOP')
 OP_END_FINALLY = _opcode('END_FINALLY')
 OP_COMPARE_OP = _opcode('COMPARE_OP')
-COMPARE_EXCEPTION = 10  # just have to get this const from the code.
+COMPARE_EXCEPTION = 10  # just have to get this constant from the code.
 OP_LOAD_CONST = _opcode('LOAD_CONST')
 OP_RETURN_VALUE = _opcode('RETURN_VALUE')
 
@@ -342,13 +338,14 @@ OP_RETURN_VALUE = _opcode('RETURN_VALUE')
 class ByteParser(object):
     """Parse byte codes to understand the structure of code."""
 
+    @contract(text='unicode')
     def __init__(self, text, code=None, filename=None):
         self.text = text
         if code:
             self.code = code
         else:
             try:
-                self.code = compile(text, filename, "exec")
+                self.code = compile_unicode(text, filename, "exec")
             except SyntaxError as synerr:
                 raise NotPython(
                     "Couldn't parse '%s' as Python source: '%s' at line %d" % (
@@ -460,7 +457,7 @@ class ByteParser(object):
 
         # Walk the byte codes building chunks.
         for bc in bytecodes:
-            # Maybe have to start a new chunk
+            # Maybe have to start a new chunk.
             start_new_chunk = False
             first_chunk = False
             if bc.offset in bytes_lines_map:
@@ -481,9 +478,13 @@ class ByteParser(object):
                 if chunk:
                     chunk.exits.add(bc.offset)
                 chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
+                if not chunks:
+                    # The very first chunk of a code object is always an
+                    # entrance.
+                    chunk.entrance = True
                 chunks.append(chunk)
 
-            # Look at the opcode
+            # Look at the opcode.
             if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
                 if ignore_branch:
                     # Someone earlier wanted us to ignore this branch.
@@ -570,15 +571,15 @@ class ByteParser(object):
         """
         chunks = self._split_into_chunks()
 
-        # A map from byte offsets to chunks jumped into.
+        # A map from each byte offset to the chunk starting at that offset.
         byte_chunks = dict((c.byte, c) for c in chunks)
 
-        # There's always an entrance at the first chunk.
-        yield (-1, byte_chunks[0].line)
-
         # Traverse from the first chunk in each line, and yield arcs where
         # the trace function will be invoked.
         for chunk in chunks:
+            if chunk.entrance:
+                yield (-1, chunk.line)
+
             if not chunk.first:
                 continue
 
@@ -586,7 +587,7 @@ class ByteParser(object):
             chunks_to_consider = [chunk]
             while chunks_to_consider:
                 # Get the chunk we're considering, and make sure we don't
-                # consider it again
+                # consider it again.
                 this_chunk = chunks_to_consider.pop()
                 chunks_considered.add(this_chunk)
 
@@ -649,6 +650,8 @@ class Chunk(object):
 
     .. _basic block: http://en.wikipedia.org/wiki/Basic_block
 
+    `byte` is the offset of the bytecode that starts this chunk.
+
     `line` is the source line number containing this chunk.
 
     `first` is true if this is the first chunk in the source line.
@@ -656,19 +659,24 @@ class Chunk(object):
     An exit < 0 means the chunk can leave the code (return).  The exit is
     the negative of the starting line number of the code block.
 
+    The `entrance` attribute is a boolean indicating whether the code object
+    can be entered at this chunk.
+
     """
     def __init__(self, byte, line, first):
         self.byte = byte
         self.line = line
         self.first = first
         self.length = 0
+        self.entrance = False
         self.exits = set()
 
     def __repr__(self):
-        if self.first:
-            bang = "!"
-        else:
-            bang = ""
-        return "<%d+%d @%d%s %r>" % (
-            self.byte, self.length, self.line, bang, list(self.exits)
+        return "<%d+%d @%d%s%s %r>" % (
+            self.byte,
+            self.length,
+            self.line,
+            "!" if self.first else "",
+            "v" if self.entrance else "",
+            list(self.exits),
             )