Diffstat (limited to 'coverage')
-rw-r--r--   coverage/annotate.py     6
-rw-r--r--   coverage/cmdline.py      6
-rw-r--r--   coverage/control.py      7
-rw-r--r--   coverage/html.py         3
-rw-r--r--   coverage/misc.py         6
-rw-r--r--   coverage/parser.py      58
-rw-r--r--   coverage/phystokens.py   9
-rw-r--r--   coverage/results.py      2
-rw-r--r--   coverage/templite.py   226
-rw-r--r--   coverage/xmlreport.py    2
10 files changed, 206 insertions, 119 deletions
diff --git a/coverage/annotate.py b/coverage/annotate.py
index b7f32c1c..19777eaf 100644
--- a/coverage/annotate.py
+++ b/coverage/annotate.py
@@ -59,9 +59,9 @@ class AnnotateReporter(Reporter):
         dest_file = filename + ",cover"
         dest = open(dest_file, 'w')
 
-        statements = analysis.statements
-        missing = analysis.missing
-        excluded = analysis.excluded
+        statements = sorted(analysis.statements)
+        missing = sorted(analysis.missing)
+        excluded = sorted(analysis.excluded)
 
         lineno = 0
         i = 0
diff --git a/coverage/cmdline.py b/coverage/cmdline.py
index 93732839..c311976d 100644
--- a/coverage/cmdline.py
+++ b/coverage/cmdline.py
@@ -1,6 +1,6 @@
 """Command-line support for Coverage."""
 
-import optparse, os, sys, traceback
+import optparse, os, sys, time, traceback
 
 from coverage.execfile import run_python_file, run_python_module
 from coverage.misc import CoverageException, ExceptionDuringRun, NoSource
@@ -715,7 +715,11 @@ def main(argv=None):
     if argv is None:
         argv = sys.argv[1:]
     try:
+        start = time.clock()
         status = CoverageScript().command_line(argv)
+        end = time.clock()
+        if 0:
+            print("time: %.3fs" % (end - start))
     except ExceptionDuringRun as err:
         # An exception was caught while running the product code.  The
         # sys.exc_info() return tuple is packed into an ExceptionDuringRun
diff --git a/coverage/control.py b/coverage/control.py
index 51ff0439..fa6fec74 100644
--- a/coverage/control.py
+++ b/coverage/control.py
@@ -571,8 +571,11 @@ class coverage(object):
         """
         analysis = self._analyze(morf)
         return (
-            analysis.filename, analysis.statements, analysis.excluded,
-            analysis.missing, analysis.missing_formatted()
+            analysis.filename,
+            sorted(analysis.statements),
+            sorted(analysis.excluded),
+            sorted(analysis.missing),
+            analysis.missing_formatted(),
             )
 
     def _analyze(self, it):
diff --git a/coverage/html.py b/coverage/html.py
index e1966bfb..d168e351 100644
--- a/coverage/html.py
+++ b/coverage/html.py
@@ -176,7 +176,8 @@ class HtmlReporter(Reporter):
         # Get the numbers for this file.
         nums = analysis.numbers
 
-        missing_branch_arcs = analysis.missing_branch_arcs()
+        if self.arcs:
+            missing_branch_arcs = analysis.missing_branch_arcs()
 
         # These classes determine which lines are highlighted by default.
         c_run = "run hide_run"
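The four changes above share one idea: line numbers are now kept as sets inside the analysis objects, and sorting happens only at the reporting edge, where a stable order matters. A minimal sketch of the pattern, with invented values that are not from this commit:

    # Sets make membership tests and set arithmetic cheap.
    statements = {1, 2, 3, 5, 8}      # executable lines
    executed = {1, 2, 5}              # lines that actually ran
    missing = statements - executed   # {3, 8}, order unspecified

    # Sort only when producing output, as the reporters above now do.
    for line in sorted(missing):
        print(line)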
c_run = "run hide_run" diff --git a/coverage/misc.py b/coverage/misc.py index c3fd9e2a..c88d4ecd 100644 --- a/coverage/misc.py +++ b/coverage/misc.py @@ -37,6 +37,8 @@ def format_lines(statements, lines): i = 0 j = 0 start = None + statements = sorted(statements) + lines = sorted(lines) while i < len(statements) and j < len(lines): if statements[i] == lines[j]: if start == None: @@ -111,8 +113,10 @@ class Hasher(object): self.md5.update(to_bytes(str(type(v)))) if isinstance(v, string_class): self.md5.update(to_bytes(v)) + elif v is None: + pass elif isinstance(v, (int, float)): - self.update(str(v)) + self.md5.update(to_bytes(str(v))) elif isinstance(v, (tuple, list)): for e in v: self.update(e) diff --git a/coverage/parser.py b/coverage/parser.py index f2885c07..de6590aa 100644 --- a/coverage/parser.py +++ b/coverage/parser.py @@ -103,7 +103,7 @@ class CodeParser(object): first_line = None empty = True - tokgen = tokenize.generate_tokens(StringIO(self.text).readline) + tokgen = generate_tokens(self.text) for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: if self.show_tokens: # pragma: not covered print("%10s %5s %-20r %r" % ( @@ -170,16 +170,18 @@ class CodeParser(object): first_line = line return first_line - def first_lines(self, lines, ignore=None): + def first_lines(self, lines, *ignores): """Map the line numbers in `lines` to the correct first line of the statement. - Skip any line mentioned in `ignore`. + Skip any line mentioned in any of the sequences in `ignores`. - Returns a sorted list of the first lines. + Returns a set of the first lines. """ - ignore = ignore or [] + ignore = set() + for ign in ignores: + ignore.update(ign) lset = set() for l in lines: if l in ignore: @@ -187,13 +189,13 @@ class CodeParser(object): new_l = self.first_line(l) if new_l not in ignore: lset.add(new_l) - return sorted(lset) + return lset def parse_source(self): """Parse source text to find executable lines, excluded lines, etc. - Return values are 1) a sorted list of executable line numbers, and - 2) a sorted list of excluded line numbers. + Return values are 1) a set of executable line numbers, and 2) a set of + excluded line numbers. Reported line numbers are normalized to the first line of multi-line statements. @@ -209,8 +211,11 @@ class CodeParser(object): ) excluded_lines = self.first_lines(self.excluded) - ignore = excluded_lines + list(self.docstrings) - lines = self.first_lines(self.statement_starts, ignore) + lines = self.first_lines( + self.statement_starts, + excluded_lines, + self.docstrings + ) return lines, excluded_lines @@ -432,14 +437,15 @@ class ByteParser(object): # Get a set of all of the jump-to points. jump_to = set() - for bc in ByteCodes(self.code.co_code): + bytecodes = list(ByteCodes(self.code.co_code)) + for bc in bytecodes: if bc.jump_to >= 0: jump_to.add(bc.jump_to) chunk_lineno = 0 # Walk the byte codes building chunks. - for bc in ByteCodes(self.code.co_code): + for bc in bytecodes: # Maybe have to start a new chunk start_new_chunk = False first_chunk = False @@ -652,3 +658,31 @@ class Chunk(object): return "<%d+%d @%d%s %r>" % ( self.byte, self.length, self.line, bang, list(self.exits) ) + + +class CachedTokenizer(object): + """A one-element cache around tokenize.generate_tokens. + + When reporting, coverage.py tokenizes files twice, once to find the + structure of the file, and once to syntax-color it. Tokenizing is + expensive, and easily cached. + + This is a one-element cache so that our twice-in-a-row tokenizing doesn't + actually tokenize twice. 
+ + """ + def __init__(self): + self.last_text = None + self.last_tokens = None + + def generate_tokens(self, text): + """A stand-in for `tokenize.generate_tokens`.""" + if text != self.last_text: + self.last_text = text + self.last_tokens = list( + tokenize.generate_tokens(StringIO(text).readline) + ) + return self.last_tokens + +# Create our generate_tokens cache as a callable replacement function. +generate_tokens = CachedTokenizer().generate_tokens diff --git a/coverage/phystokens.py b/coverage/phystokens.py index 58522413..e79ce01f 100644 --- a/coverage/phystokens.py +++ b/coverage/phystokens.py @@ -1,7 +1,8 @@ """Better tokenizing for coverage.py.""" import codecs, keyword, re, sys, token, tokenize -from coverage.backward import StringIO # pylint: disable=W0622 +from coverage.parser import generate_tokens + def phys_tokens(toks): """Return all physical tokens, even line continuations. @@ -18,7 +19,7 @@ def phys_tokens(toks): last_ttype = None for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks: if last_lineno != elineno: - if last_line and last_line[-2:] == "\\\n": + if last_line and last_line.endswith("\\\n"): # We are at the beginning of a new line, and the last line # ended with a backslash. We probably have to inject a # backslash token into the stream. Unfortunately, there's more @@ -74,11 +75,11 @@ def source_token_lines(source): is indistinguishable from a final line with a newline. """ - ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL] + ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]) line = [] col = 0 source = source.expandtabs(8).replace('\r\n', '\n') - tokgen = tokenize.generate_tokens(StringIO(source).readline) + tokgen = generate_tokens(source) for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen): mark_start = True for part in re.split('(\n)', ttext): diff --git a/coverage/results.py b/coverage/results.py index e6475afb..0576ae1f 100644 --- a/coverage/results.py +++ b/coverage/results.py @@ -27,7 +27,7 @@ class Analysis(object): # Identify missing statements. executed = self.coverage.data.executed_lines(self.filename) exec1 = self.parser.first_lines(executed) - self.missing = sorted(set(self.statements) - set(exec1)) + self.missing = self.statements - exec1 if self.coverage.data.has_arcs(): self.no_branch = self.parser.lines_matching( diff --git a/coverage/templite.py b/coverage/templite.py index c39e061e..429a5ccc 100644 --- a/coverage/templite.py +++ b/coverage/templite.py @@ -2,7 +2,51 @@ # Coincidentally named the same as http://code.activestate.com/recipes/496702/ -import re, sys +import re + + +class CodeBuilder(object): + """Build source code conveniently.""" + + def __init__(self, indent=0): + self.code = [] + self.indent_amount = indent + + def add_line(self, line): + """Add a line of source to the code. + + Don't include indentations or newlines. 
+ + """ + self.code.append(" " * self.indent_amount) + self.code.append(line) + self.code.append("\n") + + def add_section(self): + """Add a section, a sub-CodeBuilder.""" + sect = CodeBuilder(self.indent_amount) + self.code.append(sect) + return sect + + def indent(self): + """Increase the current indent for following lines.""" + self.indent_amount += 4 + + def dedent(self): + """Decrease the current indent for following lines.""" + self.indent_amount -= 4 + + def __str__(self): + return "".join([str(c) for c in self.code]) + + def get_function(self, fn_name): + """Compile the code, and return the function `fn_name`.""" + assert self.indent_amount == 0 + g = {} + code_text = str(self) + exec(code_text, g) + return g[fn_name] + class Templite(object): """A simple template renderer, for a nano-subset of Django syntax. @@ -39,53 +83,104 @@ class Templite(object): for context in contexts: self.context.update(context) + # We construct a function in source form, then compile it and hold onto + # it, and execute it to render the template. + code = CodeBuilder() + + code.add_line("def render(ctx, dot):") + code.indent() + vars_code = code.add_section() + self.all_vars = set() + self.loop_vars = set() + code.add_line("result = []") + code.add_line("a = result.append") + code.add_line("e = result.extend") + code.add_line("s = str") + + buffered = [] + def flush_output(): + """Force `buffered` to the code builder.""" + if len(buffered) == 1: + code.add_line("a(%s)" % buffered[0]) + elif len(buffered) > 1: + code.add_line("e([%s])" % ",".join(buffered)) + del buffered[:] + # Split the text to form a list of tokens. toks = re.split(r"(?s)({{.*?}}|{%.*?%}|{#.*?#})", text) - # Parse the tokens into a nested list of operations. Each item in the - # list is a tuple with an opcode, and arguments. They'll be - # interpreted by TempliteEngine. - # - # When parsing an action tag with nested content (if, for), the current - # ops list is pushed onto ops_stack, and the parsing continues in a new - # ops list that is part of the arguments to the if or for op. - ops = [] ops_stack = [] for tok in toks: if tok.startswith('{{'): - # Expression: ('exp', expr) - ops.append(('exp', tok[2:-2].strip())) + # An expression to evaluate. + buffered.append("s(%s)" % self.expr_code(tok[2:-2].strip())) elif tok.startswith('{#'): # Comment: ignore it and move on. continue elif tok.startswith('{%'): # Action tag: split into words and parse further. + flush_output() words = tok[2:-2].strip().split() if words[0] == 'if': - # If: ('if', (expr, body_ops)) - if_ops = [] + # An if statement: evaluate the expression to determine if. assert len(words) == 2 - ops.append(('if', (words[1], if_ops))) - ops_stack.append(ops) - ops = if_ops + ops_stack.append('if') + code.add_line("if %s:" % self.expr_code(words[1])) + code.indent() elif words[0] == 'for': - # For: ('for', (varname, listexpr, body_ops)) + # A loop: iterate over expression result. assert len(words) == 4 and words[2] == 'in' - for_ops = [] - ops.append(('for', (words[1], words[3], for_ops))) - ops_stack.append(ops) - ops = for_ops + ops_stack.append('for') + self.loop_vars.add(words[1]) + code.add_line( + "for c_%s in %s:" % ( + words[1], + self.expr_code(words[3]) + ) + ) + code.indent() elif words[0].startswith('end'): # Endsomething. 
@@ -39,53 +83,104 @@ class Templite(object):
         for context in contexts:
             self.context.update(context)
 
+        # We construct a function in source form, then compile it and hold onto
+        # it, and execute it to render the template.
+        code = CodeBuilder()
+
+        code.add_line("def render(ctx, dot):")
+        code.indent()
+        vars_code = code.add_section()
+        self.all_vars = set()
+        self.loop_vars = set()
+        code.add_line("result = []")
+        code.add_line("a = result.append")
+        code.add_line("e = result.extend")
+        code.add_line("s = str")
+
+        buffered = []
+        def flush_output():
+            """Force `buffered` to the code builder."""
+            if len(buffered) == 1:
+                code.add_line("a(%s)" % buffered[0])
+            elif len(buffered) > 1:
+                code.add_line("e([%s])" % ",".join(buffered))
+            del buffered[:]
+
         # Split the text to form a list of tokens.
         toks = re.split(r"(?s)({{.*?}}|{%.*?%}|{#.*?#})", text)
 
-        # Parse the tokens into a nested list of operations.  Each item in the
-        # list is a tuple with an opcode, and arguments.  They'll be
-        # interpreted by TempliteEngine.
-        #
-        # When parsing an action tag with nested content (if, for), the current
-        # ops list is pushed onto ops_stack, and the parsing continues in a new
-        # ops list that is part of the arguments to the if or for op.
-        ops = []
         ops_stack = []
         for tok in toks:
             if tok.startswith('{{'):
-                # Expression: ('exp', expr)
-                ops.append(('exp', tok[2:-2].strip()))
+                # An expression to evaluate.
+                buffered.append("s(%s)" % self.expr_code(tok[2:-2].strip()))
             elif tok.startswith('{#'):
                 # Comment: ignore it and move on.
                 continue
             elif tok.startswith('{%'):
                 # Action tag: split into words and parse further.
+                flush_output()
                 words = tok[2:-2].strip().split()
                 if words[0] == 'if':
-                    # If: ('if', (expr, body_ops))
-                    if_ops = []
+                    # An if statement: evaluate the expression to determine if.
                     assert len(words) == 2
-                    ops.append(('if', (words[1], if_ops)))
-                    ops_stack.append(ops)
-                    ops = if_ops
+                    ops_stack.append('if')
+                    code.add_line("if %s:" % self.expr_code(words[1]))
+                    code.indent()
                 elif words[0] == 'for':
-                    # For: ('for', (varname, listexpr, body_ops))
+                    # A loop: iterate over expression result.
                     assert len(words) == 4 and words[2] == 'in'
-                    for_ops = []
-                    ops.append(('for', (words[1], words[3], for_ops)))
-                    ops_stack.append(ops)
-                    ops = for_ops
+                    ops_stack.append('for')
+                    self.loop_vars.add(words[1])
+                    code.add_line(
+                        "for c_%s in %s:" % (
+                            words[1],
+                            self.expr_code(words[3])
+                            )
+                        )
+                    code.indent()
                 elif words[0].startswith('end'):
                     # Endsomething.  Pop the ops stack
-                    ops = ops_stack.pop()
-                    assert ops[-1][0] == words[0][3:]
+                    end_what = words[0][3:]
+                    if ops_stack[-1] != end_what:
+                        raise SyntaxError("Mismatched end tag: %r" % end_what)
+                    ops_stack.pop()
+                    code.dedent()
                 else:
-                    raise SyntaxError("Don't understand tag %r" % words)
+                    raise SyntaxError("Don't understand tag: %r" % words[0])
             else:
-                ops.append(('lit', tok))
+                # Literal content.  If it isn't empty, output it.
+                if tok:
+                    buffered.append("%r" % tok)
+
+        flush_output()
+
+        for var_name in self.all_vars - self.loop_vars:
+            vars_code.add_line("c_%s = ctx[%r]" % (var_name, var_name))
 
-        assert not ops_stack, "Unmatched action tag: %r" % ops_stack[-1][0]
-        self.ops = ops
+        if ops_stack:
+            raise SyntaxError("Unmatched action tag: %r" % ops_stack[-1])
+
+        code.add_line("return ''.join(result)")
+        code.dedent()
+        self.render_function = code.get_function('render')
+
+    def expr_code(self, expr):
+        """Generate a Python expression for `expr`."""
+        if "|" in expr:
+            pipes = expr.split("|")
+            code = self.expr_code(pipes[0])
+            for func in pipes[1:]:
+                self.all_vars.add(func)
+                code = "c_%s(%s)" % (func, code)
+        elif "." in expr:
+            dots = expr.split(".")
+            code = self.expr_code(dots[0])
+            args = [repr(d) for d in dots[1:]]
+            code = "dot(%s, %s)" % (code, ", ".join(args))
+        else:
+            self.all_vars.add(expr)
+            code = "c_%s" % expr
+        return code
 
     def render(self, context=None):
         """Render this template by applying it to `context`.
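To make the compilation concrete, here is roughly the source that the logic above would generate for the template "<p>Hello {{name}}!</p>" (reconstructed by hand from the hunk, so treat details like the buffering into a single e([...]) call as approximate):

    def render(ctx, dot):
        c_name = ctx['name']     # from vars_code: all_vars minus loop_vars
        result = []
        a = result.append
        e = result.extend
        s = str
        e(['<p>Hello ', s(c_name), '!</p>'])
        return ''.join(result)

A {% for %} tag becomes a real Python for loop over a c_<var> name, with its body indented by code.indent().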
@@ -97,70 +192,15 @@ class Templite(object):
         ctx = dict(self.context)
         if context:
             ctx.update(context)
-
-        # Run it through an engine, and return the result.
-        engine = _TempliteEngine(ctx)
-        engine.execute(self.ops)
-        return "".join(engine.result)
-
-
-class _TempliteEngine(object):
-    """Executes Templite objects to produce strings."""
-    def __init__(self, context):
-        self.context = context
-        self.result = []
-
-    def execute(self, ops):
-        """Execute `ops` in the engine.
-
-        Called recursively for the bodies of if's and loops.
-
-        """
-        for op, args in ops:
-            if op == 'lit':
-                self.result.append(args)
-            elif op == 'exp':
-                try:
-                    self.result.append(str(self.evaluate(args)))
-                except:
-                    exc_class, exc, _ = sys.exc_info()
-                    new_exc = exc_class("Couldn't evaluate {{ %s }}: %s"
-                        % (args, exc))
-                    raise new_exc
-            elif op == 'if':
-                expr, body = args
-                if self.evaluate(expr):
-                    self.execute(body)
-            elif op == 'for':
-                var, lis, body = args
-                vals = self.evaluate(lis)
-                for val in vals:
-                    self.context[var] = val
-                    self.execute(body)
-            else:
-                raise AssertionError("TempliteEngine doesn't grok op %r" % op)
-
-    def evaluate(self, expr):
-        """Evaluate an expression.
-
-        `expr` can have pipes and dots to indicate data access and filtering.
-
-        """
-        if "|" in expr:
-            pipes = expr.split("|")
-            value = self.evaluate(pipes[0])
-            for func in pipes[1:]:
-                value = self.evaluate(func)(value)
-        elif "." in expr:
-            dots = expr.split('.')
-            value = self.evaluate(dots[0])
-            for dot in dots[1:]:
-                try:
-                    value = getattr(value, dot)
-                except AttributeError:
-                    value = value[dot]
-                if hasattr(value, '__call__'):
-                    value = value()
-        else:
-            value = self.context[expr]
+        return self.render_function(ctx, self.do_dots)
+
+    def do_dots(self, value, *dots):
+        """Evaluate dotted expressions at runtime."""
+        for dot in dots:
+            try:
+                value = getattr(value, dot)
+            except AttributeError:
+                value = value[dot]
+            if hasattr(value, '__call__'):
+                value = value()
         return value
diff --git a/coverage/xmlreport.py b/coverage/xmlreport.py
index d4b102fa..f5a4c1ba 100644
--- a/coverage/xmlreport.py
+++ b/coverage/xmlreport.py
@@ -116,7 +116,7 @@ class XmlReporter(Reporter):
             branch_stats = analysis.branch_stats()
 
             # For each statement, create an XML 'line' element.
-            for line in analysis.statements:
+            for line in sorted(analysis.statements):
                 xline = self.xml_out.createElement("line")
                 xline.setAttribute("number", str(line))
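For reference, a usage sketch consistent with the render() signature above and the constructor's `for context in contexts` loop (the data values are invented):

    from coverage.templite import Templite

    templite = Templite(
        "<h1>Hello {{name|upper}}!</h1>"
        "{% for topic in topics %}<p>{{topic}}</p>{% endfor %}"
    )
    text = templite.render({
        'name': "Ned",
        'upper': str.upper,               # filters are just names in the context
        'topics': ['Python', 'Coverage'],
    })

Pipes compile to function calls (c_upper(c_name)), dots go through do_dots at render time, and anything else is a plain context lookup prepared in the vars_code section.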