CodeAnalyzer was a terminology conflict with coverage.analysis, and it's really more of a parser anyway.

--HG-- rename : coverage/analyzer.py => coverage/parser.py
author: Ned Batchelder <ned@nedbatchelder.com> 2009-03-13 07:48:44 -0400
committer: Ned Batchelder <ned@nedbatchelder.com> 2009-03-13 07:48:44 -0400
commit: 67a7f3e65aab32763a2b3df2295c5c698ce001f0 (patch)
tree: 97a84e1940321193d6d38f43e07b15f77b56b309 /coverage/parser.py
parent: 678042ae6c558821f550f018896318348128ac25 (diff)
download: python-coveragepy-git-67a7f3e65aab32763a2b3df2295c5c698ce001f0.tar.gz
1 files changed, 232 insertions, 0 deletions
diff --git a/coverage/parser.py b/coverage/parser.py
new file mode 100644
index 00000000..b1997b11
--- /dev/null
+++ b/coverage/parser.py
@@ -0,0 +1,232 @@
+"""Code parsing for coverage.py"""
+
+import re, token, tokenize, types
+import cStringIO as StringIO
+
+from coverage.misc import nice_pair, CoverageException
+
+
+# Python version compatibility
+try:
+    set()       # the builtin set type is new in Python 2.4
+except NameError:
+    import sets         # no builtin set here: fall back to the sets module
+    set = sets.Set      # pylint: disable-msg=W0622
+    
+
+class CodeParser:
+    """Parse code to find executable lines, excluded lines, etc."""
+    
+    def __init__(self, show_tokens=False):
+        self.show_tokens = show_tokens  # when true, raw_parse prints every token it reads
+
+        # The text lines of the parsed code; None until raw_parse has run.
+        self.lines = None
+
+        # The 1-based line numbers of excluded lines of code.
+        self.excluded = set()
+        
+        # The 1-based line numbers of docstring lines.
+        self.docstrings = set()
+        
+        # A dict mapping each line of a multi-line statement to that statement's (lo,hi) pair.
+        self.multiline = {}
+        
+        # The line numbers that start statements, gathered from compiled code objects.
+        self.statement_starts = set()
+
+    def find_statement_starts(self, code):
+        """Find the starts of statements in compiled code.
+    
+        Uses co_lnotab described in Python/compile.c to find line numbers that
+        start statements, adding them to `self.statement_starts`.
+    
+        """
+        # Adapted from dis.py in the standard library.
+        byte_increments = [ord(c) for c in code.co_lnotab[0::2]]  # even-indexed entries
+        line_increments = [ord(c) for c in code.co_lnotab[1::2]]  # odd-indexed entries
+    
+        last_line_num = None  # most recent line added, to avoid re-adding it
+        line_num = code.co_firstlineno
+        for byte_incr, line_incr in zip(byte_increments, line_increments):
+            if byte_incr:  # entries with a zero byte increment only move the line number
+                if line_num != last_line_num:
+                    self.statement_starts.add(line_num)
+                    last_line_num = line_num
+            line_num += line_incr
+        if line_num != last_line_num:  # record the line reached after the last entry
+            self.statement_starts.add(line_num)
+
+    def find_statements(self, code):
+        """Find the statements in `code`.
+        
+        Update `self.statement_starts`, a set of line numbers that start
+        statements.  Recurses into all code objects reachable from `code`.
+        
+        """
+        # Adapted from trace.py in the standard library.
+
+        # Get all of the lineno information from this code object itself.
+        self.find_statement_starts(code)
+    
+        # Check the constants for references to other (nested) code objects.
+        for c in code.co_consts:
+            if isinstance(c, types.CodeType):
+                # Found another code object, so recurse into it.
+                self.find_statements(c)
+
+    def raw_parse(self, text=None, filename=None, exclude=None):
+        """Parse `text` to find the interesting facts about its lines.
+        
+        A handful of member fields are updated.
+        
+        """
+        if not text:  # no text (or empty text) given, so read the source from `filename`
+            sourcef = open(filename, 'rU')  # NOTE(review): 'rU' presumably for universal newlines
+            text = sourcef.read()
+            sourcef.close()  # NOTE(review): not reached if read() raises; consider try/finally
+        text = text.replace('\r\n', '\n')  # normalize CRLF line endings
+        self.lines = text.split('\n')
+
+        # Find lines which match an exclusion pattern.
+        if exclude:
+            re_exclude = re.compile(exclude)
+            for i, ltext in enumerate(self.lines):
+                if re_exclude.search(ltext):
+                    self.excluded.add(i+1)  # line numbers are 1-based
+    
+        # Tokenize, to find excluded suites, to find docstrings, and to find
+        # multi-line statements.
+        indent = 0  # current depth, counted in INDENT/DEDENT tokens
+        exclude_indent = 0  # depth at which the current excluded suite started
+        excluding = False  # true while inside an excluded suite
+        prev_toktype = token.INDENT  # type of the previously seen token
+        first_line = None  # first line of the statement in progress, or None between statements
+
+        tokgen = tokenize.generate_tokens(StringIO.StringIO(text).readline)
+        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
+            if self.show_tokens:
+                print "%10s %5s %-20r %r" % (
+                    tokenize.tok_name.get(toktype, toktype),
+                    nice_pair((slineno, elineno)), ttext, ltext
+                    )
+            if toktype == token.INDENT:
+                indent += 1
+            elif toktype == token.DEDENT:
+                indent -= 1
+            elif toktype == token.OP and ttext == ':':
+                if not excluding and elineno in self.excluded:
+                    # Start excluding a suite.  We trigger off of the colon
+                    # token so that the #pragma comment will be recognized on
+                    # the same line as the colon.
+                    exclude_indent = indent
+                    excluding = True
+            elif toktype == token.STRING and prev_toktype == token.INDENT:
+                # Strings that are first on an indented line are docstrings.
+                # (a trick from trace.py in the stdlib.)
+                for i in xrange(slineno, elineno+1):
+                    self.docstrings.add(i)
+            elif toktype == token.NEWLINE:
+                if first_line is not None and elineno != first_line:
+                    # We're at the end of a line, and we've ended on a
+                    # different line than the first line of the statement,
+                    # so record a multi-line range.
+                    rng = (first_line, elineno)
+                    for l in xrange(first_line, elineno+1):
+                        self.multiline[l] = rng
+                first_line = None  # the statement is finished
+                
+            if ttext.strip() and toktype != tokenize.COMMENT:
+                # A non-whitespace token.
+                if first_line is None:
+                    # The token is not whitespace, and is the first in a
+                    # statement.
+                    first_line = slineno
+                    # Check whether to end an excluded suite.
+                    if excluding and indent <= exclude_indent:
+                        excluding = False
+                    if excluding:
+                        self.excluded.add(elineno)
+                        
+            prev_toktype = toktype
+
+        # Find the starts of the executable statements.
+        filename = filename or "<code>"  # placeholder name when only text was given
+        try:
+            # Python 2.3 and 2.4 don't like partial last lines, so be sure the
+            # text ends nicely for them.
+            text += '\n'
+            code = compile(text, filename, "exec")
+        except SyntaxError, synerr:
+            raise CoverageException(
+                "Couldn't parse '%s' as Python source: '%s' at line %d" %
+                    (filename, synerr.msg, synerr.lineno)
+                )
+
+        self.find_statements(code)  # fills self.statement_starts, including nested code objects
+
+    def map_to_first_line(self, lines, ignore=None):
+        """Map the line numbers in `lines` to the correct first line of the
+        statement.
+        
+        Skip any line mentioned in `ignore`.
+        
+        Returns a sorted list of the first lines.
+        
+        """
+        ignore = ignore or []
+        lset = set()  # collects first lines; a set, so duplicates collapse
+        for l in lines:
+            if l in ignore:  # NOTE(review): a linear scan whenever `ignore` is a list
+                continue
+            rng = self.multiline.get(l)  # the (lo,hi) pair if l is in a multi-line statement
+            if rng:
+                new_l = rng[0]
+            else:
+                new_l = l
+            if new_l not in ignore:  # the first line itself may be ignored too
+                lset.add(new_l)
+        lines = list(lset)
+        lines.sort()
+        return lines
+    
+    def parse_source(self, text=None, filename=None, exclude=None):
+        """Parse source text to find executable lines, excluded lines, etc.
+        
+        Source can be provided as `text`, the text itself, or `filename`, from
+        which text will be read.  Excluded lines are those that match `exclude`,
+        a regex.
+        
+        Return values are 1) a sorted list of executable line numbers,
+        2) a sorted list of excluded line numbers, and 3) a dict mapping line
+        numbers to pairs (lo,hi) for multi-line statements.
+        
+        """
+        self.raw_parse(text, filename, exclude)
+        
+        excluded_lines = self.map_to_first_line(self.excluded)  # excluded lines, as statement first lines
+        ignore = excluded_lines + list(self.docstrings)  # neither excluded nor docstring lines count as executable
+        lines = self.map_to_first_line(self.statement_starts, ignore)
+    
+        return lines, excluded_lines, self.multiline
+
+    def print_parse_results(self):
+        """Print the results of the parsing."""
+        for i, ltext in enumerate(self.lines):
+            lineno = i+1  # line numbers are 1-based
+            m0 = m1 = m2 = ' '  # marker columns: statement start, docstring, excluded
+            if lineno in self.statement_starts:
+                m0 = '-'
+            if lineno in self.docstrings:
+                m1 = '"'
+            if lineno in self.excluded:
+                m2 = 'x'
+            print "%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext)
+
+
+if __name__ == '__main__':  # run as a script: parse one file and dump the results
+    import sys
+    
+    parser = CodeParser(show_tokens=True)  # print every token while parsing
+    parser.raw_parse(filename=sys.argv[1], exclude=r"no\s*cover")  # file to parse is the first argument
+    parser.print_parse_results()
author	Ned Batchelder <ned@nedbatchelder.com>	2009-03-13 07:48:44 -0400
committer	Ned Batchelder <ned@nedbatchelder.com>	2009-03-13 07:48:44 -0400
commit	67a7f3e65aab32763a2b3df2295c5c698ce001f0 (patch)
tree	97a84e1940321193d6d38f43e07b15f77b56b309 /coverage/parser.py
parent	678042ae6c558821f550f018896318348128ac25 (diff)
download	python-coveragepy-git-67a7f3e65aab32763a2b3df2295c5c698ce001f0.tar.gz