path: root/coverage/analyzer.py
author    Ned Batchelder <ned@nedbatchelder.com>  2009-03-13 07:48:44 -0400
committer Ned Batchelder <ned@nedbatchelder.com>  2009-03-13 07:48:44 -0400
commit    0827ad1c07f6c7e1eb14b7b99484f0e141192642 (patch)
tree      f8d27fe482ece2f77de9cf033787e60bff872bed /coverage/analyzer.py
parent    1aff296970383b278915a4be54ba6bb7edf21aed (diff)
download  python-coveragepy-0827ad1c07f6c7e1eb14b7b99484f0e141192642.tar.gz
CodeAnalyzer was a terminology conflict with coverage.analysis, and it's really more of a parser anyway.
Diffstat (limited to 'coverage/analyzer.py')
-rw-r--r--  coverage/analyzer.py | 232
1 file changed, 0 insertions(+), 232 deletions(-)
diff --git a/coverage/analyzer.py b/coverage/analyzer.py
deleted file mode 100644
index 55dae7f..0000000
--- a/coverage/analyzer.py
+++ /dev/null
@@ -1,232 +0,0 @@
-"""Code analysis for coverage.py"""
-
-import re, token, tokenize, types
-import cStringIO as StringIO
-
-from coverage.misc import nice_pair, CoverageException
-
-
-# Python version compatibility
-try:
- set() # new in 2.4
-except NameError:
- import sets
- set = sets.Set # pylint: disable-msg=W0622
-
-
-class CodeAnalyzer:
- """Analyze code to find executable lines, excluded lines, etc."""
-
- def __init__(self, show_tokens=False):
- self.show_tokens = show_tokens
-
- # The text lines of the analyzed code.
- self.lines = None
-
- # The line numbers of excluded lines of code.
- self.excluded = set()
-
- # The line numbers of docstring lines.
- self.docstrings = set()
-
- # A dict mapping line numbers to (lo,hi) for multi-line statements.
- self.multiline = {}
-
- # The line numbers that start statements.
- self.statement_starts = set()
-
- def find_statement_starts(self, code):
- """Find the starts of statements in compiled code.
-
- Uses co_lnotab described in Python/compile.c to find line numbers that
- start statements, adding them to `self.statement_starts`.
-
- """
- # Adapted from dis.py in the standard library.
- byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
- line_increments = [ord(c) for c in code.co_lnotab[1::2]]
-
- last_line_num = None
- line_num = code.co_firstlineno
- for byte_incr, line_incr in zip(byte_increments, line_increments):
- if byte_incr:
- if line_num != last_line_num:
- self.statement_starts.add(line_num)
- last_line_num = line_num
- line_num += line_incr
- if line_num != last_line_num:
- self.statement_starts.add(line_num)
-
- def find_statements(self, code):
- """Find the statements in `code`.
-
- Update `self.statement_starts`, a set of line numbers that start
- statements. Recurses into all code objects reachable from `code`.
-
- """
- # Adapted from trace.py in the standard library.
-
- # Get all of the lineno information from this code.
- self.find_statement_starts(code)
-
- # Check the constants for references to other code objects.
- for c in code.co_consts:
- if isinstance(c, types.CodeType):
- # Found another code object, so recurse into it.
- self.find_statements(c)
-
- def raw_analyze(self, text=None, filename=None, exclude=None):
- """Analyze `text` to find the interesting facts about its lines.
-
- A handful of member fields are updated.
-
- """
- if not text:
- sourcef = open(filename, 'rU')
- text = sourcef.read()
- sourcef.close()
- text = text.replace('\r\n', '\n')
- self.lines = text.split('\n')
-
- # Find lines which match an exclusion pattern.
- if exclude:
- re_exclude = re.compile(exclude)
- for i, ltext in enumerate(self.lines):
- if re_exclude.search(ltext):
- self.excluded.add(i+1)
-
- # Tokenize, to find excluded suites, to find docstrings, and to find
- # multi-line statements.
- indent = 0
- exclude_indent = 0
- excluding = False
- prev_toktype = token.INDENT
- first_line = None
-
- tokgen = tokenize.generate_tokens(StringIO.StringIO(text).readline)
- for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
- if self.show_tokens:
- print "%10s %5s %-20r %r" % (
- tokenize.tok_name.get(toktype, toktype),
- nice_pair((slineno, elineno)), ttext, ltext
- )
- if toktype == token.INDENT:
- indent += 1
- elif toktype == token.DEDENT:
- indent -= 1
- elif toktype == token.OP and ttext == ':':
- if not excluding and elineno in self.excluded:
- # Start excluding a suite. We trigger off of the colon
- # token so that the #pragma comment will be recognized on
- # the same line as the colon.
- exclude_indent = indent
- excluding = True
- elif toktype == token.STRING and prev_toktype == token.INDENT:
- # Strings that are first on an indented line are docstrings.
- # (a trick from trace.py in the stdlib.)
- for i in xrange(slineno, elineno+1):
- self.docstrings.add(i)
- elif toktype == token.NEWLINE:
- if first_line is not None and elineno != first_line:
- # We're at the end of a line, and we've ended on a
- # different line than the first line of the statement,
- # so record a multi-line range.
- rng = (first_line, elineno)
- for l in xrange(first_line, elineno+1):
- self.multiline[l] = rng
- first_line = None
-
- if ttext.strip() and toktype != tokenize.COMMENT:
- # A non-whitespace token.
- if first_line is None:
- # The token is not whitespace, and is the first in a
- # statement.
- first_line = slineno
- # Check whether to end an excluded suite.
- if excluding and indent <= exclude_indent:
- excluding = False
- if excluding:
- self.excluded.add(elineno)
-
- prev_toktype = toktype
-
- # Find the starts of the executable statements.
- filename = filename or "<code>"
- try:
- # Python 2.3 and 2.4 don't like partial last lines, so be sure the
- # text ends nicely for them.
- text += '\n'
- code = compile(text, filename, "exec")
- except SyntaxError, synerr:
- raise CoverageException(
- "Couldn't parse '%s' as Python source: '%s' at line %d" %
- (filename, synerr.msg, synerr.lineno)
- )
-
- self.find_statements(code)
-
- def map_to_first_line(self, lines, ignore=None):
- """Map the line numbers in `lines` to the correct first line of the
- statement.
-
- Skip any line mentioned in `ignore`.
-
- Returns a sorted list of the first lines.
-
- """
- ignore = ignore or []
- lset = set()
- for l in lines:
- if l in ignore:
- continue
- rng = self.multiline.get(l)
- if rng:
- new_l = rng[0]
- else:
- new_l = l
- if new_l not in ignore:
- lset.add(new_l)
- lines = list(lset)
- lines.sort()
- return lines
-
- def analyze_source(self, text=None, filename=None, exclude=None):
- """Analyze source text to find executable lines, excluded lines, etc.
-
- Source can be provided as `text`, the text itself, or `filename`, from
- which text will be read. Excluded lines are those that match `exclude`,
- a regex.
-
- Return values are 1) a sorted list of executable line numbers,
- 2) a sorted list of excluded line numbers, and 3) a dict mapping line
- numbers to pairs (lo,hi) for multi-line statements.
-
- """
- self.raw_analyze(text, filename, exclude)
-
- excluded_lines = self.map_to_first_line(self.excluded)
- ignore = excluded_lines + list(self.docstrings)
- lines = self.map_to_first_line(self.statement_starts, ignore)
-
- return lines, excluded_lines, self.multiline
-
- def print_analysis(self):
- """Print the results of the analysis."""
- for i, ltext in enumerate(self.lines):
- lineno = i+1
- m0 = m1 = m2 = ' '
- if lineno in self.statement_starts:
- m0 = '-'
- if lineno in self.docstrings:
- m1 = '"'
- if lineno in self.excluded:
- m2 = 'x'
- print "%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext)
-
-
-if __name__ == '__main__':
- import sys
-
- analyzer = CodeAnalyzer(show_tokens=True)
- analyzer.raw_analyze(filename=sys.argv[1], exclude=r"no\s*cover")
- analyzer.print_analysis()
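
Notes on the deleted code. find_statement_starts decodes co_lnotab by hand, and find_statements recurses through co_consts to reach nested code objects. For reference, the standard library's dis.findlinestarts() produces the same (offset, lineno) pairs and hides the line-table format (co_lnotab then, co_lines() since CPython 3.10). A minimal Python 3 sketch of both steps together; walk_code and statement_starts are illustrative names, not coverage.py API:

import dis
import types

def walk_code(code):
    # Yield `code` and every code object nested in its constants,
    # mirroring CodeAnalyzer.find_statements' recursion.
    yield code
    for const in code.co_consts:
        if isinstance(const, types.CodeType):
            for sub in walk_code(const):
                yield sub

def statement_starts(source, filename="<code>"):
    # Collect the line numbers that start statements, the job that
    # find_statement_starts did by decoding co_lnotab directly.
    starts = set()
    for code in walk_code(compile(source, filename, "exec")):
        for _offset, lineno in dis.findlinestarts(code):
            if lineno:  # skip None/0 pseudo-lines on newer CPython
                starts.add(lineno)
    return sorted(starts)

print(statement_starts("x = 1\ndef f():\n    return 2\n"))  # [1, 2, 3]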
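
raw_analyze drives its second pass off tokenize.generate_tokens(), which yields 5-tuples of (type, text, (start row, col), (end row, col), source line); the colon, STRING-after-INDENT, and NEWLINE branches above key off those fields. A small Python 3 sketch of that token stream (the deleted file targeted Python 2, hence cStringIO and print statements):

import io
import tokenize

source = 'x = (1 +\n     2)\n"""doc"""\n'
tokgen = tokenize.generate_tokens(io.StringIO(source).readline)
for toktype, ttext, (srow, _scol), (erow, _ecol), ltext in tokgen:
    # tok_name maps the numeric token type to its symbolic name.
    print("%-10s %d-%d %r" % (tokenize.tok_name[toktype], srow, erow, ttext))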
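
map_to_first_line collapses any line belonging to a multi-line statement onto that statement's first line, using the (lo, hi) ranges stored in self.multiline. A toy illustration of the mapping, with made-up data:

# Hypothetical: lines 1-2 form one statement, line 3 stands alone.
multiline = {1: (1, 2), 2: (1, 2)}
lines = [2, 3]
firsts = sorted({multiline.get(l, (l, l))[0] for l in lines})
print(firsts)  # [1, 3]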