diff options
author | Ned Batchelder <ned@nedbatchelder.com> | 2009-03-13 07:48:44 -0400 |
---|---|---|
committer | Ned Batchelder <ned@nedbatchelder.com> | 2009-03-13 07:48:44 -0400 |
commit | 67a7f3e65aab32763a2b3df2295c5c698ce001f0 (patch) | |
tree | 97a84e1940321193d6d38f43e07b15f77b56b309 /coverage/analyzer.py | |
parent | 678042ae6c558821f550f018896318348128ac25 (diff) | |
download | python-coveragepy-git-67a7f3e65aab32763a2b3df2295c5c698ce001f0.tar.gz |
CodeAnalyzer was a terminology conflict with coverage.analysis, and it's really more of a parser anyway.
--HG--
rename : coverage/analyzer.py => coverage/parser.py
Diffstat (limited to 'coverage/analyzer.py')
-rw-r--r-- | coverage/analyzer.py | 232 |
1 file changed, 0 insertions, 232 deletions
"""Code analysis for coverage.py"""

import io
import re
import token
import tokenize
import types

try:
    from coverage.misc import nice_pair, CoverageException
except ImportError:
    # Minimal fallbacks so this module can still be run standalone (see the
    # __main__ debugging entry point below) outside a coverage.py checkout.
    class CoverageException(Exception):
        """An exception specific to coverage.py."""

    def nice_pair(pair):
        """Format a pair of line numbers as "lo-hi", or "lo" if they match."""
        start, end = pair
        if start == end:
            return "%d" % start
        return "%d-%d" % (start, end)


class CodeAnalyzer:
    """Analyze Python source to find executable lines, excluded lines, etc."""

    def __init__(self, show_tokens=False):
        # Debugging aid: if true, print each token as it is scanned.
        self.show_tokens = show_tokens

        # The text lines of the analyzed code.
        self.lines = None

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # A dict mapping line numbers to (lo, hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

    def find_statement_starts(self, code):
        """Find the starts of statements in compiled `code`.

        Adds line numbers that start statements to `self.statement_starts`.

        """
        if hasattr(code, "co_lines"):
            # Python 3.10+: co_lines() is the supported line-table API
            # (PEP 626).  Entries can carry line None (bytecode with no
            # source line), and since 3.11 an artificial line 0 for the
            # module-level RESUME; skip both.
            for _start, _end, line_num in code.co_lines():
                if line_num is not None and line_num >= code.co_firstlineno:
                    self.statement_starts.add(line_num)
            return

        # Older interpreters: decode co_lnotab, adapted from dis.py in the
        # standard library.  Indexing bytes yields ints on Python 3, so no
        # ord() calls are needed (the old ord() form broke here).
        byte_increments = code.co_lnotab[0::2]
        line_increments = code.co_lnotab[1::2]

        last_line_num = None
        line_num = code.co_firstlineno
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                if line_num != last_line_num:
                    self.statement_starts.add(line_num)
                    last_line_num = line_num
            if line_incr >= 0x80:
                # Since Python 3.6, line deltas in co_lnotab are signed bytes.
                line_incr -= 0x100
            line_num += line_incr
        if line_num != last_line_num:
            self.statement_starts.add(line_num)

    def find_statements(self, code):
        """Find the statements in `code`.

        Update `self.statement_starts`, a set of line numbers that start
        statements.  Recurses into all code objects reachable from `code`.

        """
        # Adapted from trace.py in the standard library.

        # Get all of the line number information from this code.
        self.find_statement_starts(code)

        # Check the constants for references to other code objects.
        for const in code.co_consts:
            if isinstance(const, types.CodeType):
                # Found another code object, so recurse into it.
                self.find_statements(const)

    def raw_analyze(self, text=None, filename=None, exclude=None):
        """Analyze `text` to find the interesting facts about its lines.

        If `text` is not given, it is read from `filename`.  Lines matching
        the regex `exclude` (and the suites they introduce) are recorded as
        excluded.  A handful of member fields are updated.

        """
        if not text:
            # `with` guarantees the file is closed even if read() raises.
            with open(filename) as sourcef:
                text = sourcef.read()
        text = text.replace('\r\n', '\n')
        self.lines = text.split('\n')

        # Find lines which match an exclusion pattern.
        if exclude:
            re_exclude = re.compile(exclude)
            for i, ltext in enumerate(self.lines):
                if re_exclude.search(ltext):
                    self.excluded.add(i + 1)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None

        tokgen = tokenize.generate_tokens(io.StringIO(text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.)
                for i in range(slineno, elineno + 1):
                    self.docstrings.add(i)
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno + 1):
                        self.multiline[l] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        filename = filename or "<code>"
        try:
            # Make sure the text ends with a newline so `compile` accepts it.
            text += '\n'
            code = compile(text, filename, "exec")
        except SyntaxError as synerr:
            # Chain the cause so the original traceback is preserved.
            raise CoverageException(
                "Couldn't parse '%s' as Python source: '%s' at line %d" %
                (filename, synerr.msg, synerr.lineno)
            ) from synerr

        self.find_statements(code)

    def map_to_first_line(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Skip any line mentioned in `ignore`.

        Returns a sorted list of the first lines.

        """
        ignore = ignore or []
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            rng = self.multiline.get(l)
            new_l = rng[0] if rng else l
            if new_l not in ignore:
                lset.add(new_l)
        return sorted(lset)

    def analyze_source(self, text=None, filename=None, exclude=None):
        """Analyze source text to find executable lines, excluded lines, etc.

        Source can be provided as `text`, the text itself, or `filename`,
        from which text will be read.  Excluded lines are those that match
        `exclude`, a regex.

        Return values are 1) a sorted list of executable line numbers, 2) a
        sorted list of excluded line numbers, and 3) a dict mapping line
        numbers to pairs (lo, hi) for multi-line statements.

        """
        self.raw_analyze(text, filename, exclude)

        excluded_lines = self.map_to_first_line(self.excluded)
        ignore = excluded_lines + list(self.docstrings)
        lines = self.map_to_first_line(self.statement_starts, ignore)

        return lines, excluded_lines, self.multiline

    def print_analysis(self):
        """Print the results of the analysis, one marked-up line per line."""
        for i, ltext in enumerate(self.lines):
            lineno = i + 1
            # Markers: '-' statement start, '"' docstring, 'x' excluded.
            m0 = m1 = m2 = ' '
            if lineno in self.statement_starts:
                m0 = '-'
            if lineno in self.docstrings:
                m1 = '"'
            if lineno in self.excluded:
                m2 = 'x'
            print("%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext))


if __name__ == '__main__':
    import sys

    analyzer = CodeAnalyzer(show_tokens=True)
    analyzer.raw_analyze(filename=sys.argv[1], exclude=r"no\s*cover")
    analyzer.print_analysis()