| author    | Ned Batchelder <ned@nedbatchelder.com>                             | 2014-12-14 08:26:06 -0500 |
|-----------|--------------------------------------------------------------------|---------------------------|
| committer | Ned Batchelder <ned@nedbatchelder.com>                             | 2014-12-14 08:26:06 -0500 |
| commit    | 8d10339238d0115225245d2b9a90579f329ec22f (patch)                   |                           |
| tree      | bfd29ff5b9c36ff63ab617882c48840c9956c0ee /coverage/phystokens.py   |                           |
| parent    | 52f0f80fb552789c79d536a8aca265da04143a58 (diff)                    |                           |
| download  | python-coveragepy-8d10339238d0115225245d2b9a90579f329ec22f.tar.gz |                           |
Move some code, and fix pep8 things
Diffstat (limited to 'coverage/phystokens.py')
-rw-r--r-- | coverage/phystokens.py | 53 |
1 file changed, 50 insertions, 3 deletions
```diff
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 3fd1165..4faa3c3 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -1,8 +1,11 @@
 """Better tokenizing for coverage.py."""
 
-import codecs, keyword, re, sys, token, tokenize
-
-from coverage.parser import generate_tokens
+import codecs
+import keyword
+import re
+import sys
+import token
+import tokenize
 
 
 def phys_tokens(toks):
@@ -111,6 +114,39 @@ def source_token_lines(source):
         yield line
 
 
+class CachedTokenizer(object):
+    """A one-element cache around tokenize.generate_tokens.
+
+    When reporting, coverage.py tokenizes files twice, once to find the
+    structure of the file, and once to syntax-color it.  Tokenizing is
+    expensive, and easily cached.
+
+    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
+    actually tokenize twice.
+
+    """
+    def __init__(self):
+        self.last_text = None
+        self.last_tokens = None
+
+    def generate_tokens(self, text):
+        """A stand-in for `tokenize.generate_tokens`."""
+        # Check the type first so we don't compare bytes to unicode and get
+        # warnings.
+        if type(text) != type(self.last_text) or text != self.last_text:
+            self.last_text = text
+            line_iter = iter(text.splitlines(True))
+            try:
+                readline = line_iter.next
+            except AttributeError:
+                readline = line_iter.__next__
+            self.last_tokens = list(tokenize.generate_tokens(readline))
+        return self.last_tokens
+
+# Create our generate_tokens cache as a callable replacement function.
+generate_tokens = CachedTokenizer().generate_tokens
+
+
 def source_encoding(source):
     """Determine the encoding for `source` (a string), according to PEP 263.
 
@@ -205,3 +241,14 @@ def source_encoding(source):
        return encoding
 
     return default
+
+
+# Reading Python source and interpreting the coding comment is a big deal.
+if sys.version_info >= (3, 0):
+    # Python 3.2 provides `tokenize.open`, the best way to open source files.
+    import tokenize
+    open_python_source = tokenize.open
+else:
+    def open_python_source(fname):
+        """Open a source file the best way."""
+        return open(fname, "rU")
```
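
For context, a rough usage sketch of the new one-element cache: calling the module-level `generate_tokens` twice in a row with the same text should return the very same token list without tokenizing again. The sketch assumes a coverage.py checkout with this commit applied is importable; the sample source strings are invented for illustration.

```python
# Sketch only: assumes coverage.py with this commit is on the import path.
from coverage.phystokens import generate_tokens

source = "x = 1\nif x:\n    x += 1\n"   # made-up sample source

first = generate_tokens(source)     # tokenizes the text
second = generate_tokens(source)    # same text again: served from the cache
assert first is second              # the identical list object comes back

other = generate_tokens("y = 2\n")  # different text replaces the cached entry
assert other is not first
```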
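Similarly, a small sketch of the new `open_python_source` helper: on Python 3 it is `tokenize.open`, which decodes the file according to its PEP 263 coding comment, while on Python 2 it falls back to universal-newline `open(fname, "rU")`. The file path below is only an example.

```python
# Sketch only: assumes coverage.py with this commit is on the import path.
from coverage.phystokens import open_python_source

# The path is an arbitrary example; any Python source file works.
with open_python_source("coverage/phystokens.py") as src:
    text = src.read()           # decoded per the coding comment on Python 3

print(text.splitlines()[0])     # first line of the file
```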