author     Ned Batchelder <ned@nedbatchelder.com>  2014-12-14 08:26:06 -0500
committer  Ned Batchelder <ned@nedbatchelder.com>  2014-12-14 08:26:06 -0500
commit     149e76b9dfd4801ee752cef5513b88e408bf7119 (patch)
tree       0a1889e2900c0cde60758d93bc1ad7359e4d6279 /coverage/phystokens.py
parent     becff20dbf026e4e0f260b44a377c9083a8e0243 (diff)
download   python-coveragepy-git-149e76b9dfd4801ee752cef5513b88e408bf7119.tar.gz
Move some code, and fix pep8 things
Diffstat (limited to 'coverage/phystokens.py')
-rw-r--r--  coverage/phystokens.py  53
1 file changed, 50 insertions(+), 3 deletions(-)
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 3fd1165c..4faa3c3f 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -1,8 +1,11 @@
"""Better tokenizing for coverage.py."""
-import codecs, keyword, re, sys, token, tokenize
-
-from coverage.parser import generate_tokens
+import codecs
+import keyword
+import re
+import sys
+import token
+import tokenize
def phys_tokens(toks):
@@ -111,6 +114,39 @@ def source_token_lines(source):
yield line
+class CachedTokenizer(object):
+ """A one-element cache around tokenize.generate_tokens.
+
+ When reporting, coverage.py tokenizes files twice, once to find the
+ structure of the file, and once to syntax-color it. Tokenizing is
+ expensive, and easily cached.
+
+ This is a one-element cache so that our twice-in-a-row tokenizing doesn't
+ actually tokenize twice.
+
+ """
+ def __init__(self):
+ self.last_text = None
+ self.last_tokens = None
+
+ def generate_tokens(self, text):
+ """A stand-in for `tokenize.generate_tokens`."""
+ # Check the type first so we don't compare bytes to unicode and get
+ # warnings.
+ if type(text) != type(self.last_text) or text != self.last_text:
+ self.last_text = text
+ line_iter = iter(text.splitlines(True))
+ try:
+ readline = line_iter.next
+ except AttributeError:
+ readline = line_iter.__next__
+ self.last_tokens = list(tokenize.generate_tokens(readline))
+ return self.last_tokens
+
+# Create our generate_tokens cache as a callable replacement function.
+generate_tokens = CachedTokenizer().generate_tokens
+
+
def source_encoding(source):
"""Determine the encoding for `source` (a string), according to PEP 263.
@@ -205,3 +241,14 @@ def source_encoding(source):
return encoding
return default
+
+
+# Reading Python source and interpreting the coding comment is a big deal.
+if sys.version_info >= (3, 0):
+ # Python 3.2 provides `tokenize.open`, the best way to open source files.
+ import tokenize
+ open_python_source = tokenize.open
+else:
+ def open_python_source(fname):
+ """Open a source file the best way."""
+ return open(fname, "rU")
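The new `open_python_source` helper picks the right way to open a source file for the running Python version. A minimal usage sketch (illustration only; "example.py" is a hypothetical file name):

    from coverage.phystokens import open_python_source

    # On Python 3 this is tokenize.open(), which detects the file's PEP 263
    # coding comment (or BOM) and returns an already-decoded text stream.
    # On Python 2 it falls back to open(fname, "rU") in universal-newline mode.
    with open_python_source("example.py") as f:
        source = f.read()

Using tokenize.open means the decoded text matches what the Python compiler itself would see for the same file, which is why the comment in the diff calls it the best way to open source files.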