Diffstat (limited to 'tests/test_phystokens.py')
-rw-r--r--  tests/test_phystokens.py  79
1 file changed, 79 insertions, 0 deletions
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
new file mode 100644
index 00000000..e4834e4c
--- /dev/null
+++ b/tests/test_phystokens.py
@@ -0,0 +1,79 @@
+"""Tests for Coverage.py's improved tokenizer."""
+
+import os, re
+from test.coveragetest import CoverageTest
+from coverage.phystokens import source_token_lines
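+# source_token_lines yields the source one physical line at a time, as a
+# list of (category, text) pairs.  The categories are short class codes, as
+# exercised in the tests below: 'com' comment, 'key' keyword, 'nam' name,
+# 'num' number, 'op' operator, 'str' string, 'ws' whitespace.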
+
+
+SIMPLE = """\
+# yay!
+def foo():
+    say('two = %d' % 2)
+"""
+
+MIXED_WS = """\
+def hello():
+ a="Hello world!"
+\tb="indented"
+"""
+
+HERE = os.path.split(__file__)[0]
+
+
+class PhysTokensTest(CoverageTest):
+ """Tests for Coverage.py's improver tokenizer."""
+
+    run_in_temp_dir = False
+
+    def check_tokenization(self, source):
+        """Tokenize `source`, then reassemble it; the result should match."""
+        tokenized = ""
+        for line in source_token_lines(source):
+            text = "".join([t for _, t in line])
+            tokenized += text + "\n"
+        # source_token_lines doesn't preserve trailing spaces, so trim all that
+        # before comparing.
+        source = source.replace('\r\n', '\n')
+        source = re.sub(r"(?m)[ \t]+$", "", source)
+        tokenized = re.sub(r"(?m)[ \t]+$", "", tokenized)
+        self.assertMultiLineEqual(source, tokenized)
+
+    def check_file_tokenization(self, fname):
+        """Use the contents of `fname` for `check_tokenization`."""
+        with open(fname) as f:
+            self.check_tokenization(f.read())
+
+    def test_simple(self):
+        self.assertEqual(list(source_token_lines(SIMPLE)),
+            [
+                [('com', "# yay!")],
+                [('key', 'def'), ('ws', ' '), ('nam', 'foo'), ('op', '('),
+                    ('op', ')'), ('op', ':')],
+                [('ws', '    '), ('nam', 'say'), ('op', '('),
+                    ('str', "'two = %d'"), ('ws', ' '), ('op', '%'),
+                    ('ws', ' '), ('num', '2'), ('op', ')')]
+            ])
+        self.check_tokenization(SIMPLE)
+
+    def test_tab_indentation(self):
+        # Mixed tabs and spaces...
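+        # source_token_lines expands tabs, so both indented lines are
+        # expected to come back as eight spaces of 'ws'.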
+        self.assertEqual(list(source_token_lines(MIXED_WS)),
+            [
+                [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('),
+                    ('op', ')'), ('op', ':')],
+                [('ws', '        '), ('nam', 'a'), ('op', '='),
+                    ('str', '"Hello world!"')],
+                [('ws', '        '), ('nam', 'b'), ('op', '='),
+                    ('str', '"indented"')],
+            ])
+
+    def test_tokenize_real_file(self):
+        # Check the tokenization of a real file (which happens to be large).
+        real_file = os.path.join(HERE, "test_coverage.py")
+        self.check_file_tokenization(real_file)
+
+    def test_stress(self):
+        # Check the tokenization of a stress-test file.
+        stress = os.path.join(HERE, "stress_phystoken.tok")
+        self.check_file_tokenization(stress)
+        stress = os.path.join(HERE, "stress_phystoken_dos.tok")
+        self.check_file_tokenization(stress)
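
The round-trip property that check_tokenization verifies can also be seen in
isolation. A minimal sketch, assuming the coverage package is importable; the
two-line source string here is an arbitrary example, not from the test file:

    from coverage.phystokens import source_token_lines

    source = "def foo():\n    say('two = %d' % 2)\n"
    rebuilt = ""
    for line in source_token_lines(source):
        # Each yielded line is a list of (category, text) pairs; joining the
        # text parts and re-adding the newline rebuilds the physical line.
        rebuilt += "".join(text for _, text in line) + "\n"
    # Apart from trailing whitespace (which the tests trim before comparing),
    # reassembling the tokens reproduces the original source.
    assert rebuilt == source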