-rw-r--r--  coverage/env.py           |  4
-rw-r--r--  coverage/phystokens.py    | 39
-rw-r--r--  tests/test_phystokens.py  | 36
3 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/coverage/env.py b/coverage/env.py
index 89abbb2e..c300f802 100644
--- a/coverage/env.py
+++ b/coverage/env.py
@@ -105,6 +105,10 @@ class PYBEHAVIOR:
     # Match-case construct.
     match_case = (PYVERSION >= (3, 10))
 
+    # Some words are keywords in some places, identifiers in other places.
+    soft_keywords = (PYVERSION >= (3, 10))
+
+
 # Coverage.py specifics.
 
 # Are we using the C-implemented trace function?
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 52c2aa06..f06c0c27 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -3,11 +3,13 @@
 
 """Better tokenizing for coverage.py."""
 
+import ast
 import keyword
 import re
 import token
 import tokenize
 
+from coverage import env
 from coverage.misc import contract
 
 
@@ -66,6 +68,21 @@ def phys_tokens(toks):
         last_lineno = elineno
 
 
+class MatchCaseFinder(ast.NodeVisitor):
+    """Helper for finding match/case lines."""
+    def __init__(self, source):
+        # This will be the set of line numbers that start match or case statements.
+        self.match_case_lines = set()
+        self.visit(ast.parse(source))
+
+    def visit_Match(self, node):
+        """Invoked by ast.NodeVisitor.visit"""
+        self.match_case_lines.add(node.lineno)
+        for case in node.cases:
+            self.match_case_lines.add(case.pattern.lineno)
+        self.generic_visit(node)
+
+
 @contract(source='unicode')
 def source_token_lines(source):
     """Generate a series of lines, one for each line in `source`.
@@ -90,7 +107,10 @@ def source_token_lines(source):
     source = source.expandtabs(8).replace('\r\n', '\n')
     tokgen = generate_tokens(source)
 
-    for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
+    if env.PYBEHAVIOR.soft_keywords:
+        match_case_lines = MatchCaseFinder(source).match_case_lines
+
+    for ttype, ttext, (sline, scol), (_, ecol), _ in phys_tokens(tokgen):
         mark_start = True
         for part in re.split('(\n)', ttext):
             if part == '\n':
@@ -107,8 +127,21 @@ def source_token_lines(source):
                     line.append(("ws", " " * (scol - col)))
                     mark_start = False
                 tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
-                if ttype == token.NAME and keyword.iskeyword(ttext):
-                    tok_class = "key"
+                if ttype == token.NAME:
+                    if keyword.iskeyword(ttext):
+                        # Hard keywords are always keywords.
+                        tok_class = "key"
+                    elif env.PYBEHAVIOR.soft_keywords and keyword.issoftkeyword(ttext):
+                        # Soft keywords appear at the start of the line, on lines that start
+                        # match or case statements.
+                        if len(line) == 0:
+                            is_start_of_line = True
+                        elif (len(line) == 1) and line[0][0] == "ws":
+                            is_start_of_line = True
+                        else:
+                            is_start_of_line = False
+                        if is_start_of_line and sline in match_case_lines:
+                            tok_class = "key"
                 line.append((tok_class, part))
                 mark_end = True
             scol = 0
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 82b887e6..3c214c63 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -103,6 +103,42 @@ class PhysTokensTest(CoverageTest):
         self.check_file_tokenization(stress)
 
 
+@pytest.mark.skipif(not env.PYBEHAVIOR.soft_keywords, reason="Soft keywords are new in Python 3.10")
+class SoftKeywordTest(CoverageTest):
+    """Tests the tokenizer handling soft keywords."""
+
+    run_in_temp_dir = False
+
+    def test_soft_keywords(self):
+        source = textwrap.dedent("""\
+            match re.match(something):
+                case ["what"]:
+                    match = case("hello")
+                case [_]:
+                    match("hello")
+            match another.thing:
+                case 1:
+                    pass
+
+            class case(): pass
+            def match():
+                global case
+            """)
+        tokens = list(source_token_lines(source))
+        assert tokens[0][0] == ("key", "match")
+        assert tokens[0][4] == ("nam", "match")
+        assert tokens[1][1] == ("key", "case")
+        assert tokens[2][1] == ("nam", "match")
+        assert tokens[2][5] == ("nam", "case")
+        assert tokens[3][1] == ("key", "case")
+        assert tokens[4][1] == ("nam", "match")
+        assert tokens[5][1] == ("key", "match")
+        assert tokens[6][1] == ("key", "case")
+        assert tokens[9][2] == ("nam", "case")
+        assert tokens[10][2] == ("nam", "match")
+        assert tokens[11][3] == ("nam", "case")
+
+
 # The default encoding is different in Python 2 and Python 3.
 DEF_ENCODING = "utf-8"
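
For a quick sanity check of what MatchCaseFinder collects, the visitor can be run standalone. This is a minimal sketch that copies the class from the diff above and applies it to a made-up snippet (the `sample` source is hypothetical; Python 3.10+ is required for ast to parse match statements):

    import ast

    class MatchCaseFinder(ast.NodeVisitor):
        """Helper for finding match/case lines."""
        def __init__(self, source):
            # This will be the set of line numbers that start match or case statements.
            self.match_case_lines = set()
            self.visit(ast.parse(source))

        def visit_Match(self, node):
            # Record the line of the match statement itself and of each case pattern.
            self.match_case_lines.add(node.lineno)
            for case in node.cases:
                self.match_case_lines.add(case.pattern.lineno)
            self.generic_visit(node)

    sample = (
        "match command:\n"     # line 1: starts a match statement
        "    case 'go':\n"     # line 2: starts a case clause
        "        match = 1\n"  # line 3: 'match' as a plain identifier, not collected
        "    case _:\n"        # line 4: starts another case clause
        "        pass\n"
    )
    print(sorted(MatchCaseFinder(sample).match_case_lines))  # prints [1, 2, 4]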
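
End to end, the effect shows up in source_token_lines: each yielded line is a list of (token_class, text) pairs, and "match"/"case" are tagged "key" only where the AST says a match or case statement starts. A hedged usage sketch (assuming a Python 3.10+ interpreter and a coverage.py install that includes this change; `src` is a made-up example):

    from coverage.phystokens import source_token_lines

    src = "match x:\n    case 1:\n        match = 2\n"
    for line in source_token_lines(src):
        print(line)
    # Per the new tests' expectations: the leading "match" and "case" come back
    # as ("key", ...), while the assignment target "match" on the last line
    # stays ("nam", "match").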