-rw-r--r--  coverage/env.py           |  4
-rw-r--r--  coverage/phystokens.py    | 39
-rw-r--r--  tests/test_phystokens.py  | 36
3 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/coverage/env.py b/coverage/env.py
index 89abbb2e..c300f802 100644
--- a/coverage/env.py
+++ b/coverage/env.py
@@ -105,6 +105,10 @@ class PYBEHAVIOR:
     # Match-case construct.
     match_case = (PYVERSION >= (3, 10))
 
+    # Some words are keywords in some places, identifiers in other places.
+    soft_keywords = (PYVERSION >= (3, 10))
+
+
 # Coverage.py specifics.
 
 # Are we using the C-implemented trace function?
diff --git a/coverage/phystokens.py b/coverage/phystokens.py
index 52c2aa06..f06c0c27 100644
--- a/coverage/phystokens.py
+++ b/coverage/phystokens.py
@@ -3,11 +3,13 @@
 
 """Better tokenizing for coverage.py."""
 
+import ast
 import keyword
 import re
 import token
 import tokenize
 
+from coverage import env
 from coverage.misc import contract
 
 
@@ -66,6 +68,21 @@ def phys_tokens(toks):
         last_lineno = elineno
 
 
+class MatchCaseFinder(ast.NodeVisitor):
+    """Helper for finding match/case lines."""
+    def __init__(self, source):
+        # This will be the set of line numbers that start match or case statements.
+        self.match_case_lines = set()
+        self.visit(ast.parse(source))
+
+    def visit_Match(self, node):
+        """Invoked by ast.NodeVisitor.visit"""
+        self.match_case_lines.add(node.lineno)
+        for case in node.cases:
+            self.match_case_lines.add(case.pattern.lineno)
+        self.generic_visit(node)
+
+
 @contract(source='unicode')
 def source_token_lines(source):
     """Generate a series of lines, one for each line in `source`.
@@ -90,7 +107,10 @@ def source_token_lines(source):
     source = source.expandtabs(8).replace('\r\n', '\n')
     tokgen = generate_tokens(source)
 
-    for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
+    if env.PYBEHAVIOR.soft_keywords:
+        match_case_lines = MatchCaseFinder(source).match_case_lines
+
+    for ttype, ttext, (sline, scol), (_, ecol), _ in phys_tokens(tokgen):
         mark_start = True
         for part in re.split('(\n)', ttext):
             if part == '\n':
@@ -107,8 +127,21 @@ def source_token_lines(source):
                     line.append(("ws", " " * (scol - col)))
                     mark_start = False
                 tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
-                if ttype == token.NAME and keyword.iskeyword(ttext):
-                    tok_class = "key"
+                if ttype == token.NAME:
+                    if keyword.iskeyword(ttext):
+                        # Hard keywords are always keywords.
+                        tok_class = "key"
+                    elif env.PYBEHAVIOR.soft_keywords and keyword.issoftkeyword(ttext):
+                        # Soft keywords appear at the start of the line, on lines that start
+                        # match or case statements.
+                        if len(line) == 0:
+                            is_start_of_line = True
+                        elif (len(line) == 1) and line[0][0] == "ws":
+                            is_start_of_line = True
+                        else:
+                            is_start_of_line = False
+                        if is_start_of_line and sline in match_case_lines:
+                            tok_class = "key"
                 line.append((tok_class, part))
                 mark_end = True
             scol = 0
diff --git a/tests/test_phystokens.py b/tests/test_phystokens.py
index 82b887e6..3c214c63 100644
--- a/tests/test_phystokens.py
+++ b/tests/test_phystokens.py
@@ -103,6 +103,42 @@ class PhysTokensTest(CoverageTest):
         self.check_file_tokenization(stress)
 
 
+@pytest.mark.skipif(not env.PYBEHAVIOR.soft_keywords, reason="Soft keywords are new in Python 3.10")
+class SoftKeywordTest(CoverageTest):
+    """Tests the tokenizer handling soft keywords."""
+
+    run_in_temp_dir = False
+
+    def test_soft_keywords(self):
+        source = textwrap.dedent("""\
+            match re.match(something):
+                case ["what"]:
+                    match = case("hello")
+                case [_]:
+                    match("hello")
+            match another.thing:
+                case 1:
+                    pass
+
+            class case(): pass
+            def match():
+                global case
+            """)
+        tokens = list(source_token_lines(source))
+        assert tokens[0][0] == ("key", "match")
+        assert tokens[0][4] == ("nam", "match")
+        assert tokens[1][1] == ("key", "case")
+        assert tokens[2][1] == ("nam", "match")
+        assert tokens[2][5] == ("nam", "case")
+        assert tokens[3][1] == ("key", "case")
+        assert tokens[4][1] == ("nam", "match")
+        assert tokens[5][1] == ("key", "match")
+        assert tokens[6][1] == ("key", "case")
+        assert tokens[9][2] == ("nam", "case")
+        assert tokens[10][2] == ("nam", "match")
+        assert tokens[11][3] == ("nam", "case")
+
+
 # The default encoding is different in Python 2 and Python 3.
 DEF_ENCODING = "utf-8"
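
For a quick sanity check of what MatchCaseFinder collects, the visitor can be run standalone. This is a minimal sketch that copies the class from the diff above and applies it to a made-up snippet (the `sample` source is hypothetical; Python 3.10+ is required for ast to parse match statements):

    import ast

    class MatchCaseFinder(ast.NodeVisitor):
        """Helper for finding match/case lines."""
        def __init__(self, source):
            # This will be the set of line numbers that start match or case statements.
            self.match_case_lines = set()
            self.visit(ast.parse(source))

        def visit_Match(self, node):
            # Record the line of the match statement itself and of each case pattern.
            self.match_case_lines.add(node.lineno)
            for case in node.cases:
                self.match_case_lines.add(case.pattern.lineno)
            self.generic_visit(node)

    sample = (
        "match command:\n"     # line 1: starts a match statement
        "    case 'go':\n"     # line 2: starts a case clause
        "        match = 1\n"  # line 3: 'match' as a plain identifier, not collected
        "    case _:\n"        # line 4: starts another case clause
        "        pass\n"
    )
    print(sorted(MatchCaseFinder(sample).match_case_lines))  # prints [1, 2, 4]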
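
End to end, the effect shows up in source_token_lines: each yielded line is a list of (token_class, text) pairs, and "match"/"case" are tagged "key" only where the AST says a match or case statement starts. A hedged usage sketch (assuming a Python 3.10+ interpreter and a coverage.py install that includes this change; `src` is a made-up example):

    from coverage.phystokens import source_token_lines

    src = "match x:\n    case 1:\n        match = 2\n"
    for line in source_token_lines(src):
        print(line)
    # Per the new tests' expectations: the leading "match" and "case" come back
    # as ("key", ...), while the assignment target "match" on the last line
    # stays ("nam", "match").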