diff options
author | Georg Brandl <georg@python.org> | 2020-09-07 07:55:53 +0200 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2020-09-07 07:55:55 +0200 |
commit | 080bbeb859d3346c221f7dcac1ae1822676bec6f (patch) | |
tree | c1efe8651eccd963f9198c91fe6285922b5af221 /pygments | |
parent | d464bf55dabfe4f34e97e9c9f1617f5a508807fd (diff) | |
download | pygments-git-080bbeb859d3346c221f7dcac1ae1822676bec6f.tar.gz |
all: revert changes of [a-zA-Z0-9_] to \w
... which is not equivalent in Unicode mode: there, \w also matches non-ASCII letters and digits, not just [a-zA-Z0-9_]
Diffstat (limited to 'pygments')
-rw-r--r-- | pygments/lexers/arrow.py | 2 | ||||
-rw-r--r-- | pygments/lexers/asm.py | 20 | ||||
-rw-r--r-- | pygments/lexers/basic.py | 14 | ||||
-rw-r--r-- | pygments/lexers/clean.py | 2 | ||||
-rw-r--r-- | pygments/lexers/elm.py | 6 | ||||
-rw-r--r-- | pygments/lexers/praat.py | 4 | ||||
-rw-r--r-- | pygments/lexers/prolog.py | 12 | ||||
-rw-r--r-- | pygments/lexers/promql.py | 4 | ||||
-rw-r--r-- | pygments/lexers/ride.py | 2 | ||||
-rw-r--r-- | pygments/lexers/solidity.py | 21 | ||||
-rw-r--r-- | pygments/lexers/sql.py | 8 | ||||
-rw-r--r-- | pygments/lexers/teraterm.py | 6 | ||||
-rw-r--r-- | pygments/lexers/webidl.py | 2 |
13 files changed, 52 insertions, 51 deletions
diff --git a/pygments/lexers/arrow.py b/pygments/lexers/arrow.py index 0f57b145..452a4164 100644 --- a/pygments/lexers/arrow.py +++ b/pygments/lexers/arrow.py @@ -16,7 +16,7 @@ from pygments.token import Text, Operator, Keyword, Punctuation, Name, \ __all__ = ['ArrowLexer'] TYPES = r'\b(int|bool|char)((?:\[\])*)(?=\s+)' -IDENT = r'([a-zA-Z_]\w*)' +IDENT = r'([a-zA-Z_][a-zA-Z0-9_]*)' DECL = TYPES + r'(\s+)' + IDENT diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py index fa0c3589..354c80c8 100644 --- a/pygments/lexers/asm.py +++ b/pygments/lexers/asm.py @@ -472,19 +472,19 @@ class LlvmMirBodyLexer(RegexLexer): # Attributes on basic blocks (words(('liveins', 'successors'), suffix=':'), Keyword), # Basic Block Labels - (r'bb\.[0-9]+(\.[\w.-]+)?( \(address-taken\))?:', Name.Label), - (r'bb\.[0-9]+ \(%[\w.-]+\)( \(address-taken\))?:', Name.Label), + (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:', Name.Label), + (r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:', Name.Label), (r'%bb\.[0-9]+(\.\w+)?', Name.Label), # Stack references (r'%stack\.[0-9]+(\.\w+\.addr)?', Name), # Subreg indices (r'%subreg\.\w+', Name), # Virtual registers - (r'%\w+ *', Name.Variable, 'vreg'), + (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'), # Reference to LLVM-IR global include('global'), # Reference to Intrinsic - (r'intrinsic\(\@[\w.]+\)', Name.Variable.Global), + (r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global), # Comparison predicates (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult', 'ule'), prefix=r'intpred\(', suffix=r'\)'), Name.Builtin), @@ -537,7 +537,7 @@ class LlvmMirBodyLexer(RegexLexer): # MIR Comments (r';.*', Comment), # If we get here, assume it's a target instruction - (r'\w+', Name), + (r'[a-zA-Z0-9_]+', Name), # Everything else that isn't highlighted (r'[(), \n]+', Text), ], @@ -561,7 +561,7 @@ class LlvmMirBodyLexer(RegexLexer): 'vreg_bank_or_class': [ # The unassigned bank/class (r' *_', Name.Variable.Magic), - (r' 
*\w+', Name.Variable), + (r' *[a-zA-Z0-9_]+', Name.Variable), # The LLT if there is one (r' *\(', Text, 'vreg_type'), (r'(?=.)', Text, '#pop'), @@ -580,8 +580,8 @@ class LlvmMirBodyLexer(RegexLexer): 'acquire', 'release', 'acq_rel', 'seq_cst')), Keyword), # IR references - (r'%ir\.[\w.-]+', Name), - (r'%ir-block\.[\w.-]+', Name), + (r'%ir\.[a-zA-Z0-9_.-]+', Name), + (r'%ir-block\.[a-zA-Z0-9_.-]+', Name), (r'[-+]', Operator), include('integer'), include('global'), @@ -591,7 +591,7 @@ class LlvmMirBodyLexer(RegexLexer): ], 'integer': [(r'-?[0-9]+', Number.Integer),], 'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)], - 'global': [(r'\@[\w.]+', Name.Variable.Global)], + 'global': [(r'\@[a-zA-Z0-9_.]+', Name.Variable.Global)], } @@ -935,7 +935,7 @@ class Dasm16Lexer(RegexLexer): ] # Regexes yo - char = r'[\w$@.]' + char = r'[a-zA-Z0-9_$@.]' identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)' number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)' binary_number = r'0b[01_]+' diff --git a/pygments/lexers/basic.py b/pygments/lexers/basic.py index a766a949..0e46f23b 100644 --- a/pygments/lexers/basic.py +++ b/pygments/lexers/basic.py @@ -523,15 +523,15 @@ class VBScriptLexer(RegexLexer): (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float), (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Float variant 2, for example: .1, .1e2 (r'[0-9]+e[+-]?[0-9]+', Number.Float), # Float variant 3, for example: 123e45 - (r'\d+', Number.Integer), + (r'[0-9]+', Number.Integer), ('#.+#', String), # date or time value - (r'(dim)(\s+)([a-z_]\w*)', + (r'(dim)(\s+)([a-z_][a-z0-9_]*)', bygroups(Keyword.Declaration, Whitespace, Name.Variable), 'dim_more'), - (r'(function|sub)(\s+)([a-z_]\w*)', + (r'(function|sub)(\s+)([a-z_][a-z0-9_]*)', bygroups(Keyword.Declaration, Whitespace, Name.Function)), - (r'(class)(\s+)([a-z_]\w*)', + (r'(class)(\s+)([a-z_][a-z0-9_]*)', bygroups(Keyword.Declaration, Whitespace, Name.Class)), - (r'(const)(\s+)([a-z_]\w*)', + (r'(const)(\s+)([a-z_][a-z0-9_]*)', 
bygroups(Keyword.Declaration, Whitespace, Name.Constant)), (r'(end)(\s+)(class|function|if|property|sub|with)', bygroups(Keyword, Whitespace, Keyword)), @@ -540,7 +540,7 @@ class VBScriptLexer(RegexLexer): (r'(on)(\s+)(error)(\s+)(resume)(\s+)(next)', bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Keyword)), (r'(option)(\s+)(explicit)', bygroups(Keyword, Whitespace, Keyword)), - (r'(property)(\s+)(get|let|set)(\s+)([a-z_]\w*)', + (r'(property)(\s+)(get|let|set)(\s+)([a-z_][a-z0-9_]*)', bygroups(Keyword.Declaration, Whitespace, Keyword.Declaration, Whitespace, Name.Property)), (r'rem\s.*[^\n]*', Comment.Single), (words(_vbscript_builtins.KEYWORDS, suffix=r'\b'), Keyword), @@ -549,7 +549,7 @@ class VBScriptLexer(RegexLexer): (words(_vbscript_builtins.BUILTIN_CONSTANTS, suffix=r'\b'), Name.Constant), (words(_vbscript_builtins.BUILTIN_FUNCTIONS, suffix=r'\b'), Name.Builtin), (words(_vbscript_builtins.BUILTIN_VARIABLES, suffix=r'\b'), Name.Builtin), - (r'[a-z_]\w*', Name), + (r'[a-z_][a-z0-9_]*', Name), (r'\b_\n', Operator), (words(r'(),.:'), Punctuation), (r'.+(\n)?', Error) diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py index e5b0cf19..b5dba09b 100644 --- a/pygments/lexers/clean.py +++ b/pygments/lexers/clean.py @@ -40,7 +40,7 @@ class CleanLexer(ExtendedRegexLexer): funnyId = r'[~@#$%\^?!+\-*<>\\/|&=:]+' scoreUpperId = r'_' + upperId scoreLowerId = r'_' + lowerId - moduleId = r'[a-zA-Z_][\w.`]+' + moduleId = r'[a-zA-Z_][a-zA-Z0-9_.`]+' classId = '|'.join([lowerId, upperId, funnyId]) tokens = { diff --git a/pygments/lexers/elm.py b/pygments/lexers/elm.py index bb680d13..46c12eda 100644 --- a/pygments/lexers/elm.py +++ b/pygments/lexers/elm.py @@ -27,7 +27,7 @@ class ElmLexer(RegexLexer): filenames = ['*.elm'] mimetypes = ['text/x-elm'] - validName = r'[a-z_][\w\']*' + validName = r'[a-z_][a-zA-Z0-9_\']*' specialName = r'^main ' @@ -40,7 +40,7 @@ class ElmLexer(RegexLexer): reservedWords = words(( 'alias', 'as', 'case', 
'else', 'if', 'import', 'in', 'let', 'module', 'of', 'port', 'then', 'type', 'where', - ), suffix=r'\b') + ), suffix=r'\b') tokens = { 'root': [ @@ -68,7 +68,7 @@ class ElmLexer(RegexLexer): (reservedWords, Keyword.Reserved), # Types - (r'[A-Z]\w*', Keyword.Type), + (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type), # Main (specialName, Keyword.Reserved), diff --git a/pygments/lexers/praat.py b/pygments/lexers/praat.py index 7799ec35..36c6d69d 100644 --- a/pygments/lexers/praat.py +++ b/pygments/lexers/praat.py @@ -215,7 +215,7 @@ class PraatLexer(RegexLexer): ], 'object_reference': [ include('string_interpolated'), - (r'([a-z]\w*|\d+)', Name.Builtin), + (r'([a-z][a-zA-Z0-9_]*|\d+)', Name.Builtin), (words(object_attributes, prefix=r'\.'), Name.Builtin, '#pop'), @@ -228,7 +228,7 @@ class PraatLexer(RegexLexer): (words(variables_string, suffix=r'\$'), Name.Variable.Global), (words(variables_numeric, - suffix=r'(?=[^\w."\'$#\[:(]|\s|^|$)'), + suffix=r'(?=[^a-zA-Z0-9_."\'$#\[:(]|\s|^|$)'), Name.Variable.Global), (words(objects, prefix=r'\b', suffix=r"(_)"), diff --git a/pygments/lexers/prolog.py b/pygments/lexers/prolog.py index 5baa916d..40ef0df3 100644 --- a/pygments/lexers/prolog.py +++ b/pygments/lexers/prolog.py @@ -113,7 +113,7 @@ class LogtalkLexer(RegexLexer): (r'0x[0-9a-fA-F]+', Number.Hex), (r'\d+\.?\d*((e|E)(\+|-)?\d+)?', Number), # Variables - (r'([A-Z_]\w*)', Name.Variable), + (r'([A-Z_][a-zA-Z0-9_]*)', Name.Variable), # Event handlers (r'(after|before)(?=[(])', Keyword), # Message forwarding handler @@ -231,7 +231,7 @@ class LogtalkLexer(RegexLexer): # Punctuation (r'[()\[\],.|]', Text), # Atoms - (r"[a-z]\w*", Text), + (r"[a-z][a-zA-Z0-9_]*", Text), (r"'", String, 'quoted_atom'), ], @@ -259,8 +259,8 @@ class LogtalkLexer(RegexLexer): (r'(alias|d(ynamic|iscontiguous)|m(eta_(non_terminal|predicate)|ode|ultifile)|s(et_(logtalk|prolog)_flag|ynchronized))(?=[(])', Keyword, 'root'), (r'op(?=[(])', Keyword, 'root'), 
(r'(c(alls|oinductive)|module|reexport|use(s|_module))(?=[(])', Keyword, 'root'), - (r'[a-z]\w*(?=[(])', Text, 'root'), - (r'[a-z]\w*(?=[.])', Text, 'root'), + (r'[a-z][a-zA-Z0-9_]*(?=[(])', Text, 'root'), + (r'[a-z][a-zA-Z0-9_]*(?=[.])', Text, 'root'), ], 'entityrelations': [ @@ -272,9 +272,9 @@ class LogtalkLexer(RegexLexer): (r'0x[0-9a-fA-F]+', Number.Hex), (r'\d+\.?\d*((e|E)(\+|-)?\d+)?', Number), # Variables - (r'([A-Z_]\w*)', Name.Variable), + (r'([A-Z_][a-zA-Z0-9_]*)', Name.Variable), # Atoms - (r"[a-z]\w*", Text), + (r"[a-z][a-zA-Z0-9_]*", Text), (r"'", String, 'quoted_atom'), # Strings (r'"(\\\\|\\"|[^"])*"', String), diff --git a/pygments/lexers/promql.py b/pygments/lexers/promql.py index 7888e086..18069208 100644 --- a/pygments/lexers/promql.py +++ b/pygments/lexers/promql.py @@ -153,7 +153,7 @@ class PromQLLexer(RegexLexer): (r"==|!=|>=|<=|<|>", Operator), (r"and|or|unless", Operator.Word), # Metrics - (r"[_a-zA-Z]\w+", Name.Variable), + (r"[_a-zA-Z][a-zA-Z0-9_]+", Name.Variable), # Params (r'(["\'])(.*?)(["\'])', bygroups(Punctuation, String, Punctuation)), # Other states @@ -167,7 +167,7 @@ class PromQLLexer(RegexLexer): (r"\n", Whitespace), (r"\s+", Whitespace), (r",", Punctuation), - (r'([_a-zA-Z]\w*?)(\s*?)(=~|!=|=|~!)(\s*?)(")(.*?)(")', + (r'([_a-zA-Z][a-zA-Z0-9_]*?)(\s*?)(=~|!=|=|~!)(\s*?)(")(.*?)(")', bygroups(Name.Label, Whitespace, Operator, Whitespace, Punctuation, String, Punctuation)), ], diff --git a/pygments/lexers/ride.py b/pygments/lexers/ride.py index 4116cb8c..490d1e07 100644 --- a/pygments/lexers/ride.py +++ b/pygments/lexers/ride.py @@ -28,7 +28,7 @@ class RideLexer(RegexLexer): filenames = ['*.ride'] mimetypes = ['text/x-ride'] - validName = r'[a-zA-Z_][\w\']*' + validName = r'[a-zA-Z_][a-zA-Z0-9_\']*' builtinOps = ( '||', '|', '>=', '>', '==', '!', diff --git a/pygments/lexers/solidity.py b/pygments/lexers/solidity.py index f47887d6..af0672ee 100644 --- a/pygments/lexers/solidity.py +++ b/pygments/lexers/solidity.py @@ -13,7 +13,7 
@@ import re from pygments.lexer import RegexLexer, bygroups, include, words from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation + Number, Punctuation, Whitespace __all__ = ['SolidityLexer'] @@ -33,7 +33,7 @@ class SolidityLexer(RegexLexer): flags = re.MULTILINE | re.UNICODE datatype = ( - r'\b(address|bool|((bytes|hash|int|string|uint)(8|16|24|32|40|48|56|64' + r'\b(address|bool|(?:(?:bytes|hash|int|string|uint)(?:8|16|24|32|40|48|56|64' r'|72|80|88|96|104|112|120|128|136|144|152|160|168|176|184|192|200|208' r'|216|224|232|240|248|256)?))\b' ) @@ -44,14 +44,13 @@ class SolidityLexer(RegexLexer): include('comments'), (r'\bpragma\s+solidity\b', Keyword, 'pragma'), (r'\b(contract)(\s+)([a-zA-Z_]\w*)', - bygroups(Keyword, Text.WhiteSpace, Name.Entity)), - (datatype + r'(\s+)((external|public|internal|private)\s+)?' + + bygroups(Keyword, Whitespace, Name.Entity)), + (datatype + r'(\s+)((?:external|public|internal|private)\s+)?' + r'([a-zA-Z_]\w*)', - bygroups(Keyword.Type, None, None, None, Text.WhiteSpace, Keyword, - None, Name.Variable)), + bygroups(Keyword.Type, Whitespace, Keyword, Name.Variable)), (r'\b(enum|event|function|struct)(\s+)([a-zA-Z_]\w*)', - bygroups(Keyword.Type, Text.WhiteSpace, Name.Variable)), - (r'\b(msg|block|tx)\.([A-Za-z_]\w*)\b', Keyword), + bygroups(Keyword.Type, Whitespace, Name.Variable)), + (r'\b(msg|block|tx)\.([A-Za-z_][a-zA-Z0-9_]*)\b', Keyword), (words(( 'block', 'break', 'constant', 'constructor', 'continue', 'contract', 'do', 'else', 'external', 'false', 'for', @@ -83,11 +82,11 @@ class SolidityLexer(RegexLexer): include('whitespace'), include('comments'), (r'(\^|>=|<)(\s*)(\d+\.\d+\.\d+)', - bygroups(Operator, Text.WhiteSpace, Keyword)), + bygroups(Operator, Whitespace, Keyword)), (r';', Punctuation, '#pop') ], 'whitespace': [ - (r'\s+', Text.WhiteSpace), - (r'\n', Text.WhiteSpace) + (r'\s+', Whitespace), + (r'\n', Whitespace) ] } diff --git a/pygments/lexers/sql.py 
b/pygments/lexers/sql.py index 62459bf8..98d53c5c 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -623,10 +623,12 @@ class MySqlLexer(RegexLexer): (r'[0-9]+', Number.Integer), # Date literals - (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}", Literal.Date), + (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}", + Literal.Date), # Time literals - (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}", Literal.Date), + (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}", + Literal.Date), # Timestamp literals ( @@ -644,7 +646,7 @@ class MySqlLexer(RegexLexer): # Variables (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable), - (r'@[\w$.]+', Name.Variable), + (r'@[a-z0-9_$.]+', Name.Variable), (r"@'", Name.Variable, 'single-quoted-variable'), (r'@"', Name.Variable, 'double-quoted-variable'), (r"@`", Name.Variable, 'backtick-quoted-variable'), diff --git a/pygments/lexers/teraterm.py b/pygments/lexers/teraterm.py index 25288054..ef1a05e3 100644 --- a/pygments/lexers/teraterm.py +++ b/pygments/lexers/teraterm.py @@ -52,7 +52,7 @@ class TeraTermLexer(RegexLexer): (r'[*/]', Comment.Multiline) ], 'labels': [ - (r'^(\s*)(:\w+)', bygroups(Text, Name.Label)), + (r'(?i)^(\s*)(:[a-z0-9_]+)', bygroups(Text, Name.Label)), ], 'commands': [ ( @@ -259,7 +259,7 @@ class TeraTermLexer(RegexLexer): r')\b', Keyword, ), - (r'(?i)(call|goto)([ \t]+)(\w+)', + (r'(?i)(call|goto)([ \t]+)([a-z0-9_]+)', bygroups(Keyword, Text, Name.Label)), ], 'builtin-variables': [ @@ -295,7 +295,7 @@ class TeraTermLexer(RegexLexer): ), ], 'user-variables': [ - (r'(?i)[A-Z_]\w*', Name.Variable), + (r'(?i)[a-z_][a-z0-9_]*', Name.Variable), ], 'numeric-literals': [ (r'(-?)([0-9]+)', bygroups(Operator, Number.Integer)), diff --git a/pygments/lexers/webidl.py b/pygments/lexers/webidl.py index ef8518ce..81ac44c2 100644 --- 
a/pygments/lexers/webidl.py +++ b/pygments/lexers/webidl.py @@ -32,7 +32,7 @@ _builtin_types = ( # other 'any', 'void', 'object', 'RegExp', ) -_identifier = r'_?[A-Za-z][\w-]*' +_identifier = r'_?[A-Za-z][a-zA-Z0-9_-]*' _keyword_suffix = r'(?![\w-])' _string = r'"[^"]*"' |