summary | refs | log | tree | commit | diff
path: root/pygments
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org> 2020-09-07 07:55:53 +0200
committer: Georg Brandl <georg@python.org> 2020-09-07 07:55:55 +0200
commit: 080bbeb859d3346c221f7dcac1ae1822676bec6f (patch)
tree: c1efe8651eccd963f9198c91fe6285922b5af221 /pygments
parent: d464bf55dabfe4f34e97e9c9f1617f5a508807fd (diff)
download: pygments-git-080bbeb859d3346c221f7dcac1ae1822676bec6f.tar.gz
all: revert changes of [a-zA-Z0-9_] to \w
... which is not equivalent in Unicode mode
Diffstat (limited to 'pygments')
-rw-r--r-- pygments/lexers/arrow.py | 2
-rw-r--r-- pygments/lexers/asm.py | 20
-rw-r--r-- pygments/lexers/basic.py | 14
-rw-r--r-- pygments/lexers/clean.py | 2
-rw-r--r-- pygments/lexers/elm.py | 6
-rw-r--r-- pygments/lexers/praat.py | 4
-rw-r--r-- pygments/lexers/prolog.py | 12
-rw-r--r-- pygments/lexers/promql.py | 4
-rw-r--r-- pygments/lexers/ride.py | 2
-rw-r--r-- pygments/lexers/solidity.py | 21
-rw-r--r-- pygments/lexers/sql.py | 8
-rw-r--r-- pygments/lexers/teraterm.py | 6
-rw-r--r-- pygments/lexers/webidl.py | 2
13 files changed, 52 insertions, 51 deletions
diff --git a/pygments/lexers/arrow.py b/pygments/lexers/arrow.py
index 0f57b145..452a4164 100644
--- a/pygments/lexers/arrow.py
+++ b/pygments/lexers/arrow.py
@@ -16,7 +16,7 @@ from pygments.token import Text, Operator, Keyword, Punctuation, Name, \
__all__ = ['ArrowLexer']
TYPES = r'\b(int|bool|char)((?:\[\])*)(?=\s+)'
-IDENT = r'([a-zA-Z_]\w*)'
+IDENT = r'([a-zA-Z_][a-zA-Z0-9_]*)'
DECL = TYPES + r'(\s+)' + IDENT
diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py
index fa0c3589..354c80c8 100644
--- a/pygments/lexers/asm.py
+++ b/pygments/lexers/asm.py
@@ -472,19 +472,19 @@ class LlvmMirBodyLexer(RegexLexer):
# Attributes on basic blocks
(words(('liveins', 'successors'), suffix=':'), Keyword),
# Basic Block Labels
- (r'bb\.[0-9]+(\.[\w.-]+)?( \(address-taken\))?:', Name.Label),
- (r'bb\.[0-9]+ \(%[\w.-]+\)( \(address-taken\))?:', Name.Label),
+ (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:', Name.Label),
+ (r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:', Name.Label),
(r'%bb\.[0-9]+(\.\w+)?', Name.Label),
# Stack references
(r'%stack\.[0-9]+(\.\w+\.addr)?', Name),
# Subreg indices
(r'%subreg\.\w+', Name),
# Virtual registers
- (r'%\w+ *', Name.Variable, 'vreg'),
+ (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),
# Reference to LLVM-IR global
include('global'),
# Reference to Intrinsic
- (r'intrinsic\(\@[\w.]+\)', Name.Variable.Global),
+ (r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global),
# Comparison predicates
(words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult',
'ule'), prefix=r'intpred\(', suffix=r'\)'), Name.Builtin),
@@ -537,7 +537,7 @@ class LlvmMirBodyLexer(RegexLexer):
# MIR Comments
(r';.*', Comment),
# If we get here, assume it's a target instruction
- (r'\w+', Name),
+ (r'[a-zA-Z0-9_]+', Name),
# Everything else that isn't highlighted
(r'[(), \n]+', Text),
],
@@ -561,7 +561,7 @@ class LlvmMirBodyLexer(RegexLexer):
'vreg_bank_or_class': [
# The unassigned bank/class
(r' *_', Name.Variable.Magic),
- (r' *\w+', Name.Variable),
+ (r' *[a-zA-Z0-9_]+', Name.Variable),
# The LLT if there is one
(r' *\(', Text, 'vreg_type'),
(r'(?=.)', Text, '#pop'),
@@ -580,8 +580,8 @@ class LlvmMirBodyLexer(RegexLexer):
'acquire', 'release', 'acq_rel', 'seq_cst')),
Keyword),
# IR references
- (r'%ir\.[\w.-]+', Name),
- (r'%ir-block\.[\w.-]+', Name),
+ (r'%ir\.[a-zA-Z0-9_.-]+', Name),
+ (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),
(r'[-+]', Operator),
include('integer'),
include('global'),
@@ -591,7 +591,7 @@ class LlvmMirBodyLexer(RegexLexer):
],
'integer': [(r'-?[0-9]+', Number.Integer),],
'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)],
- 'global': [(r'\@[\w.]+', Name.Variable.Global)],
+ 'global': [(r'\@[a-zA-Z0-9_.]+', Name.Variable.Global)],
}
@@ -935,7 +935,7 @@ class Dasm16Lexer(RegexLexer):
]
# Regexes yo
- char = r'[\w$@.]'
+ char = r'[a-zA-Z0-9_$@.]'
identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
binary_number = r'0b[01_]+'
diff --git a/pygments/lexers/basic.py b/pygments/lexers/basic.py
index a766a949..0e46f23b 100644
--- a/pygments/lexers/basic.py
+++ b/pygments/lexers/basic.py
@@ -523,15 +523,15 @@ class VBScriptLexer(RegexLexer):
(r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
(r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Float variant 2, for example: .1, .1e2
(r'[0-9]+e[+-]?[0-9]+', Number.Float), # Float variant 3, for example: 123e45
- (r'\d+', Number.Integer),
+ (r'[0-9]+', Number.Integer),
('#.+#', String), # date or time value
- (r'(dim)(\s+)([a-z_]\w*)',
+ (r'(dim)(\s+)([a-z_][a-z0-9_]*)',
bygroups(Keyword.Declaration, Whitespace, Name.Variable), 'dim_more'),
- (r'(function|sub)(\s+)([a-z_]\w*)',
+ (r'(function|sub)(\s+)([a-z_][a-z0-9_]*)',
bygroups(Keyword.Declaration, Whitespace, Name.Function)),
- (r'(class)(\s+)([a-z_]\w*)',
+ (r'(class)(\s+)([a-z_][a-z0-9_]*)',
bygroups(Keyword.Declaration, Whitespace, Name.Class)),
- (r'(const)(\s+)([a-z_]\w*)',
+ (r'(const)(\s+)([a-z_][a-z0-9_]*)',
bygroups(Keyword.Declaration, Whitespace, Name.Constant)),
(r'(end)(\s+)(class|function|if|property|sub|with)',
bygroups(Keyword, Whitespace, Keyword)),
@@ -540,7 +540,7 @@ class VBScriptLexer(RegexLexer):
(r'(on)(\s+)(error)(\s+)(resume)(\s+)(next)',
bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Keyword)),
(r'(option)(\s+)(explicit)', bygroups(Keyword, Whitespace, Keyword)),
- (r'(property)(\s+)(get|let|set)(\s+)([a-z_]\w*)',
+ (r'(property)(\s+)(get|let|set)(\s+)([a-z_][a-z0-9_]*)',
bygroups(Keyword.Declaration, Whitespace, Keyword.Declaration, Whitespace, Name.Property)),
(r'rem\s.*[^\n]*', Comment.Single),
(words(_vbscript_builtins.KEYWORDS, suffix=r'\b'), Keyword),
@@ -549,7 +549,7 @@ class VBScriptLexer(RegexLexer):
(words(_vbscript_builtins.BUILTIN_CONSTANTS, suffix=r'\b'), Name.Constant),
(words(_vbscript_builtins.BUILTIN_FUNCTIONS, suffix=r'\b'), Name.Builtin),
(words(_vbscript_builtins.BUILTIN_VARIABLES, suffix=r'\b'), Name.Builtin),
- (r'[a-z_]\w*', Name),
+ (r'[a-z_][a-z0-9_]*', Name),
(r'\b_\n', Operator),
(words(r'(),.:'), Punctuation),
(r'.+(\n)?', Error)
diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py
index e5b0cf19..b5dba09b 100644
--- a/pygments/lexers/clean.py
+++ b/pygments/lexers/clean.py
@@ -40,7 +40,7 @@ class CleanLexer(ExtendedRegexLexer):
funnyId = r'[~@#$%\^?!+\-*<>\\/|&=:]+'
scoreUpperId = r'_' + upperId
scoreLowerId = r'_' + lowerId
- moduleId = r'[a-zA-Z_][\w.`]+'
+ moduleId = r'[a-zA-Z_][a-zA-Z0-9_.`]+'
classId = '|'.join([lowerId, upperId, funnyId])
tokens = {
diff --git a/pygments/lexers/elm.py b/pygments/lexers/elm.py
index bb680d13..46c12eda 100644
--- a/pygments/lexers/elm.py
+++ b/pygments/lexers/elm.py
@@ -27,7 +27,7 @@ class ElmLexer(RegexLexer):
filenames = ['*.elm']
mimetypes = ['text/x-elm']
- validName = r'[a-z_][\w\']*'
+ validName = r'[a-z_][a-zA-Z0-9_\']*'
specialName = r'^main '
@@ -40,7 +40,7 @@ class ElmLexer(RegexLexer):
reservedWords = words((
'alias', 'as', 'case', 'else', 'if', 'import', 'in',
'let', 'module', 'of', 'port', 'then', 'type', 'where',
- ), suffix=r'\b')
+ ), suffix=r'\b')
tokens = {
'root': [
@@ -68,7 +68,7 @@ class ElmLexer(RegexLexer):
(reservedWords, Keyword.Reserved),
# Types
- (r'[A-Z]\w*', Keyword.Type),
+ (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),
# Main
(specialName, Keyword.Reserved),
diff --git a/pygments/lexers/praat.py b/pygments/lexers/praat.py
index 7799ec35..36c6d69d 100644
--- a/pygments/lexers/praat.py
+++ b/pygments/lexers/praat.py
@@ -215,7 +215,7 @@ class PraatLexer(RegexLexer):
],
'object_reference': [
include('string_interpolated'),
- (r'([a-z]\w*|\d+)', Name.Builtin),
+ (r'([a-z][a-zA-Z0-9_]*|\d+)', Name.Builtin),
(words(object_attributes, prefix=r'\.'), Name.Builtin, '#pop'),
@@ -228,7 +228,7 @@ class PraatLexer(RegexLexer):
(words(variables_string, suffix=r'\$'), Name.Variable.Global),
(words(variables_numeric,
- suffix=r'(?=[^\w."\'$#\[:(]|\s|^|$)'),
+ suffix=r'(?=[^a-zA-Z0-9_."\'$#\[:(]|\s|^|$)'),
Name.Variable.Global),
(words(objects, prefix=r'\b', suffix=r"(_)"),
diff --git a/pygments/lexers/prolog.py b/pygments/lexers/prolog.py
index 5baa916d..40ef0df3 100644
--- a/pygments/lexers/prolog.py
+++ b/pygments/lexers/prolog.py
@@ -113,7 +113,7 @@ class LogtalkLexer(RegexLexer):
(r'0x[0-9a-fA-F]+', Number.Hex),
(r'\d+\.?\d*((e|E)(\+|-)?\d+)?', Number),
# Variables
- (r'([A-Z_]\w*)', Name.Variable),
+ (r'([A-Z_][a-zA-Z0-9_]*)', Name.Variable),
# Event handlers
(r'(after|before)(?=[(])', Keyword),
# Message forwarding handler
@@ -231,7 +231,7 @@ class LogtalkLexer(RegexLexer):
# Punctuation
(r'[()\[\],.|]', Text),
# Atoms
- (r"[a-z]\w*", Text),
+ (r"[a-z][a-zA-Z0-9_]*", Text),
(r"'", String, 'quoted_atom'),
],
@@ -259,8 +259,8 @@ class LogtalkLexer(RegexLexer):
(r'(alias|d(ynamic|iscontiguous)|m(eta_(non_terminal|predicate)|ode|ultifile)|s(et_(logtalk|prolog)_flag|ynchronized))(?=[(])', Keyword, 'root'),
(r'op(?=[(])', Keyword, 'root'),
(r'(c(alls|oinductive)|module|reexport|use(s|_module))(?=[(])', Keyword, 'root'),
- (r'[a-z]\w*(?=[(])', Text, 'root'),
- (r'[a-z]\w*(?=[.])', Text, 'root'),
+ (r'[a-z][a-zA-Z0-9_]*(?=[(])', Text, 'root'),
+ (r'[a-z][a-zA-Z0-9_]*(?=[.])', Text, 'root'),
],
'entityrelations': [
@@ -272,9 +272,9 @@ class LogtalkLexer(RegexLexer):
(r'0x[0-9a-fA-F]+', Number.Hex),
(r'\d+\.?\d*((e|E)(\+|-)?\d+)?', Number),
# Variables
- (r'([A-Z_]\w*)', Name.Variable),
+ (r'([A-Z_][a-zA-Z0-9_]*)', Name.Variable),
# Atoms
- (r"[a-z]\w*", Text),
+ (r"[a-z][a-zA-Z0-9_]*", Text),
(r"'", String, 'quoted_atom'),
# Strings
(r'"(\\\\|\\"|[^"])*"', String),
diff --git a/pygments/lexers/promql.py b/pygments/lexers/promql.py
index 7888e086..18069208 100644
--- a/pygments/lexers/promql.py
+++ b/pygments/lexers/promql.py
@@ -153,7 +153,7 @@ class PromQLLexer(RegexLexer):
(r"==|!=|>=|<=|<|>", Operator),
(r"and|or|unless", Operator.Word),
# Metrics
- (r"[_a-zA-Z]\w+", Name.Variable),
+ (r"[_a-zA-Z][a-zA-Z0-9_]+", Name.Variable),
# Params
(r'(["\'])(.*?)(["\'])', bygroups(Punctuation, String, Punctuation)),
# Other states
@@ -167,7 +167,7 @@ class PromQLLexer(RegexLexer):
(r"\n", Whitespace),
(r"\s+", Whitespace),
(r",", Punctuation),
- (r'([_a-zA-Z]\w*?)(\s*?)(=~|!=|=|~!)(\s*?)(")(.*?)(")',
+ (r'([_a-zA-Z][a-zA-Z0-9_]*?)(\s*?)(=~|!=|=|~!)(\s*?)(")(.*?)(")',
bygroups(Name.Label, Whitespace, Operator, Whitespace,
Punctuation, String, Punctuation)),
],
diff --git a/pygments/lexers/ride.py b/pygments/lexers/ride.py
index 4116cb8c..490d1e07 100644
--- a/pygments/lexers/ride.py
+++ b/pygments/lexers/ride.py
@@ -28,7 +28,7 @@ class RideLexer(RegexLexer):
filenames = ['*.ride']
mimetypes = ['text/x-ride']
- validName = r'[a-zA-Z_][\w\']*'
+ validName = r'[a-zA-Z_][a-zA-Z0-9_\']*'
builtinOps = (
'||', '|', '>=', '>', '==', '!',
diff --git a/pygments/lexers/solidity.py b/pygments/lexers/solidity.py
index f47887d6..af0672ee 100644
--- a/pygments/lexers/solidity.py
+++ b/pygments/lexers/solidity.py
@@ -13,7 +13,7 @@ import re
from pygments.lexer import RegexLexer, bygroups, include, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation
+ Number, Punctuation, Whitespace
__all__ = ['SolidityLexer']
@@ -33,7 +33,7 @@ class SolidityLexer(RegexLexer):
flags = re.MULTILINE | re.UNICODE
datatype = (
- r'\b(address|bool|((bytes|hash|int|string|uint)(8|16|24|32|40|48|56|64'
+ r'\b(address|bool|(?:(?:bytes|hash|int|string|uint)(?:8|16|24|32|40|48|56|64'
r'|72|80|88|96|104|112|120|128|136|144|152|160|168|176|184|192|200|208'
r'|216|224|232|240|248|256)?))\b'
)
@@ -44,14 +44,13 @@ class SolidityLexer(RegexLexer):
include('comments'),
(r'\bpragma\s+solidity\b', Keyword, 'pragma'),
(r'\b(contract)(\s+)([a-zA-Z_]\w*)',
- bygroups(Keyword, Text.WhiteSpace, Name.Entity)),
- (datatype + r'(\s+)((external|public|internal|private)\s+)?' +
+ bygroups(Keyword, Whitespace, Name.Entity)),
+ (datatype + r'(\s+)((?:external|public|internal|private)\s+)?' +
r'([a-zA-Z_]\w*)',
- bygroups(Keyword.Type, None, None, None, Text.WhiteSpace, Keyword,
- None, Name.Variable)),
+ bygroups(Keyword.Type, Whitespace, Keyword, Name.Variable)),
(r'\b(enum|event|function|struct)(\s+)([a-zA-Z_]\w*)',
- bygroups(Keyword.Type, Text.WhiteSpace, Name.Variable)),
- (r'\b(msg|block|tx)\.([A-Za-z_]\w*)\b', Keyword),
+ bygroups(Keyword.Type, Whitespace, Name.Variable)),
+ (r'\b(msg|block|tx)\.([A-Za-z_][a-zA-Z0-9_]*)\b', Keyword),
(words((
'block', 'break', 'constant', 'constructor', 'continue',
'contract', 'do', 'else', 'external', 'false', 'for',
@@ -83,11 +82,11 @@ class SolidityLexer(RegexLexer):
include('whitespace'),
include('comments'),
(r'(\^|>=|<)(\s*)(\d+\.\d+\.\d+)',
- bygroups(Operator, Text.WhiteSpace, Keyword)),
+ bygroups(Operator, Whitespace, Keyword)),
(r';', Punctuation, '#pop')
],
'whitespace': [
- (r'\s+', Text.WhiteSpace),
- (r'\n', Text.WhiteSpace)
+ (r'\s+', Whitespace),
+ (r'\n', Whitespace)
]
}
diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py
index 62459bf8..98d53c5c 100644
--- a/pygments/lexers/sql.py
+++ b/pygments/lexers/sql.py
@@ -623,10 +623,12 @@ class MySqlLexer(RegexLexer):
(r'[0-9]+', Number.Integer),
# Date literals
- (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}", Literal.Date),
+ (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
+ Literal.Date),
# Time literals
- (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}", Literal.Date),
+ (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
+ Literal.Date),
# Timestamp literals
(
@@ -644,7 +646,7 @@ class MySqlLexer(RegexLexer):
# Variables
(r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
- (r'@[\w$.]+', Name.Variable),
+ (r'@[a-z0-9_$.]+', Name.Variable),
(r"@'", Name.Variable, 'single-quoted-variable'),
(r'@"', Name.Variable, 'double-quoted-variable'),
(r"@`", Name.Variable, 'backtick-quoted-variable'),
diff --git a/pygments/lexers/teraterm.py b/pygments/lexers/teraterm.py
index 25288054..ef1a05e3 100644
--- a/pygments/lexers/teraterm.py
+++ b/pygments/lexers/teraterm.py
@@ -52,7 +52,7 @@ class TeraTermLexer(RegexLexer):
(r'[*/]', Comment.Multiline)
],
'labels': [
- (r'^(\s*)(:\w+)', bygroups(Text, Name.Label)),
+ (r'(?i)^(\s*)(:[a-z0-9_]+)', bygroups(Text, Name.Label)),
],
'commands': [
(
@@ -259,7 +259,7 @@ class TeraTermLexer(RegexLexer):
r')\b',
Keyword,
),
- (r'(?i)(call|goto)([ \t]+)(\w+)',
+ (r'(?i)(call|goto)([ \t]+)([a-z0-9_]+)',
bygroups(Keyword, Text, Name.Label)),
],
'builtin-variables': [
@@ -295,7 +295,7 @@ class TeraTermLexer(RegexLexer):
),
],
'user-variables': [
- (r'(?i)[A-Z_]\w*', Name.Variable),
+ (r'(?i)[a-z_][a-z0-9_]*', Name.Variable),
],
'numeric-literals': [
(r'(-?)([0-9]+)', bygroups(Operator, Number.Integer)),
diff --git a/pygments/lexers/webidl.py b/pygments/lexers/webidl.py
index ef8518ce..81ac44c2 100644
--- a/pygments/lexers/webidl.py
+++ b/pygments/lexers/webidl.py
@@ -32,7 +32,7 @@ _builtin_types = (
# other
'any', 'void', 'object', 'RegExp',
)
-_identifier = r'_?[A-Za-z][\w-]*'
+_identifier = r'_?[A-Za-z][a-zA-Z0-9_-]*'
_keyword_suffix = r'(?![\w-])'
_string = r'"[^"]*"'