summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org>, 2020-12-25 13:16:56 +0100
committer: Georg Brandl <georg@python.org>, 2020-12-25 13:21:48 +0100
commit: 681487f82f55fba66f01f9913e4ff103e5b2ef4c (patch)
tree: 0e3f82343796411c1e9c71f5fa7c31e392d06bf5
parent: 6c820019a73a606940d7477619a5a9e6ad38761d (diff)
download: pygments-git-681487f82f55fba66f01f9913e4ff103e5b2ef4c.tar.gz
all: weed out more backtracking string regexes
-rw-r--r-- pygments/lexers/actionscript.py | 4
-rw-r--r-- pygments/lexers/ambient.py | 2
-rw-r--r-- pygments/lexers/boa.py | 6
-rw-r--r-- pygments/lexers/configs.py | 2
-rw-r--r-- pygments/lexers/d.py | 4
-rw-r--r-- pygments/lexers/dotnet.py | 8
-rw-r--r-- pygments/lexers/dsls.py | 4
-rw-r--r-- pygments/lexers/go.py | 2
-rw-r--r-- pygments/lexers/graphics.py | 4
-rw-r--r-- pygments/lexers/haxe.py | 4
-rw-r--r-- pygments/lexers/iolang.py | 2
-rw-r--r-- pygments/lexers/jvm.py | 16
-rw-r--r-- pygments/lexers/lisp.py | 12
-rw-r--r-- pygments/lexers/make.py | 4
-rw-r--r-- pygments/lexers/parsers.py | 48
-rw-r--r-- pygments/lexers/php.py | 4
-rw-r--r-- pygments/lexers/prolog.py | 4
-rw-r--r-- pygments/lexers/ruby.py | 34
-rw-r--r-- pygments/lexers/scripting.py | 8
-rw-r--r-- pygments/lexers/supercollider.py | 4
-rw-r--r-- pygments/lexers/templates.py | 24
-rw-r--r-- pygments/lexers/textedit.py | 4
-rw-r--r-- pygments/lexers/urbi.py | 4
-rw-r--r-- pygments/lexers/webmisc.py | 4
-rw-r--r-- pygments/lexers/x10.py | 2
-rw-r--r-- tests/test_html_lexer.py | 4
26 files changed, 108 insertions, 110 deletions
diff --git a/pygments/lexers/actionscript.py b/pygments/lexers/actionscript.py
index 0d748153..c24d1db3 100644
--- a/pygments/lexers/actionscript.py
+++ b/pygments/lexers/actionscript.py
@@ -37,7 +37,7 @@ class ActionScriptLexer(RegexLexer):
(r'\s+', Text),
(r'//.*?\n', Comment.Single),
(r'/\*.*?\*/', Comment.Multiline),
- (r'/(\\\\|\\/|[^/\n])*/[gim]*', String.Regex),
+ (r'/(\\\\|\\[^\\]|[^/\\\n])*/[gim]*', String.Regex),
(r'[~^*!%&<>|+=:;,/?\\-]+', Operator),
(r'[{}\[\]();.]+', Punctuation),
(words((
@@ -149,7 +149,7 @@ class ActionScript3Lexer(RegexLexer):
bygroups(Keyword, Text, Keyword.Type, Text, Operator)),
(r'//.*?\n', Comment.Single),
(r'/\*.*?\*/', Comment.Multiline),
- (r'/(\\\\|\\/|[^\n])*/[gisx]*', String.Regex),
+ (r'/(\\\\|\\[^\\]|[^\\\n])*/[gisx]*', String.Regex),
(r'(\.)(' + identifier + r')', bygroups(Operator, Name.Attribute)),
(r'(case|default|for|each|in|while|do|break|return|continue|if|else|'
r'throw|try|catch|with|new|typeof|arguments|instanceof|this|'
diff --git a/pygments/lexers/ambient.py b/pygments/lexers/ambient.py
index 82454829..06f87430 100644
--- a/pygments/lexers/ambient.py
+++ b/pygments/lexers/ambient.py
@@ -44,7 +44,7 @@ class AmbientTalkLexer(RegexLexer):
(builtin, Name.Builtin),
(r'(true|false|nil)\b', Keyword.Constant),
(r'(~|lobby|jlobby|/)\.', Keyword.Constant, 'namespace'),
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r'\|', Punctuation, 'arglist'),
(r'<:|[*^!%&<>+=,./?-]|:=', Operator),
(r"`[a-zA-Z_]\w*", String.Symbol),
diff --git a/pygments/lexers/boa.py b/pygments/lexers/boa.py
index bbe9dffa..1360bc38 100644
--- a/pygments/lexers/boa.py
+++ b/pygments/lexers/boa.py
@@ -92,9 +92,9 @@ class BoaLexer(RegexLexer):
(classes, Name.Classes),
(words(operators), Operator),
(r'[][(),;{}\\.]', Punctuation),
- (r'"(\\\\|\\"|[^"])*"', String),
- (r'`(\\\\|\\`|[^`])*`', String),
- (words(string_sep), String.Delimeter),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"`(\\\\|\\[^\\]|[^`\\])*`", String.Backtick),
+ (words(string_sep), String.Delimiter),
(r'[a-zA-Z_]+', Name.Variable),
(r'[0-9]+', Number.Integer),
(r'\s+?', Text), # Whitespace
diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py
index cb20305a..3908ac58 100644
--- a/pygments/lexers/configs.py
+++ b/pygments/lexers/configs.py
@@ -909,7 +909,7 @@ class TOMLLexer(RegexLexer):
(r'\s+', Text),
(r'#.*?$', Comment.Single),
# Basic string
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# Literal string
(r'\'\'\'(.*)\'\'\'', String),
(r'\'[^\']*\'', String),
diff --git a/pygments/lexers/d.py b/pygments/lexers/d.py
index 1fd60d0f..843f9aac 100644
--- a/pygments/lexers/d.py
+++ b/pygments/lexers/d.py
@@ -98,7 +98,7 @@ class DLexer(RegexLexer):
# -- AlternateWysiwygString
(r'`[^`]*`[cwd]?', String),
# -- DoubleQuotedString
- (r'"(\\\\|\\"|[^"])*"[cwd]?', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"[cwd]?', String),
# -- EscapeSequence
(r"\\(['\"?\\abfnrtv]|x[0-9a-fA-F]{2}|[0-7]{1,3}"
r"|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|&\w+;)",
@@ -229,7 +229,7 @@ class CrocLexer(RegexLexer):
(r'@`(``|[^`])*`', String),
(r"@'(''|[^'])*'", String),
# -- DoubleQuotedString
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# Tokens
(r'(~=|\^=|%=|\*=|==|!=|>>>=|>>>|>>=|>>|>=|<=>|\?=|-\>'
r'|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.|/=)'
diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py
index ed9d478f..0e03947e 100644
--- a/pygments/lexers/dotnet.py
+++ b/pygments/lexers/dotnet.py
@@ -88,7 +88,7 @@ class CSharpLexer(RegexLexer):
(r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
(r'[{}]', Punctuation),
(r'@"(""|[^"])*"', String),
- (r'"(\\\\|\\"|[^"\n])*["\n]', String),
+ (r'"(\\\\|\\[^\\]|[^"\\\n])*["\n]', String),
(r"'\\.'|'[^\\]'", String.Char),
(r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?"
r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number),
@@ -213,7 +213,7 @@ class NemerleLexer(RegexLexer):
(r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
(r'[{}]', Punctuation),
(r'@"(""|[^"])*"', String),
- (r'"(\\\\|\\"|[^"\n])*["\n]', String),
+ (r'"(\\\\|\\[^\\]|[^"\\\n])*["\n]', String),
(r"'\\.'|'[^\\]'", String.Char),
(r"0[xX][0-9a-fA-F]+[Ll]?", Number),
(r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFLdD]?", Number),
@@ -325,8 +325,8 @@ class BooLexer(RegexLexer):
(r'\\\n', Text),
(r'\\', Text),
(r'(in|is|and|or|not)\b', Operator.Word),
- (r'/(\\\\|\\/|[^/\s])/', String.Regex),
- (r'@/(\\\\|\\/|[^/])*/', String.Regex),
+ (r'/(\\\\|\\[^\\]|[^/\\\s])/', String.Regex),
+ (r'@/(\\\\|\\[^\\]|[^/\\])*/', String.Regex),
(r'=~|!=|==|<<|>>|[-+/*%=<>&^|]', Operator),
(r'(as|abstract|callable|constructor|destructor|do|import|'
r'enum|event|final|get|interface|internal|of|override|'
diff --git a/pygments/lexers/dsls.py b/pygments/lexers/dsls.py
index a6a5e3b4..bdb06aae 100644
--- a/pygments/lexers/dsls.py
+++ b/pygments/lexers/dsls.py
@@ -632,7 +632,7 @@ class AlloyLexer(RegexLexer):
(iden_rex, Name),
(r'[:,]', Punctuation),
(r'[0-9]+', Number.Integer),
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r'\n', Text),
]
}
@@ -827,7 +827,7 @@ class FlatlineLexer(RegexLexer):
(r'0x-?[a-f\d]+', Number.Hex),
# strings, symbols and characters
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r"\\(.|[a-z]+)", String.Char),
# expression template placeholder
diff --git a/pygments/lexers/go.py b/pygments/lexers/go.py
index 3dc8df82..d64c87a5 100644
--- a/pygments/lexers/go.py
+++ b/pygments/lexers/go.py
@@ -90,7 +90,7 @@ class GoLexer(RegexLexer):
# -- raw_string_lit
(r'`[^`]*`', String),
# -- interpreted_string_lit
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# Tokens
(r'(<<=|>>=|<<|>>|<=|>=|&\^=|&\^|\+=|-=|\*=|/=|%=|&=|\|=|&&|\|\|'
r'|<-|\+\+|--|==|!=|:=|\.\.\.|[+\-*/%&])', Operator),
diff --git a/pygments/lexers/graphics.py b/pygments/lexers/graphics.py
index 52ab8e02..1bb653d4 100644
--- a/pygments/lexers/graphics.py
+++ b/pygments/lexers/graphics.py
@@ -425,7 +425,7 @@ class AsymptoteLexer(RegexLexer):
],
'statements': [
# simple string (TeX friendly)
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# C style string (with character escapes)
(r"'", String, 'string'),
(r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
@@ -775,7 +775,7 @@ class PovrayLexer(RegexLexer):
(r'[0-9]+\.[0-9]*', Number.Float),
(r'\.[0-9]+', Number.Float),
(r'[0-9]+', Number.Integer),
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r'\s+', Text),
]
}
diff --git a/pygments/lexers/haxe.py b/pygments/lexers/haxe.py
index f95e4556..3989965a 100644
--- a/pygments/lexers/haxe.py
+++ b/pygments/lexers/haxe.py
@@ -467,7 +467,7 @@ class HaxeLexer(ExtendedRegexLexer):
(r'"', String.Double, ('#pop', 'expr-chain', 'string-double')),
# EReg
- (r'~/(\\\\|\\/|[^/\n])*/[gimsu]*', String.Regex, ('#pop', 'expr-chain')),
+ (r'~/(\\\\|\\[^\\]|[^/\\\n])*/[gimsu]*', String.Regex, ('#pop', 'expr-chain')),
# Array
(r'\[', Punctuation, ('#pop', 'expr-chain', 'array-decl')),
@@ -722,7 +722,7 @@ class HaxeLexer(ExtendedRegexLexer):
(r'"', String.Double, ('#pop', 'string-double')),
# EReg
- (r'~/(\\\\|\\/|[^/\n])*/[gim]*', String.Regex, '#pop'),
+ (r'~/(\\\\|\\[^\\]|[^/\\\n])*/[gim]*', String.Regex, '#pop'),
# Array
(r'\[', Operator, ('#pop', 'array-decl')),
diff --git a/pygments/lexers/iolang.py b/pygments/lexers/iolang.py
index d6c022d2..9c2ed1a6 100644
--- a/pygments/lexers/iolang.py
+++ b/pygments/lexers/iolang.py
@@ -37,7 +37,7 @@ class IoLexer(RegexLexer):
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
(r'/\+', Comment.Multiline, 'nestedcomment'),
# DoubleQuotedString
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# Operators
(r'::=|:=|=|\(|\)|;|,|\*|-|\+|>|<|@|!|/|\||\^|\.|%|&|\[|\]|\{|\}',
Operator),
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index aa66b560..b0dfb689 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -292,12 +292,12 @@ class ScalaLexer(RegexLexer):
(r'(import|package)(\s+)', bygroups(Keyword, Text), 'import'),
(r'(type)(\s+)', bygroups(Keyword, Text), 'type'),
(r'""".*?"""(?!")', String),
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
(r"'%s" % idrest, Text.Symbol),
(r'[fs]"""', String, 'interptriplestring'), # interpolated strings
(r'[fs]"', String, 'interpstring'), # interpolated strings
- (r'raw"(\\\\|\\"|[^"])*"', String), # raw strings
+ (r'raw"(\\\\|\\[^\\]|[^"\\])*"', String), # raw strings
# (r'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator,
# Name.Attribute)),
(idrest, Name),
@@ -618,7 +618,7 @@ class IokeLexer(RegexLexer):
# Symbols
(r':[\w!:?]+', String.Symbol),
(r'[\w!:?]+:(?![\w!?])', String.Other),
- (r':"(\\\\|\\"|[^"])*"', String.Symbol),
+ (r':"(\\\\|\\[^\\]|[^"\\])*"', String.Symbol),
# Documentation
(r'((?<=fn\()|(?<=fnx\()|(?<=method\()|(?<=macro\()|(?<=lecro\()'
@@ -836,7 +836,7 @@ class ClojureLexer(RegexLexer):
(r'0x-?[abcdef\d]+', Number.Hex),
# strings, symbols and characters
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r"'" + valid_name, String.Symbol),
(r"\\(.|[a-z]+)", String.Char),
@@ -979,7 +979,7 @@ class CeylonLexer(RegexLexer):
(r'(class|interface|object|alias)(\s+)',
bygroups(Keyword.Declaration, Text), 'class'),
(r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r"'\\.'|'[^\\]'|'\\\{#[0-9a-fA-F]{4}\}'", String.Char),
(r'".*``.*``.*"', String.Interpol),
(r'(\.)([a-z_]\w*)',
@@ -1057,7 +1057,7 @@ class KotlinLexer(RegexLexer):
(r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
(r'[{}]', Punctuation),
(r'@"(""|[^"])*"', String),
- (r'"(\\\\|\\"|[^"\n])*["\n]', String),
+ (r'"(\\\\|\\[^\\]|[^"\\\n])*["\n]', String),
(r"'\\.'|'[^\\]'", String.Char),
(r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFL]?|"
r"0[xX][0-9a-fA-F]+[Ll]?", Number),
@@ -1659,8 +1659,8 @@ class SarlLexer(RegexLexer):
r'interface|skill|space)(\s+)', bygroups(Keyword.Declaration, Text),
'class'),
(r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
- (r'"(\\\\|\\"|[^"])*"', String),
- (r"'(\\\\|\\'|[^'])*'", String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r'[a-zA-Z_]\w*:', Name.Label),
(r'[a-zA-Z_$]\w*', Name),
(r'[~^*!%&\[\](){}<>\|+=:;,./?-]', Operator),
diff --git a/pygments/lexers/lisp.py b/pygments/lexers/lisp.py
index 47efb4fb..699ae525 100644
--- a/pygments/lexers/lisp.py
+++ b/pygments/lexers/lisp.py
@@ -119,7 +119,7 @@ class SchemeLexer(RegexLexer):
# (r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),
# strings, symbols and characters
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r"'" + valid_name, String.Symbol),
(r"#\\([()/'\"._!§$%& ?=+-]|[a-zA-Z0-9]+)", String.Char),
@@ -403,7 +403,7 @@ class HyLexer(RegexLexer):
(r'0[xX][a-fA-F0-9]+', Number.Hex),
# strings, symbols and characters
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r"'" + valid_name, String.Symbol),
(r"\\(.|[a-z]+)", String.Char),
(r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
@@ -1490,7 +1490,7 @@ class NewLispLexer(RegexLexer):
(r'\s+', Text),
# strings, symbols and characters
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# braces
(r'\{', String, "bracestring"),
@@ -2383,7 +2383,7 @@ class CPSALexer(SchemeLexer):
# (r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),
# strings, symbols and characters
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r"'" + valid_name, String.Symbol),
(r"#\\([()/'\"._!§$%& ?=+-]|[a-zA-Z0-9]+)", String.Char),
@@ -2596,7 +2596,7 @@ class XtlangLexer(RegexLexer):
(r'(#b|#o|#x)[\d.]+', Number),
# strings
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# true/false constants
(r'(#t|#f)', Name.Constant),
@@ -2672,7 +2672,7 @@ class FennelLexer(RegexLexer):
(r'-?\d+\.\d+', Number.Float),
(r'-?\d+', Number.Integer),
- (r'"(\\\\|\\"|\\|[^"\\])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# these are technically strings, but it's worth visually
# distinguishing them because their intent is different
diff --git a/pygments/lexers/make.py b/pygments/lexers/make.py
index 6e63b5aa..3f48d0cd 100644
--- a/pygments/lexers/make.py
+++ b/pygments/lexers/make.py
@@ -92,8 +92,8 @@ class BaseMakefileLexer(RegexLexer):
(r'([\w${}().-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)',
bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))),
# strings
- (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double),
- (r"(?s)'(\\\\|\\.|[^'\\])*'", String.Single),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
# targets
(r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text),
'block-header'),
diff --git a/pygments/lexers/parsers.py b/pygments/lexers/parsers.py
index 13a3a83c..ec2f39e0 100644
--- a/pygments/lexers/parsers.py
+++ b/pygments/lexers/parsers.py
@@ -64,10 +64,10 @@ class RagelLexer(RegexLexer):
(r'[+-]?[0-9]+', Number.Integer),
],
'literals': [
- (r'"(\\\\|\\"|[^"])*"', String), # double quote string
- (r"'(\\\\|\\'|[^'])*'", String), # single quote string
- (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals
- (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
+ (r'\[(\\\\|\\[^\\]|[^\\\]])*\]', String), # square bracket literals
+ (r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', String.Regex), # regular expressions
],
'identifiers': [
(r'[a-zA-Z_]\w*', Name.Variable),
@@ -106,15 +106,15 @@ class RagelLexer(RegexLexer):
r'[^\\]\\[{}]', # allow escaped { or }
# strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'"(\\\\|\\[^\\]|[^"\\])*"',
+ r"'(\\\\|\\[^\\]|[^'\\])*'",
r'//.*$\n?', # single line comment
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
r'\#.*$\n?', # ruby comment
# regular expression: There's no reason for it to start
# with a * and this stops confusion with comments.
- r'/(?!\*)(\\\\|\\/|[^/])*/',
+ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
# / is safe now that we've handled regex and javadoc comments
r'/',
@@ -147,12 +147,12 @@ class RagelEmbeddedLexer(RegexLexer):
r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them
# strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'"(\\\\|\\[^\\]|[^"\\])*"',
+ r"'(\\\\|\\[^\\]|[^'\\])*'",
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
r'//.*$\n?', # single line comment
r'\#.*$\n?', # ruby/ragel comment
- r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression
+ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', # regular expression
# / is safe now that we've handled regex and javadoc comments
r'/',
@@ -182,7 +182,7 @@ class RagelEmbeddedLexer(RegexLexer):
# specifically allow regex followed immediately by *
# so it doesn't get mistaken for a comment
- r'/(?!\*)(\\\\|\\/|[^/])*/\*',
+ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/\*',
# allow / as long as it's not followed by another / or by a *
r'/(?=[^/*]|$)',
@@ -193,9 +193,9 @@ class RagelEmbeddedLexer(RegexLexer):
)) + r')+',
# strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
- r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal
+ r'"(\\\\|\\[^\\]|[^"\\])*"',
+ r"'(\\\\|\\[^\\]|[^'\\])*'",
+ r"\[(\\\\|\\[^\\]|[^\]\\])*\]", # square bracket literal
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
r'//.*$\n?', # single line comment
r'\#.*$\n?', # ruby/ragel comment
@@ -416,8 +416,8 @@ class AntlrLexer(RegexLexer):
(r':', Punctuation),
# literals
- (r"'(\\\\|\\'|[^'])*'", String),
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r'<<([^>]|>[^>])>>', String),
# identifiers
# Tokens start with capital letter.
@@ -456,14 +456,14 @@ class AntlrLexer(RegexLexer):
r'[^${}\'"/\\]+', # exclude unsafe characters
# strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'"(\\\\|\\[^\\]|[^"\\])*"',
+ r"'(\\\\|\\[^\\]|[^'\\])*'",
r'//.*$\n?', # single line comment
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
# regular expression: There's no reason for it to start
# with a * and this stops confusion with comments.
- r'/(?!\*)(\\\\|\\/|[^/])*/',
+ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
# backslashes are okay, as long as we are not backslashing a %
r'\\(?!%)',
@@ -483,14 +483,14 @@ class AntlrLexer(RegexLexer):
r'[^$\[\]\'"/]+', # exclude unsafe characters
# strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'"(\\\\|\\[^\\]|[^"\\])*"',
+ r"'(\\\\|\\[^\\]|[^'\\])*'",
r'//.*$\n?', # single line comment
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
# regular expression: There's no reason for it to start
# with a * and this stops confusion with comments.
- r'/(?!\*)(\\\\|\\/|[^/])*/',
+ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
# Now that we've handled regex and javadoc comments
# it's safe to let / through.
@@ -701,8 +701,8 @@ class TreetopBaseLexer(RegexLexer):
'rule': [
include('space'),
include('end'),
- (r'"(\\\\|\\"|[^"])*"', String.Double),
- (r"'(\\\\|\\'|[^'])*'", String.Single),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
(r'[A-Za-z_]\w*', Name),
(r'[()]', Punctuation),
diff --git a/pygments/lexers/php.py b/pygments/lexers/php.py
index aab502e2..073ffe6f 100644
--- a/pygments/lexers/php.py
+++ b/pygments/lexers/php.py
@@ -81,8 +81,8 @@ class ZephirLexer(RegexLexer):
(r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
(r'0x[0-9a-fA-F]+', Number.Hex),
(r'[0-9]+', Number.Integer),
- (r'"(\\\\|\\"|[^"])*"', String.Double),
- (r"'(\\\\|\\'|[^'])*'", String.Single),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
]
}
diff --git a/pygments/lexers/prolog.py b/pygments/lexers/prolog.py
index 94f4024c..17ff2c6c 100644
--- a/pygments/lexers/prolog.py
+++ b/pygments/lexers/prolog.py
@@ -227,7 +227,7 @@ class LogtalkLexer(RegexLexer):
# Existential quantifier
(r'\^', Operator),
# Strings
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# Punctuation
(r'[()\[\],.|]', Text),
# Atoms
@@ -277,7 +277,7 @@ class LogtalkLexer(RegexLexer):
(r"[a-z][a-zA-Z0-9_]*", Text),
(r"'", String, 'quoted_atom'),
# Strings
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# End of entity-opening directive
(r'([)]\.)', Text, 'root'),
# Scope operator
diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py
index d1ae5aff..8c6f8e61 100644
--- a/pygments/lexers/ruby.py
+++ b/pygments/lexers/ruby.py
@@ -110,7 +110,7 @@ class RubyLexer(ExtendedRegexLexer):
# easy ones
(r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
(words(RUBY_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
- (r":'(\\\\|\\'|[^'])*'", String.Symbol),
+ (r":'(\\\\|\\[^\\]|[^'\\])*'", String.Symbol),
(r':"', String.Symbol, 'simple-sym'),
(r'([a-zA-Z_]\w*)(:)(?!:)',
bygroups(String.Symbol, Punctuation)), # Since Ruby 1.9
@@ -452,26 +452,26 @@ class FancyLexer(RegexLexer):
tokens = {
# copied from PerlLexer:
'balanced-regex': [
- (r'/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex, '#pop'),
- (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'),
+ (r'/(\\\\|\\[^\\]|[^/\\])*/[egimosx]*', String.Regex, '#pop'),
+ (r'!(\\\\|\\[^\\]|[^!\\])*![egimosx]*', String.Regex, '#pop'),
(r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
- (r'\{(\\\\|\\\}|[^}])*\}[egimosx]*', String.Regex, '#pop'),
- (r'<(\\\\|\\>|[^>])*>[egimosx]*', String.Regex, '#pop'),
- (r'\[(\\\\|\\\]|[^\]])*\][egimosx]*', String.Regex, '#pop'),
- (r'\((\\\\|\\\)|[^)])*\)[egimosx]*', String.Regex, '#pop'),
- (r'@(\\\\|\\@|[^@])*@[egimosx]*', String.Regex, '#pop'),
- (r'%(\\\\|\\%|[^%])*%[egimosx]*', String.Regex, '#pop'),
- (r'\$(\\\\|\\\$|[^$])*\$[egimosx]*', String.Regex, '#pop'),
+ (r'\{(\\\\|\\[^\\]|[^}\\])*\}[egimosx]*', String.Regex, '#pop'),
+ (r'<(\\\\|\\[^\\]|[^>\\])*>[egimosx]*', String.Regex, '#pop'),
+ (r'\[(\\\\|\\[^\\]|[^\]\\])*\][egimosx]*', String.Regex, '#pop'),
+ (r'\((\\\\|\\[^\\]|[^)\\])*\)[egimosx]*', String.Regex, '#pop'),
+ (r'@(\\\\|\\[^\\]|[^@\\])*@[egimosx]*', String.Regex, '#pop'),
+ (r'%(\\\\|\\[^\\]|[^%\\])*%[egimosx]*', String.Regex, '#pop'),
+ (r'\$(\\\\|\\[^\\]|[^$\\])*\$[egimosx]*', String.Regex, '#pop'),
],
'root': [
(r'\s+', Text),
# balanced delimiters (copied from PerlLexer):
- (r's\{(\\\\|\\\}|[^}])*\}\s*', String.Regex, 'balanced-regex'),
- (r's<(\\\\|\\>|[^>])*>\s*', String.Regex, 'balanced-regex'),
- (r's\[(\\\\|\\\]|[^\]])*\]\s*', String.Regex, 'balanced-regex'),
- (r's\((\\\\|\\\)|[^)])*\)\s*', String.Regex, 'balanced-regex'),
- (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex),
+ (r's\{(\\\\|\\[^\\]|[^}\\])*\}\s*', String.Regex, 'balanced-regex'),
+ (r's<(\\\\|\\[^\\]|[^>\\])*>\s*', String.Regex, 'balanced-regex'),
+ (r's\[(\\\\|\\[^\\]|[^\]\\])*\]\s*', String.Regex, 'balanced-regex'),
+ (r's\((\\\\|\\[^\\]|[^)\\])*\)\s*', String.Regex, 'balanced-regex'),
+ (r'm?/(\\\\|\\[^\\]|[^///\n])*/[gcimosx]*', String.Regex),
(r'm(?=[/!\\{<\[(@%$])', String.Regex, 'balanced-regex'),
# Comments
@@ -479,9 +479,9 @@ class FancyLexer(RegexLexer):
# Symbols
(r'\'([^\'\s\[\](){}]+|\[\])', String.Symbol),
# Multi-line DoubleQuotedString
- (r'"""(\\\\|\\"|[^"])*"""', String),
+ (r'"""(\\\\|\\[^\\]|[^\\])*?"""', String),
# DoubleQuotedString
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# keywords
(r'(def|class|try|catch|finally|retry|return|return_local|match|'
r'case|->|=>)\b', Keyword),
diff --git a/pygments/lexers/scripting.py b/pygments/lexers/scripting.py
index 885fed47..eea2c2ac 100644
--- a/pygments/lexers/scripting.py
+++ b/pygments/lexers/scripting.py
@@ -283,7 +283,7 @@ class ChaiscriptLexer(RegexLexer):
(r'0x[0-9a-fA-F]+', Number.Hex),
(r'[0-9]+', Number.Integer),
(r'"', String.Double, 'dqstring'),
- (r"'(\\\\|\\'|[^'])*'", String.Single),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
],
'dqstring': [
(r'\$\{[^"}]+?\}', String.Interpol),
@@ -689,7 +689,7 @@ class AppleScriptLexer(RegexLexer):
(r'\b(%s)s?\b' % '|'.join(StudioClasses), Name.Builtin),
(r'\b(%s)\b' % '|'.join(StudioCommands), Name.Builtin),
(r'\b(%s)\b' % '|'.join(References), Name.Builtin),
- (r'"(\\\\|\\"|[^"])*"', String.Double),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
(r'\b(%s)\b' % Identifiers, Name.Variable),
(r'[-+]?(\d+\.\d*|\d*\.\d+)(E[-+][0-9]+)?', Number.Float),
(r'[-+]?\d+', Number.Integer),
@@ -833,7 +833,7 @@ class MOOCodeLexer(RegexLexer):
# Numbers
(r'(0|[1-9][0-9_]*)', Number.Integer),
# Strings
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
# exceptions
(r'(E_PERM|E_DIV)', Name.Exception),
# db-refs
@@ -924,7 +924,7 @@ class HybrisLexer(RegexLexer):
'Runnable', 'CGI', 'ClientSocket', 'Socket', 'ServerSocket',
'File', 'Console', 'Directory', 'Exception'), suffix=r'\b'),
Keyword.Type),
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char),
(r'(\.)([a-zA-Z_]\w*)',
bygroups(Operator, Name.Attribute)),
diff --git a/pygments/lexers/supercollider.py b/pygments/lexers/supercollider.py
index 81865487..48e7ad30 100644
--- a/pygments/lexers/supercollider.py
+++ b/pygments/lexers/supercollider.py
@@ -84,8 +84,8 @@ class SuperColliderLexer(RegexLexer):
(r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
(r'0x[0-9a-fA-F]+', Number.Hex),
(r'[0-9]+', Number.Integer),
- (r'"(\\\\|\\"|[^"])*"', String.Double),
- (r"'(\\\\|\\'|[^'])*'", String.Single),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
]
}
diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py
index 7ebfb32e..a7773e56 100644
--- a/pygments/lexers/templates.py
+++ b/pygments/lexers/templates.py
@@ -179,8 +179,8 @@ class SmartyLexer(RegexLexer):
(r'(true|false|null)\b', Keyword.Constant),
(r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|"
r"0[xX][0-9a-fA-F]+[Ll]?", Number),
- (r'"(\\\\|\\"|[^"])*"', String.Double),
- (r"'(\\\\|\\'|[^'])*'", String.Single),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r'[a-zA-Z_]\w*', Name.Attribute)
]
}
@@ -252,8 +252,8 @@ class VelocityLexer(RegexLexer):
(r'\$!?\{?', Punctuation, 'variable'),
(r'\s+', Text),
(r'[,:]', Punctuation),
- (r'"(\\\\|\\"|[^"])*"', String.Double),
- (r"'(\\\\|\\'|[^'])*'", String.Single),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r"0[xX][0-9a-fA-F]+[Ll]?", Number),
(r"\b[0-9]+\b", Number),
(r'(true|false|null)\b', Keyword.Constant),
@@ -371,8 +371,8 @@ class DjangoLexer(RegexLexer):
(r'(loop|block|super|forloop)\b', Name.Builtin),
(r'[a-zA-Z_][\w-]*', Name.Variable),
(r'\.\w+', Name.Variable),
- (r':?"(\\\\|\\"|[^"])*"', String.Double),
- (r":?'(\\\\|\\'|[^'])*'", String.Single),
+ (r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r'([{}()\[\]+\-*/%,:~]|[><=]=?|!=)', Operator),
(r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|"
r"0[xX][0-9a-fA-F]+[Ll]?", Number),
@@ -1834,8 +1834,8 @@ class HandlebarsLexer(RegexLexer):
include('variable'),
# borrowed from DjangoLexer
- (r':?"(\\\\|\\"|[^"])*"', String.Double),
- (r":?'(\\\\|\\'|[^'])*'", String.Single),
+ (r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|"
r"0[xX][0-9a-fA-F]+[Ll]?", Number),
]
@@ -2147,8 +2147,8 @@ class TwigLexer(RegexLexer):
(_ident_inner, Name.Variable),
(r'\.' + _ident_inner, Name.Variable),
(r'\.[0-9]+', Number),
- (r':?"(\\\\|\\"|[^"])*"', String.Double),
- (r":?'(\\\\|\\'|[^'])*'", String.Single),
+ (r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r'([{}()\[\]+\-*/,:~%]|\.\.|\?|:|\*\*|\/\/|!=|[><=]=?)', Operator),
(r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|"
r"0[xX][0-9a-fA-F]+[Ll]?", Number),
@@ -2227,8 +2227,8 @@ class Angular2Lexer(RegexLexer):
# Literals
(r':?(true|false)', String.Boolean),
- (r':?"(\\\\|\\"|[^"])*"', String.Double),
- (r":?'(\\\\|\\'|[^'])*'", String.Single),
+ (r':?"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r":?'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|"
r"0[xX][0-9a-fA-F]+[Ll]?", Number),
diff --git a/pygments/lexers/textedit.py b/pygments/lexers/textedit.py
index ea2d4cf2..a92ccc79 100644
--- a/pygments/lexers/textedit.py
+++ b/pygments/lexers/textedit.py
@@ -69,8 +69,8 @@ class AwkLexer(RegexLexer):
(r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
(r'0x[0-9a-fA-F]+', Number.Hex),
(r'[0-9]+', Number.Integer),
- (r'"(\\\\|\\"|[^"])*"', String.Double),
- (r"'(\\\\|\\'|[^'])*'", String.Single),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
]
}
diff --git a/pygments/lexers/urbi.py b/pygments/lexers/urbi.py
index 7c11169d..16eca0df 100644
--- a/pygments/lexers/urbi.py
+++ b/pygments/lexers/urbi.py
@@ -117,11 +117,11 @@ class UrbiscriptLexer(ExtendedRegexLexer):
],
'string.double': [
(r'((?:\\\\|\\"|[^"])*?)(\\B\((\d+)\)\()', blob_callback),
- (r'(\\\\|\\"|[^"])*?"', String.Double, '#pop'),
+ (r'(\\\\|\\[^\\]|[^"\\])*?"', String.Double, '#pop'),
],
'string.single': [
(r"((?:\\\\|\\'|[^'])*?)(\\B\((\d+)\)\()", blob_callback),
- (r"(\\\\|\\'|[^'])*?'", String.Single, '#pop'),
+ (r"(\\\\|\\[^\\]|[^'\\])*?'", String.Single, '#pop'),
],
# from http://pygments.org/docs/lexerdevelopment/#changing-states
'comment': [
diff --git a/pygments/lexers/webmisc.py b/pygments/lexers/webmisc.py
index dab36aa6..cb84004d 100644
--- a/pygments/lexers/webmisc.py
+++ b/pygments/lexers/webmisc.py
@@ -857,8 +857,8 @@ class QmlLexer(RegexLexer):
(r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
(r'0x[0-9a-fA-F]+', Number.Hex),
(r'[0-9]+', Number.Integer),
- (r'"(\\\\|\\"|[^"])*"', String.Double),
- (r"'(\\\\|\\'|[^'])*'", String.Single),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
]
}
diff --git a/pygments/lexers/x10.py b/pygments/lexers/x10.py
index 76138c9e..23b3c1d4 100644
--- a/pygments/lexers/x10.py
+++ b/pygments/lexers/x10.py
@@ -62,7 +62,7 @@ class X10Lexer(RegexLexer):
(r'\b(%s)\b' % '|'.join(types), Keyword.Type),
(r'\b(%s)\b' % '|'.join(values), Keyword.Constant),
(r'\b(%s)\b' % '|'.join(modifiers), Keyword.Declaration),
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
(r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
(r'.', Text)
],
diff --git a/tests/test_html_lexer.py b/tests/test_html_lexer.py
index 9ac72e33..62f1c8d4 100644
--- a/tests/test_html_lexer.py
+++ b/tests/test_html_lexer.py
@@ -65,9 +65,7 @@ def test_long_unclosed_javascript_fragment(lexer_html):
tokens_body = [
(Token.Name.Other, 'alert'),
(Token.Punctuation, '('),
- (Token.Literal.String.Double, '"'),
- (Token.Literal.String.Double, 'hi'),
- (Token.Literal.String.Double, '"'),
+ (Token.Literal.String.Double, '"hi"'),
(Token.Punctuation, ')'),
(Token.Punctuation, ';'),
]