diff options
Diffstat (limited to 'pygments/lexers/asm.py')
-rw-r--r-- | pygments/lexers/asm.py | 43 |
1 files changed, 27 insertions, 16 deletions
diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py index d94204b6..bdb48ce2 100644 --- a/pygments/lexers/asm.py +++ b/pygments/lexers/asm.py @@ -453,6 +453,7 @@ class LlvmLexer(RegexLexer): ] } + class LlvmMirBodyLexer(RegexLexer): """ For LLVM MIR examples without the YAML wrapper. @@ -471,19 +472,19 @@ class LlvmMirBodyLexer(RegexLexer): # Attributes on basic blocks (words(('liveins', 'successors'), suffix=':'), Keyword), # Basic Block Labels - (r'bb\.[0-9]+(\.[0-9a-zA-Z_.-]+)?( \(address-taken\))?:', Name.Label), - (r'bb\.[0-9]+ \(%[0-9a-zA-Z_.-]+\)( \(address-taken\))?:', Name.Label), + (r'bb\.[0-9]+(\.[\w.-]+)?( \(address-taken\))?:', Name.Label), + (r'bb\.[0-9]+ \(%[\w.-]+\)( \(address-taken\))?:', Name.Label), (r'%bb\.[0-9]+(\.\w+)?', Name.Label), # Stack references (r'%stack\.[0-9]+(\.\w+\.addr)?', Name), # Subreg indices (r'%subreg\.\w+', Name), # Virtual registers - (r'%[0-9a-zA-Z_]+ *', Name.Variable, 'vreg'), + (r'%\w+ *', Name.Variable, 'vreg'), # Reference to LLVM-IR global include('global'), # Reference to Intrinsic - (r'intrinsic\(\@[0-9a-zA-Z_.]+\)', Name.Variable.Global), + (r'intrinsic\(\@[\w.]+\)', Name.Variable.Global), # Comparison predicates (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult', 'ule'), prefix=r'intpred\(', suffix=r'\)'), Name.Builtin), @@ -493,7 +494,7 @@ class LlvmMirBodyLexer(RegexLexer): # Physical registers (r'\$\w+', String.Single), # Assignment operator - (r'[=]', Operator), + (r'=', Operator), # gMIR Opcodes (r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|' r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|' @@ -526,7 +527,7 @@ class LlvmMirBodyLexer(RegexLexer): # Flags (words(('killed', 'implicit')), Keyword), # ConstantInt values - (r'[i][0-9]+ +', Keyword.Type, 'constantint'), + (r'i[0-9]+ +', Keyword.Type, 'constantint'), # ConstantFloat values (r'(half|float|double) +', Keyword.Type, 'constantfloat'), # Bare immediates @@ -536,7 +537,7 @@ class LlvmMirBodyLexer(RegexLexer): # MIR Comments (r';.*', Comment), # If we get here, assume it's a target instruction - (r'[0-9a-zA-Z_]+', Name), + (r'\w+', Name), # Everything else that isn't highlighted (r'[(), \n]+', Text), ], @@ -560,7 +561,7 @@ class LlvmMirBodyLexer(RegexLexer): 'vreg_bank_or_class': [ # The unassigned bank/class (r' *_', Name.Variable.Magic), - (r' *[0-9a-zA-Z_]+', Name.Variable), + (r' *\w+', Name.Variable), # The LLT if there is one (r' *\(', Text, 'vreg_type'), (r'(?=.)', Text, '#pop'), @@ -579,8 +580,8 @@ class LlvmMirBodyLexer(RegexLexer): 'acquire', 'release', 'acq_rel', 'seq_cst')), Keyword), # IR references - (r'%ir\.[0-9a-zA-Z_.-]+', Name), - (r'%ir-block\.[0-9a-zA-Z_.-]+', Name), + (r'%ir\.[\w.-]+', Name), + (r'%ir-block\.[\w.-]+', Name), (r'[-+]', Operator), include('integer'), include('global'), @@ -590,9 +591,10 @@ class LlvmMirBodyLexer(RegexLexer): ], 'integer': [(r'-?[0-9]+', Number.Integer),], 'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)], - 'global': [(r'\@[0-9a-zA-Z_.]+', Name.Variable.Global)], + 'global': [(r'\@[\w.]+', Name.Variable.Global)], } + class LlvmMirLexer(RegexLexer): """ Lexer for the overall LLVM MIR document format. @@ -649,9 +651,18 @@ class LlvmMirLexer(RegexLexer): (r'.+', Text), (r'\n', Text), ], - 'name': [ (r'[^\n]+', Name), default('#pop') ], - 'boolean': [ (r' *(true|false)', Name.Builtin), default('#pop') ], - 'number': [ (r' *[0-9]+', Number), default('#pop') ], + 'name': [ + (r'[^\n]+', Name), + default('#pop'), + ], + 'boolean': [ + (r' *(true|false)', Name.Builtin), + default('#pop'), + ], + 'number': [ + (r' *[0-9]+', Number), + default('#pop'), + ], 'llvm_mir_body': [ # Documents end with '...' or '---'. # We have to pop llvm_mir_body and llvm_mir @@ -660,7 +671,7 @@ class LlvmMirLexer(RegexLexer): (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))), # The '...' is optional. If we didn't already find it then it isn't # there. There might be a '---' instead though. - (r'(?!\.\.\.|---)((.|\n)+)', bygroups(using(LlvmMirBodyLexer), Keyword)), + (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))), ], } @@ -924,7 +935,7 @@ class Dasm16Lexer(RegexLexer): ] # Regexes yo - char = r'[a-zA-Z$._0-9@]' + char = r'[\w$@.]' identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)' number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)' binary_number = r'0b[01_]+' |