diff options
| author | Jendrik <12938023+jendrikw@users.noreply.github.com> | 2021-04-04 17:50:00 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-04-04 17:50:00 +0200 |
| commit | 06f2eba8431a956c84fba9777f8bb2edcf46beee (patch) | |
| tree | 4e5812b84d148e7b0fcce2a1c36458183c1d89ae | |
| parent | 96973db5d787c794df20b4fd0b5278d4c5f64641 (diff) | |
| download | pygments-git-06f2eba8431a956c84fba9777f8bb2edcf46beee.tar.gz | |
Fix #1416: add WebAssembly lexer (#1564)
* add WebAssembly lexer
* avoid test failure by using the default function instead of an empty regex
* address small issues
* fix WebAssembly string escapes
* change WebAssembly multiline comment parsing
* update copyright year
* set versionadded to 2.9
* change WebAssembly tests to use the new test system
* change WebAssembly unit test to use snippets
| -rw-r--r-- | pygments/lexers/_mapping.py | 1 | ||||
| -rw-r--r-- | pygments/lexers/webassembly.py | 120 | ||||
| -rw-r--r-- | tests/examplefiles/wat/fib.wat | 35 | ||||
| -rw-r--r-- | tests/examplefiles/wat/fib.wat.output | 234 | ||||
| -rw-r--r-- | tests/snippets/wat/test_align_and_offset_accept_hexadecimal_numbers.txt | 14 | ||||
| -rw-r--r-- | tests/snippets/wat/test_comment_with_open_paren.txt | 10 | ||||
| -rw-r--r-- | tests/snippets/wat/test_comment_with_semicolon.txt | 10 | ||||
| -rw-r--r-- | tests/snippets/wat/test_i32_const_is_builtin.txt | 6 | ||||
| -rw-r--r-- | tests/snippets/wat/test_multiline_comment.txt | 11 | ||||
| -rw-r--r-- | tests/snippets/wat/test_nested_comment.txt | 14 | ||||
| -rw-r--r-- | tests/snippets/wat/test_string_byte_escape.txt | 9 | ||||
| -rw-r--r-- | tests/snippets/wat/test_string_with_escape.txt | 9 | ||||
| -rw-r--r-- | tests/snippets/wat/test_variable_name_pattern.txt | 6 |
13 files changed, 479 insertions, 0 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index e51c8be1..3d3175e5 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -489,6 +489,7 @@ LEXERS = { 'VhdlLexer': ('pygments.lexers.hdl', 'vhdl', ('vhdl',), ('*.vhdl', '*.vhd'), ('text/x-vhdl',)), 'VimLexer': ('pygments.lexers.textedit', 'VimL', ('vim',), ('*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'), ('text/x-vim',)), 'WDiffLexer': ('pygments.lexers.diff', 'WDiff', ('wdiff',), ('*.wdiff',), ()), + 'WatLexer': ('pygments.lexers.webassembly', 'WebAssembly', ('wast', 'wat'), ('*.wat', '*.wast'), ()), 'WebIDLLexer': ('pygments.lexers.webidl', 'Web IDL', ('webidl',), ('*.webidl',), ()), 'WhileyLexer': ('pygments.lexers.whiley', 'Whiley', ('whiley',), ('*.whiley',), ('text/x-whiley',)), 'X10Lexer': ('pygments.lexers.x10', 'X10', ('x10', 'xten'), ('*.x10',), ('text/x-x10',)), diff --git a/pygments/lexers/webassembly.py b/pygments/lexers/webassembly.py new file mode 100644 index 00000000..d162a46f --- /dev/null +++ b/pygments/lexers/webassembly.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.webassembly + ~~~~~~~~~~~~~~~~~~~ + + Lexers for the WebAssembly text format. + + The grammar can be found at https://github.com/WebAssembly/spec/blob/master/interpreter/README.md + and https://webassembly.github.io/spec/core/text/. + + + :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" + +from pygments.lexer import RegexLexer, words, bygroups, default +from pygments.token import Text, Comment, Operator, Keyword, String, Number, Punctuation, Literal, Error, Name + +__all__ = ['WatLexer'] + +keywords = ( + 'module', 'import', 'func', 'funcref', 'start', 'param', 'local', 'type', + 'result', 'export', 'memory', 'global', 'mut', 'data', 'table', 'elem', + 'if', 'then', 'else', 'end', 'block', 'loop' +) + +builtins = ( + 'unreachable', 'nop', 'block', 'loop', 'if', 'else', 'end', 'br', 'br_if', + 'br_table', 'return', 'call', 'call_indirect', 'drop', 'select', + 'local.get', 'local.set', 'local.tee', 'global.get', 'global.set', + 'i32.load', 'i64.load', 'f32.load', 'f64.load', 'i32.load8_s', + 'i32.load8_u', 'i32.load16_s', 'i32.load16_u', 'i64.load8_s', + 'i64.load8_u', 'i64.load16_s', 'i64.load16_u', 'i64.load32_s', + 'i64.load32_u', 'i32.store', 'i64.store', 'f32.store', 'f64.store', + 'i32.store8', 'i32.store16', 'i64.store8', 'i64.store16', 'i64.store32', + 'memory.size', 'memory.grow', 'i32.const', 'i64.const', 'f32.const', + 'f64.const', 'i32.eqz', 'i32.eq', 'i32.ne', 'i32.lt_s', 'i32.lt_u', + 'i32.gt_s', 'i32.gt_u', 'i32.le_s', 'i32.le_u', 'i32.ge_s', 'i32.ge_u', + 'i64.eqz', 'i64.eq', 'i64.ne', 'i64.lt_s', 'i64.lt_u', 'i64.gt_s', + 'i64.gt_u', 'i64.le_s', 'i64.le_u', 'i64.ge_s', 'i64.ge_u', 'f32.eq', + 'f32.ne', 'f32.lt', 'f32.gt', 'f32.le', 'f32.ge', 'f64.eq', 'f64.ne', + 'f64.lt', 'f64.gt', 'f64.le', 'f64.ge', 'i32.clz', 'i32.ctz', 'i32.popcnt', + 'i32.add', 'i32.sub', 'i32.mul', 'i32.div_s', 'i32.div_u', 'i32.rem_s', + 'i32.rem_u', 'i32.and', 'i32.or', 'i32.xor', 'i32.shl', 'i32.shr_s', + 'i32.shr_u', 'i32.rotl', 'i32.rotr', 'i64.clz', 'i64.ctz', 'i64.popcnt', + 'i64.add', 'i64.sub', 'i64.mul', 'i64.div_s', 'i64.div_u', 'i64.rem_s', + 'i64.rem_u', 'i64.and', 'i64.or', 'i64.xor', 'i64.shl', 'i64.shr_s', + 'i64.shr_u', 'i64.rotl', 'i64.rotr', 'f32.abs', 'f32.neg', 'f32.ceil', + 'f32.floor', 'f32.trunc', 'f32.nearest', 'f32.sqrt', 
'f32.add', 'f32.sub', + 'f32.mul', 'f32.div', 'f32.min', 'f32.max', 'f32.copysign', 'f64.abs', + 'f64.neg', 'f64.ceil', 'f64.floor', 'f64.trunc', 'f64.nearest', 'f64.sqrt', + 'f64.add', 'f64.sub', 'f64.mul', 'f64.div', 'f64.min', 'f64.max', + 'f64.copysign', 'i32.wrap_i64', 'i32.trunc_f32_s', 'i32.trunc_f32_u', + 'i32.trunc_f64_s', 'i32.trunc_f64_u', 'i64.extend_i32_s', + 'i64.extend_i32_u', 'i64.trunc_f32_s', 'i64.trunc_f32_u', + 'i64.trunc_f64_s', 'i64.trunc_f64_u', 'f32.convert_i32_s', + 'f32.convert_i32_u', 'f32.convert_i64_s', 'f32.convert_i64_u', + 'f32.demote_f64', 'f64.convert_i32_s', 'f64.convert_i32_u', + 'f64.convert_i64_s', 'f64.convert_i64_u', 'f64.promote_f32', + 'i32.reinterpret_f32', 'i64.reinterpret_f64', 'f32.reinterpret_i32', + 'f64.reinterpret_i64', +) + + +class WatLexer(RegexLexer): + """Lexer for the `WebAssembly text format <https://webassembly.org/>`_. + + .. versionadded:: 2.9 + """ + + name = 'WebAssembly' + aliases = ['wast', 'wat'] + filenames = ['*.wat', '*.wast'] + + tokens = { + 'root': [ + (words(keywords, suffix=r'(?=[^a-z_\.])'), Keyword), + (words(builtins), Name.Builtin, 'arguments'), + (words(['i32', 'i64', 'f32', 'f64']), Keyword.Type), + (r'\$[A-Za-z0-9!#$%&\'*+./:<=>?@\\^_`|~-]+', Name.Variable), # yes, all of these are valid in identifiers + (r';;.*?$', Comment.Single), + (r'\(;', Comment.Multiline, 'nesting_comment'), + (r'[+-]?0x[\dA-Fa-f](_?[\dA-Fa-f])*(.([\dA-Fa-f](_?[\dA-Fa-f])*)?)?([pP][+-]?[\dA-Fa-f](_?[\dA-Fa-f])*)?', Number.Float), + (r'[+-]?\d.\d(_?\d)*[eE][+-]?\d(_?\d)*', Number.Float), + (r'[+-]?\d.\d(_?\d)*', Number.Float), + (r'[+-]?\d.[eE][+-]?\d(_?\d)*', Number.Float), + (r'[+-]?(inf|nan:0x[\dA-Fa-f](_?[\dA-Fa-f])*|nan)', Number.Float), + (r'[+-]?0x[\dA-Fa-f](_?[\dA-Fa-f])*', Number.Hex), + (r'[+-]?\d(_?\d)*', Number.Integer), + (r'[\(\)]', Punctuation), + (r'"', String.Double, 'string'), + (r'\s+', Text), + ], + 'nesting_comment': [ + (r'\(;', Comment.Multiline, '#push'), + (r';\)', Comment.Multiline, 
'#pop'), + (r'[^;(]+', Comment.Multiline), + (r'[;(]', Comment.Multiline), + ], + 'string': [ + (r'\\[\dA-Fa-f][\dA-Fa-f]', String.Escape), # must have exactly two hex digits + (r'\\t', String.Escape), + (r'\\n', String.Escape), + (r'\\r', String.Escape), + (r'\\"', String.Escape), + (r"\\'", String.Escape), + (r'\\u\{[\dA-Fa-f](_?[\dA-Fa-f])*\}', String.Escape), + (r'\\\\', String.Escape), + (r'"', String.Double, '#pop'), + (r'[^"\\]+', String.Double), + ], + 'arguments': [ + (r'\s+', Text), + (r'(offset)(=)(0x[\dA-Fa-f](_?[\dA-Fa-f])*)', bygroups(Keyword, Operator, Number.Hex)), + (r'(offset)(=)(\d(_?\d)*)', bygroups(Keyword, Operator, Number.Integer)), + (r'(align)(=)(0x[\dA-Fa-f](_?[\dA-Fa-f])*)', bygroups(Keyword, Operator, Number.Hex)), + (r'(align)(=)(\d(_?\d)*)', bygroups(Keyword, Operator, Number.Integer)), + default('#pop'), + ] + } diff --git a/tests/examplefiles/wat/fib.wat b/tests/examplefiles/wat/fib.wat new file mode 100644 index 00000000..c14b3034 --- /dev/null +++ b/tests/examplefiles/wat/fib.wat @@ -0,0 +1,35 @@ +(module + (func $fib (param $n i32) (result i32) + (local $a i32) + (local $b i32) + (local $result i32) + (if + (i32.eqz (local.get $n)) + (then + (return (i32.const 1)) + ) + ) + (local.set $b (i32.const 1)) + (; nested (; comment ;) ;) + loop + (local.set $result (i32.add (local.get $a) (local.get $b))) + (local.set $a (local.get $b)) + (local.set $b (local.get $result)) + + ;; decrement $n + (local.tee $n (i32.sub (local.get $n) (i32.const 1))) + + (; test if $n > 0 ;) + (i32.gt_u (i32.const 0)) + + ;; if so, jump to the beginning of the loop + br_if 0 + end + local.get $result + ) + (func $test_memory_store_args + i32.const 1 + f64.store align=8 offset=16 + ) + (export "fib" (func $fib)) +)
\ No newline at end of file diff --git a/tests/examplefiles/wat/fib.wat.output b/tests/examplefiles/wat/fib.wat.output new file mode 100644 index 00000000..e170228e --- /dev/null +++ b/tests/examplefiles/wat/fib.wat.output @@ -0,0 +1,234 @@ +'(' Punctuation +'module' Keyword +'\n ' Text +'(' Punctuation +'func' Keyword +' ' Text +'$fib' Name.Variable +' ' Text +'(' Punctuation +'param' Keyword +' ' Text +'$n' Name.Variable +' ' Text +'i32' Keyword.Type +')' Punctuation +' ' Text +'(' Punctuation +'result' Keyword +' ' Text +'i32' Keyword.Type +')' Punctuation +'\n ' Text +'(' Punctuation +'local' Keyword +' ' Text +'$a' Name.Variable +' ' Text +'i32' Keyword.Type +')' Punctuation +'\n ' Text +'(' Punctuation +'local' Keyword +' ' Text +'$b' Name.Variable +' ' Text +'i32' Keyword.Type +')' Punctuation +'\n ' Text +'(' Punctuation +'local' Keyword +' ' Text +'$result' Name.Variable +' ' Text +'i32' Keyword.Type +')' Punctuation +'\n ' Text +'(' Punctuation +'if' Keyword +'\n ' Text +'(' Punctuation +'i32.eqz' Name.Builtin +' ' Text +'(' Punctuation +'local.get' Name.Builtin +' ' Text +'$n' Name.Variable +')' Punctuation +')' Punctuation +'\n ' Text +'(' Punctuation +'then' Keyword +'\n ' Text +'(' Punctuation +'return' Name.Builtin +' ' Text +'(' Punctuation +'i32.const' Name.Builtin +' ' Text +'1' Literal.Number.Integer +')' Punctuation +')' Punctuation +'\n ' Text +')' Punctuation +'\n ' Text +')' Punctuation +'\n ' Text +'(' Punctuation +'local.set' Name.Builtin +' ' Text +'$b' Name.Variable +' ' Text +'(' Punctuation +'i32.const' Name.Builtin +' ' Text +'1' Literal.Number.Integer +')' Punctuation +')' Punctuation +'\n ' Text +'(;' Comment.Multiline +' nested ' Comment.Multiline +'(;' Comment.Multiline +' comment ' Comment.Multiline +';)' Comment.Multiline +' ' Comment.Multiline +';)' Comment.Multiline +'\n ' Text +'loop' Keyword +'\n ' Text +'(' Punctuation +'local.set' Name.Builtin +' ' Text +'$result' Name.Variable +' ' Text +'(' Punctuation +'i32.add' 
Name.Builtin +' ' Text +'(' Punctuation +'local.get' Name.Builtin +' ' Text +'$a' Name.Variable +')' Punctuation +' ' Text +'(' Punctuation +'local.get' Name.Builtin +' ' Text +'$b' Name.Variable +')' Punctuation +')' Punctuation +')' Punctuation +'\n ' Text +'(' Punctuation +'local.set' Name.Builtin +' ' Text +'$a' Name.Variable +' ' Text +'(' Punctuation +'local.get' Name.Builtin +' ' Text +'$b' Name.Variable +')' Punctuation +')' Punctuation +'\n ' Text +'(' Punctuation +'local.set' Name.Builtin +' ' Text +'$b' Name.Variable +' ' Text +'(' Punctuation +'local.get' Name.Builtin +' ' Text +'$result' Name.Variable +')' Punctuation +')' Punctuation +'\n\n ' Text +';; decrement $n' Comment.Single +'\n ' Text +'(' Punctuation +'local.tee' Name.Builtin +' ' Text +'$n' Name.Variable +' ' Text +'(' Punctuation +'i32.sub' Name.Builtin +' ' Text +'(' Punctuation +'local.get' Name.Builtin +' ' Text +'$n' Name.Variable +')' Punctuation +' ' Text +'(' Punctuation +'i32.const' Name.Builtin +' ' Text +'1' Literal.Number.Integer +')' Punctuation +')' Punctuation +')' Punctuation +'\n\n ' Text +'(;' Comment.Multiline +' test if $n > 0 ' Comment.Multiline +';)' Comment.Multiline +'\n ' Text +'(' Punctuation +'i32.gt_u' Name.Builtin +' ' Text +'(' Punctuation +'i32.const' Name.Builtin +' ' Text +'0' Literal.Number.Integer +')' Punctuation +')' Punctuation +'\n\n ' Text +';; if so, jump to the beginning of the loop' Comment.Single +'\n ' Text +'br_if' Name.Builtin +' ' Text +'0' Literal.Number.Integer +'\n ' Text +'end' Keyword +'\n ' Text +'local.get' Name.Builtin +' ' Text +'$result' Name.Variable +'\n ' Text +')' Punctuation +'\n ' Text +'(' Punctuation +'func' Keyword +' ' Text +'$test_memory_store_args' Name.Variable +'\n ' Text +'i32.const' Name.Builtin +' ' Text +'1' Literal.Number.Integer +'\n ' Text +'f64.store' Name.Builtin +' ' Text +'align' Keyword +'=' Operator +'8' Literal.Number.Integer +' ' Text +'offset' Keyword +'=' Operator +'16' Literal.Number.Integer +'\n ' Text 
+')' Punctuation +'\n ' Text +'(' Punctuation +'export' Keyword +' ' Text +'"' Literal.String.Double +'fib' Literal.String.Double +'"' Literal.String.Double +' ' Text +'(' Punctuation +'func' Keyword +' ' Text +'$fib' Name.Variable +')' Punctuation +')' Punctuation +'\n' Text + +')' Punctuation +'\n' Text diff --git a/tests/snippets/wat/test_align_and_offset_accept_hexadecimal_numbers.txt b/tests/snippets/wat/test_align_and_offset_accept_hexadecimal_numbers.txt new file mode 100644 index 00000000..919e1d21 --- /dev/null +++ b/tests/snippets/wat/test_align_and_offset_accept_hexadecimal_numbers.txt @@ -0,0 +1,14 @@ +---input--- +i32.store offset=0xdeadbeef align=0x1 + +---tokens--- +'i32.store' Name.Builtin +' ' Text +'offset' Keyword +'=' Operator +'0xdeadbeef' Literal.Number.Hex +' ' Text +'align' Keyword +'=' Operator +'0x1' Literal.Number.Hex +'\n' Text diff --git a/tests/snippets/wat/test_comment_with_open_paren.txt b/tests/snippets/wat/test_comment_with_open_paren.txt new file mode 100644 index 00000000..631de4cb --- /dev/null +++ b/tests/snippets/wat/test_comment_with_open_paren.txt @@ -0,0 +1,10 @@ +---input--- +(; comment with ( open paren ;) + +---tokens--- +'(;' Comment.Multiline +' comment with ' Comment.Multiline +'(' Comment.Multiline +' open paren ' Comment.Multiline +';)' Comment.Multiline +'\n' Text diff --git a/tests/snippets/wat/test_comment_with_semicolon.txt b/tests/snippets/wat/test_comment_with_semicolon.txt new file mode 100644 index 00000000..0cd31123 --- /dev/null +++ b/tests/snippets/wat/test_comment_with_semicolon.txt @@ -0,0 +1,10 @@ +---input--- +(; comment with ; semicolon ;) + +---tokens--- +'(;' Comment.Multiline +' comment with ' Comment.Multiline +';' Comment.Multiline +' semicolon ' Comment.Multiline +';)' Comment.Multiline +'\n' Text diff --git a/tests/snippets/wat/test_i32_const_is_builtin.txt b/tests/snippets/wat/test_i32_const_is_builtin.txt new file mode 100644 index 00000000..740907cc --- /dev/null +++ 
b/tests/snippets/wat/test_i32_const_is_builtin.txt @@ -0,0 +1,6 @@ +---input--- +i32.const + +---tokens--- +'i32.const' Name.Builtin +'\n' Text diff --git a/tests/snippets/wat/test_multiline_comment.txt b/tests/snippets/wat/test_multiline_comment.txt new file mode 100644 index 00000000..6cbd45ea --- /dev/null +++ b/tests/snippets/wat/test_multiline_comment.txt @@ -0,0 +1,11 @@ +---input--- +(; + comment +;) + +---tokens--- +'(;' Comment.Multiline +'\n comment\n' Comment.Multiline + +';)' Comment.Multiline +'\n' Text diff --git a/tests/snippets/wat/test_nested_comment.txt b/tests/snippets/wat/test_nested_comment.txt new file mode 100644 index 00000000..de072939 --- /dev/null +++ b/tests/snippets/wat/test_nested_comment.txt @@ -0,0 +1,14 @@ +---input--- +(; +nested(;;)comment +;) + +---tokens--- +'(;' Comment.Multiline +'\nnested' Comment.Multiline +'(;' Comment.Multiline +';)' Comment.Multiline +'comment\n' Comment.Multiline + +';)' Comment.Multiline +'\n' Text diff --git a/tests/snippets/wat/test_string_byte_escape.txt b/tests/snippets/wat/test_string_byte_escape.txt new file mode 100644 index 00000000..c0b9e4a0 --- /dev/null +++ b/tests/snippets/wat/test_string_byte_escape.txt @@ -0,0 +1,9 @@ +---input--- +"\001" + +---tokens--- +'"' Literal.String.Double +'\\00' Literal.String.Escape +'1' Literal.String.Double +'"' Literal.String.Double +'\n' Text diff --git a/tests/snippets/wat/test_string_with_escape.txt b/tests/snippets/wat/test_string_with_escape.txt new file mode 100644 index 00000000..c978faa4 --- /dev/null +++ b/tests/snippets/wat/test_string_with_escape.txt @@ -0,0 +1,9 @@ +---input--- +"string\t" + +---tokens--- +'"' Literal.String.Double +'string' Literal.String.Double +'\\t' Literal.String.Escape +'"' Literal.String.Double +'\n' Text diff --git a/tests/snippets/wat/test_variable_name_pattern.txt b/tests/snippets/wat/test_variable_name_pattern.txt new file mode 100644 index 00000000..d305ab94 --- /dev/null +++ 
b/tests/snippets/wat/test_variable_name_pattern.txt @@ -0,0 +1,6 @@ +---input--- +$ABCabc123!#$%&'*+./:<=>?@\\^_`|~-A + +---tokens--- +"$ABCabc123!#$%&'*+./:<=>?@\\\\^_`|~-A" Name.Variable +'\n' Text |
