summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJendrik <12938023+jendrikw@users.noreply.github.com>2021-04-04 17:50:00 +0200
committerGitHub <noreply@github.com>2021-04-04 17:50:00 +0200
commit06f2eba8431a956c84fba9777f8bb2edcf46beee (patch)
tree4e5812b84d148e7b0fcce2a1c36458183c1d89ae
parent96973db5d787c794df20b4fd0b5278d4c5f64641 (diff)
downloadpygments-git-06f2eba8431a956c84fba9777f8bb2edcf46beee.tar.gz
Fix #1416: add WebAssembly lexer (#1564)
* add WebAssembly lexer * avoid test failure by using the default function instead of an empty regex * address small issues * fix WebAssembly string escapes * change WebAssembly multiline comment parsing * update copyright year * set versionadded to 2.9 * change WebAssembly tests to use the new test system * change WebAssembly unit test to use snippets
-rw-r--r--pygments/lexers/_mapping.py1
-rw-r--r--pygments/lexers/webassembly.py120
-rw-r--r--tests/examplefiles/wat/fib.wat35
-rw-r--r--tests/examplefiles/wat/fib.wat.output234
-rw-r--r--tests/snippets/wat/test_align_and_offset_accept_hexadecimal_numbers.txt14
-rw-r--r--tests/snippets/wat/test_comment_with_open_paren.txt10
-rw-r--r--tests/snippets/wat/test_comment_with_semicolon.txt10
-rw-r--r--tests/snippets/wat/test_i32_const_is_builtin.txt6
-rw-r--r--tests/snippets/wat/test_multiline_comment.txt11
-rw-r--r--tests/snippets/wat/test_nested_comment.txt14
-rw-r--r--tests/snippets/wat/test_string_byte_escape.txt9
-rw-r--r--tests/snippets/wat/test_string_with_escape.txt9
-rw-r--r--tests/snippets/wat/test_variable_name_pattern.txt6
13 files changed, 479 insertions, 0 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index e51c8be1..3d3175e5 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -489,6 +489,7 @@ LEXERS = {
'VhdlLexer': ('pygments.lexers.hdl', 'vhdl', ('vhdl',), ('*.vhdl', '*.vhd'), ('text/x-vhdl',)),
'VimLexer': ('pygments.lexers.textedit', 'VimL', ('vim',), ('*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'), ('text/x-vim',)),
'WDiffLexer': ('pygments.lexers.diff', 'WDiff', ('wdiff',), ('*.wdiff',), ()),
+ 'WatLexer': ('pygments.lexers.webassembly', 'WebAssembly', ('wast', 'wat'), ('*.wat', '*.wast'), ()),
'WebIDLLexer': ('pygments.lexers.webidl', 'Web IDL', ('webidl',), ('*.webidl',), ()),
'WhileyLexer': ('pygments.lexers.whiley', 'Whiley', ('whiley',), ('*.whiley',), ('text/x-whiley',)),
'X10Lexer': ('pygments.lexers.x10', 'X10', ('x10', 'xten'), ('*.x10',), ('text/x-x10',)),
diff --git a/pygments/lexers/webassembly.py b/pygments/lexers/webassembly.py
new file mode 100644
index 00000000..d162a46f
--- /dev/null
+++ b/pygments/lexers/webassembly.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers.webassembly
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Lexers for the WebAssembly text format.
+
+ The grammar can be found at https://github.com/WebAssembly/spec/blob/master/interpreter/README.md
+ and https://webassembly.github.io/spec/core/text/.
+
+
+ :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+from pygments.lexer import RegexLexer, words, bygroups, default
+from pygments.token import Text, Comment, Operator, Keyword, String, Number, Punctuation, Literal, Error, Name
+
+__all__ = ['WatLexer']
+
+keywords = (  # matched first in 'root': names shared with `builtins` lex as Keyword
+ 'module', 'import', 'func', 'funcref', 'start', 'param', 'local', 'type',
+ 'result', 'export', 'memory', 'global', 'mut', 'data', 'table', 'elem',
+ 'if', 'then', 'else', 'end', 'block', 'loop'
+)
+
+builtins = (  # instruction mnemonics; 'root' lexes them as Name.Builtin and enters 'arguments'
+ 'unreachable', 'nop', 'block', 'loop', 'if', 'else', 'end', 'br', 'br_if',
+ 'br_table', 'return', 'call', 'call_indirect', 'drop', 'select',
+ 'local.get', 'local.set', 'local.tee', 'global.get', 'global.set',
+ 'i32.load', 'i64.load', 'f32.load', 'f64.load', 'i32.load8_s',
+ 'i32.load8_u', 'i32.load16_s', 'i32.load16_u', 'i64.load8_s',
+ 'i64.load8_u', 'i64.load16_s', 'i64.load16_u', 'i64.load32_s',
+ 'i64.load32_u', 'i32.store', 'i64.store', 'f32.store', 'f64.store',
+ 'i32.store8', 'i32.store16', 'i64.store8', 'i64.store16', 'i64.store32',
+ 'memory.size', 'memory.grow', 'i32.const', 'i64.const', 'f32.const',
+ 'f64.const', 'i32.eqz', 'i32.eq', 'i32.ne', 'i32.lt_s', 'i32.lt_u',
+ 'i32.gt_s', 'i32.gt_u', 'i32.le_s', 'i32.le_u', 'i32.ge_s', 'i32.ge_u',
+ 'i64.eqz', 'i64.eq', 'i64.ne', 'i64.lt_s', 'i64.lt_u', 'i64.gt_s',
+ 'i64.gt_u', 'i64.le_s', 'i64.le_u', 'i64.ge_s', 'i64.ge_u', 'f32.eq',
+ 'f32.ne', 'f32.lt', 'f32.gt', 'f32.le', 'f32.ge', 'f64.eq', 'f64.ne',
+ 'f64.lt', 'f64.gt', 'f64.le', 'f64.ge', 'i32.clz', 'i32.ctz', 'i32.popcnt',
+ 'i32.add', 'i32.sub', 'i32.mul', 'i32.div_s', 'i32.div_u', 'i32.rem_s',
+ 'i32.rem_u', 'i32.and', 'i32.or', 'i32.xor', 'i32.shl', 'i32.shr_s',
+ 'i32.shr_u', 'i32.rotl', 'i32.rotr', 'i64.clz', 'i64.ctz', 'i64.popcnt',
+ 'i64.add', 'i64.sub', 'i64.mul', 'i64.div_s', 'i64.div_u', 'i64.rem_s',
+ 'i64.rem_u', 'i64.and', 'i64.or', 'i64.xor', 'i64.shl', 'i64.shr_s',
+ 'i64.shr_u', 'i64.rotl', 'i64.rotr', 'f32.abs', 'f32.neg', 'f32.ceil',
+ 'f32.floor', 'f32.trunc', 'f32.nearest', 'f32.sqrt', 'f32.add', 'f32.sub',
+ 'f32.mul', 'f32.div', 'f32.min', 'f32.max', 'f32.copysign', 'f64.abs',
+ 'f64.neg', 'f64.ceil', 'f64.floor', 'f64.trunc', 'f64.nearest', 'f64.sqrt',
+ 'f64.add', 'f64.sub', 'f64.mul', 'f64.div', 'f64.min', 'f64.max',
+ 'f64.copysign', 'i32.wrap_i64', 'i32.trunc_f32_s', 'i32.trunc_f32_u',
+ 'i32.trunc_f64_s', 'i32.trunc_f64_u', 'i64.extend_i32_s',
+ 'i64.extend_i32_u', 'i64.trunc_f32_s', 'i64.trunc_f32_u',
+ 'i64.trunc_f64_s', 'i64.trunc_f64_u', 'f32.convert_i32_s',
+ 'f32.convert_i32_u', 'f32.convert_i64_s', 'f32.convert_i64_u',
+ 'f32.demote_f64', 'f64.convert_i32_s', 'f64.convert_i32_u',
+ 'f64.convert_i64_s', 'f64.convert_i64_u', 'f64.promote_f32',
+ 'i32.reinterpret_f32', 'i64.reinterpret_f64', 'f32.reinterpret_i32',
+ 'f64.reinterpret_i64',
+)
+
+
+class WatLexer(RegexLexer):
+    """Lexer for the `WebAssembly text format <https://webassembly.org/>`_.
+
+    .. versionadded:: 2.9
+    """
+
+    name = 'WebAssembly'
+    aliases = ['wast', 'wat']
+    filenames = ['*.wat', '*.wast']
+
+    tokens = {
+        'root': [
+            (words(keywords, suffix=r'(?=[^a-z_\.])'), Keyword),
+            # an instruction may be followed by offset=/align= immediates
+            (words(builtins), Name.Builtin, 'arguments'),
+            (words(['i32', 'i64', 'f32', 'f64']), Keyword.Type),
+            (r'\$[A-Za-z0-9!#$%&\'*+./:<=>?@\\^_`|~-]+', Name.Variable),  # yes, all of these are valid in identifiers
+            (r';;.*?$', Comment.Single),
+            (r'\(;', Comment.Multiline, 'nesting_comment'),
+            # A float needs a literal '.' and/or an exponent; bare digit runs
+            # fall through to the Number.Hex / Number.Integer rules below.
+            (r'[+-]?0x[\dA-Fa-f](_?[\dA-Fa-f])*(\.([\dA-Fa-f](_?[\dA-Fa-f])*)?([pP][+-]?\d(_?\d)*)?|[pP][+-]?\d(_?\d)*)', Number.Float),
+            (r'[+-]?\d(_?\d)*(\.(\d(_?\d)*)?([eE][+-]?\d(_?\d)*)?|[eE][+-]?\d(_?\d)*)', Number.Float),
+            (r'[+-]?(inf|nan:0x[\dA-Fa-f](_?[\dA-Fa-f])*|nan)', Number.Float),
+            (r'[+-]?0x[\dA-Fa-f](_?[\dA-Fa-f])*', Number.Hex),
+            (r'[+-]?\d(_?\d)*', Number.Integer),
+            (r'[\(\)]', Punctuation),
+            (r'"', String.Double, 'string'),
+            (r'\s+', Text),
+        ],
+        'nesting_comment': [
+            # block comments nest: "(;" pushes another level, ";)" pops one
+            (r'\(;', Comment.Multiline, '#push'),
+            (r';\)', Comment.Multiline, '#pop'),
+            (r'[^;(]+', Comment.Multiline),
+            (r'[;(]', Comment.Multiline),
+        ],
+        'string': [
+            (r'\\[\dA-Fa-f][\dA-Fa-f]', String.Escape),  # must have exactly two hex digits
+            # single-character escapes: \t \n \r \" \' \\
+            (r'\\[tnr"\'\\]', String.Escape),
+            (r'\\u\{[\dA-Fa-f](_?[\dA-Fa-f])*\}', String.Escape),
+            (r'"', String.Double, '#pop'),
+            (r'[^"\\]+', String.Double),
+        ],
+        'arguments': [
+            # only offset=/align= immediates are consumed here; anything else
+            # pops back to the previous state without consuming input
+            (r'\s+', Text),
+            (r'(offset)(=)(0x[\dA-Fa-f](_?[\dA-Fa-f])*)', bygroups(Keyword, Operator, Number.Hex)),
+            (r'(offset)(=)(\d(_?\d)*)', bygroups(Keyword, Operator, Number.Integer)),
+            (r'(align)(=)(0x[\dA-Fa-f](_?[\dA-Fa-f])*)', bygroups(Keyword, Operator, Number.Hex)),
+            (r'(align)(=)(\d(_?\d)*)', bygroups(Keyword, Operator, Number.Integer)),
+            default('#pop'),
+        ]
+    }
diff --git a/tests/examplefiles/wat/fib.wat b/tests/examplefiles/wat/fib.wat
new file mode 100644
index 00000000..c14b3034
--- /dev/null
+++ b/tests/examplefiles/wat/fib.wat
@@ -0,0 +1,35 @@
+(module
+ (func $fib (param $n i32) (result i32)
+ (local $a i32)
+ (local $b i32)
+ (local $result i32)
+ (if
+ (i32.eqz (local.get $n))
+ (then
+ (return (i32.const 1))
+ )
+ )
+ (local.set $b (i32.const 1))
+ (; nested (; comment ;) ;)
+ loop
+ (local.set $result (i32.add (local.get $a) (local.get $b)))
+ (local.set $a (local.get $b))
+ (local.set $b (local.get $result))
+
+ ;; decrement $n
+ (local.tee $n (i32.sub (local.get $n) (i32.const 1)))
+
+ (; test if $n > 0 ;)
+ (i32.gt_u (i32.const 0))
+
+ ;; if so, jump to the beginning of the loop
+ br_if 0
+ end
+ local.get $result
+ )
+ (func $test_memory_store_args
+ i32.const 1
+ f64.store align=8 offset=16
+ )
+ (export "fib" (func $fib))
+)
\ No newline at end of file
diff --git a/tests/examplefiles/wat/fib.wat.output b/tests/examplefiles/wat/fib.wat.output
new file mode 100644
index 00000000..e170228e
--- /dev/null
+++ b/tests/examplefiles/wat/fib.wat.output
@@ -0,0 +1,234 @@
+'(' Punctuation
+'module' Keyword
+'\n ' Text
+'(' Punctuation
+'func' Keyword
+' ' Text
+'$fib' Name.Variable
+' ' Text
+'(' Punctuation
+'param' Keyword
+' ' Text
+'$n' Name.Variable
+' ' Text
+'i32' Keyword.Type
+')' Punctuation
+' ' Text
+'(' Punctuation
+'result' Keyword
+' ' Text
+'i32' Keyword.Type
+')' Punctuation
+'\n ' Text
+'(' Punctuation
+'local' Keyword
+' ' Text
+'$a' Name.Variable
+' ' Text
+'i32' Keyword.Type
+')' Punctuation
+'\n ' Text
+'(' Punctuation
+'local' Keyword
+' ' Text
+'$b' Name.Variable
+' ' Text
+'i32' Keyword.Type
+')' Punctuation
+'\n ' Text
+'(' Punctuation
+'local' Keyword
+' ' Text
+'$result' Name.Variable
+' ' Text
+'i32' Keyword.Type
+')' Punctuation
+'\n ' Text
+'(' Punctuation
+'if' Keyword
+'\n ' Text
+'(' Punctuation
+'i32.eqz' Name.Builtin
+' ' Text
+'(' Punctuation
+'local.get' Name.Builtin
+' ' Text
+'$n' Name.Variable
+')' Punctuation
+')' Punctuation
+'\n ' Text
+'(' Punctuation
+'then' Keyword
+'\n ' Text
+'(' Punctuation
+'return' Name.Builtin
+' ' Text
+'(' Punctuation
+'i32.const' Name.Builtin
+' ' Text
+'1' Literal.Number.Integer
+')' Punctuation
+')' Punctuation
+'\n ' Text
+')' Punctuation
+'\n ' Text
+')' Punctuation
+'\n ' Text
+'(' Punctuation
+'local.set' Name.Builtin
+' ' Text
+'$b' Name.Variable
+' ' Text
+'(' Punctuation
+'i32.const' Name.Builtin
+' ' Text
+'1' Literal.Number.Integer
+')' Punctuation
+')' Punctuation
+'\n ' Text
+'(;' Comment.Multiline
+' nested ' Comment.Multiline
+'(;' Comment.Multiline
+' comment ' Comment.Multiline
+';)' Comment.Multiline
+' ' Comment.Multiline
+';)' Comment.Multiline
+'\n ' Text
+'loop' Keyword
+'\n ' Text
+'(' Punctuation
+'local.set' Name.Builtin
+' ' Text
+'$result' Name.Variable
+' ' Text
+'(' Punctuation
+'i32.add' Name.Builtin
+' ' Text
+'(' Punctuation
+'local.get' Name.Builtin
+' ' Text
+'$a' Name.Variable
+')' Punctuation
+' ' Text
+'(' Punctuation
+'local.get' Name.Builtin
+' ' Text
+'$b' Name.Variable
+')' Punctuation
+')' Punctuation
+')' Punctuation
+'\n ' Text
+'(' Punctuation
+'local.set' Name.Builtin
+' ' Text
+'$a' Name.Variable
+' ' Text
+'(' Punctuation
+'local.get' Name.Builtin
+' ' Text
+'$b' Name.Variable
+')' Punctuation
+')' Punctuation
+'\n ' Text
+'(' Punctuation
+'local.set' Name.Builtin
+' ' Text
+'$b' Name.Variable
+' ' Text
+'(' Punctuation
+'local.get' Name.Builtin
+' ' Text
+'$result' Name.Variable
+')' Punctuation
+')' Punctuation
+'\n\n ' Text
+';; decrement $n' Comment.Single
+'\n ' Text
+'(' Punctuation
+'local.tee' Name.Builtin
+' ' Text
+'$n' Name.Variable
+' ' Text
+'(' Punctuation
+'i32.sub' Name.Builtin
+' ' Text
+'(' Punctuation
+'local.get' Name.Builtin
+' ' Text
+'$n' Name.Variable
+')' Punctuation
+' ' Text
+'(' Punctuation
+'i32.const' Name.Builtin
+' ' Text
+'1' Literal.Number.Integer
+')' Punctuation
+')' Punctuation
+')' Punctuation
+'\n\n ' Text
+'(;' Comment.Multiline
+' test if $n > 0 ' Comment.Multiline
+';)' Comment.Multiline
+'\n ' Text
+'(' Punctuation
+'i32.gt_u' Name.Builtin
+' ' Text
+'(' Punctuation
+'i32.const' Name.Builtin
+' ' Text
+'0' Literal.Number.Integer
+')' Punctuation
+')' Punctuation
+'\n\n ' Text
+';; if so, jump to the beginning of the loop' Comment.Single
+'\n ' Text
+'br_if' Name.Builtin
+' ' Text
+'0' Literal.Number.Integer
+'\n ' Text
+'end' Keyword
+'\n ' Text
+'local.get' Name.Builtin
+' ' Text
+'$result' Name.Variable
+'\n ' Text
+')' Punctuation
+'\n ' Text
+'(' Punctuation
+'func' Keyword
+' ' Text
+'$test_memory_store_args' Name.Variable
+'\n ' Text
+'i32.const' Name.Builtin
+' ' Text
+'1' Literal.Number.Integer
+'\n ' Text
+'f64.store' Name.Builtin
+' ' Text
+'align' Keyword
+'=' Operator
+'8' Literal.Number.Integer
+' ' Text
+'offset' Keyword
+'=' Operator
+'16' Literal.Number.Integer
+'\n ' Text
+')' Punctuation
+'\n ' Text
+'(' Punctuation
+'export' Keyword
+' ' Text
+'"' Literal.String.Double
+'fib' Literal.String.Double
+'"' Literal.String.Double
+' ' Text
+'(' Punctuation
+'func' Keyword
+' ' Text
+'$fib' Name.Variable
+')' Punctuation
+')' Punctuation
+'\n' Text
+
+')' Punctuation
+'\n' Text
diff --git a/tests/snippets/wat/test_align_and_offset_accept_hexadecimal_numbers.txt b/tests/snippets/wat/test_align_and_offset_accept_hexadecimal_numbers.txt
new file mode 100644
index 00000000..919e1d21
--- /dev/null
+++ b/tests/snippets/wat/test_align_and_offset_accept_hexadecimal_numbers.txt
@@ -0,0 +1,14 @@
+---input---
+i32.store offset=0xdeadbeef align=0x1
+
+---tokens---
+'i32.store' Name.Builtin
+' ' Text
+'offset' Keyword
+'=' Operator
+'0xdeadbeef' Literal.Number.Hex
+' ' Text
+'align' Keyword
+'=' Operator
+'0x1' Literal.Number.Hex
+'\n' Text
diff --git a/tests/snippets/wat/test_comment_with_open_paren.txt b/tests/snippets/wat/test_comment_with_open_paren.txt
new file mode 100644
index 00000000..631de4cb
--- /dev/null
+++ b/tests/snippets/wat/test_comment_with_open_paren.txt
@@ -0,0 +1,10 @@
+---input---
+(; comment with ( open paren ;)
+
+---tokens---
+'(;' Comment.Multiline
+' comment with ' Comment.Multiline
+'(' Comment.Multiline
+' open paren ' Comment.Multiline
+';)' Comment.Multiline
+'\n' Text
diff --git a/tests/snippets/wat/test_comment_with_semicolon.txt b/tests/snippets/wat/test_comment_with_semicolon.txt
new file mode 100644
index 00000000..0cd31123
--- /dev/null
+++ b/tests/snippets/wat/test_comment_with_semicolon.txt
@@ -0,0 +1,10 @@
+---input---
+(; comment with ; semicolon ;)
+
+---tokens---
+'(;' Comment.Multiline
+' comment with ' Comment.Multiline
+';' Comment.Multiline
+' semicolon ' Comment.Multiline
+';)' Comment.Multiline
+'\n' Text
diff --git a/tests/snippets/wat/test_i32_const_is_builtin.txt b/tests/snippets/wat/test_i32_const_is_builtin.txt
new file mode 100644
index 00000000..740907cc
--- /dev/null
+++ b/tests/snippets/wat/test_i32_const_is_builtin.txt
@@ -0,0 +1,6 @@
+---input---
+i32.const
+
+---tokens---
+'i32.const' Name.Builtin
+'\n' Text
diff --git a/tests/snippets/wat/test_multiline_comment.txt b/tests/snippets/wat/test_multiline_comment.txt
new file mode 100644
index 00000000..6cbd45ea
--- /dev/null
+++ b/tests/snippets/wat/test_multiline_comment.txt
@@ -0,0 +1,11 @@
+---input---
+(;
+ comment
+;)
+
+---tokens---
+'(;' Comment.Multiline
+'\n comment\n' Comment.Multiline
+
+';)' Comment.Multiline
+'\n' Text
diff --git a/tests/snippets/wat/test_nested_comment.txt b/tests/snippets/wat/test_nested_comment.txt
new file mode 100644
index 00000000..de072939
--- /dev/null
+++ b/tests/snippets/wat/test_nested_comment.txt
@@ -0,0 +1,14 @@
+---input---
+(;
+nested(;;)comment
+;)
+
+---tokens---
+'(;' Comment.Multiline
+'\nnested' Comment.Multiline
+'(;' Comment.Multiline
+';)' Comment.Multiline
+'comment\n' Comment.Multiline
+
+';)' Comment.Multiline
+'\n' Text
diff --git a/tests/snippets/wat/test_string_byte_escape.txt b/tests/snippets/wat/test_string_byte_escape.txt
new file mode 100644
index 00000000..c0b9e4a0
--- /dev/null
+++ b/tests/snippets/wat/test_string_byte_escape.txt
@@ -0,0 +1,9 @@
+---input---
+"\001"
+
+---tokens---
+'"' Literal.String.Double
+'\\00' Literal.String.Escape
+'1' Literal.String.Double
+'"' Literal.String.Double
+'\n' Text
diff --git a/tests/snippets/wat/test_string_with_escape.txt b/tests/snippets/wat/test_string_with_escape.txt
new file mode 100644
index 00000000..c978faa4
--- /dev/null
+++ b/tests/snippets/wat/test_string_with_escape.txt
@@ -0,0 +1,9 @@
+---input---
+"string\t"
+
+---tokens---
+'"' Literal.String.Double
+'string' Literal.String.Double
+'\\t' Literal.String.Escape
+'"' Literal.String.Double
+'\n' Text
diff --git a/tests/snippets/wat/test_variable_name_pattern.txt b/tests/snippets/wat/test_variable_name_pattern.txt
new file mode 100644
index 00000000..d305ab94
--- /dev/null
+++ b/tests/snippets/wat/test_variable_name_pattern.txt
@@ -0,0 +1,6 @@
+---input---
+$ABCabc123!#$%&'*+./:<=>?@\\^_`|~-A
+
+---tokens---
+"$ABCabc123!#$%&'*+./:<=>?@\\\\^_`|~-A" Name.Variable
+'\n' Text