diff options
| author | Sebastian Schweizer <sebastian@schweizer.tel> | 2016-03-20 21:48:22 +1300 |
|---|---|---|
| committer | Sebastian Schweizer <sebastian@schweizer.tel> | 2016-03-20 21:48:22 +1300 |
| commit | dc9c6a2eee8b62e50160c46d26e4eade377f7634 (patch) | |
| tree | bb50ae14665eb418153c924674c1d1015089f1eb | |
| parent | 7dc426c288c6b13147ad669fcfc95ecdbc77b6f2 (diff) | |
| download | pygments-git-dc9c6a2eee8b62e50160c46d26e4eade377f7634.tar.gz | |
Add a lexer for the Whiley programming language
| -rw-r--r-- | AUTHORS | 1 | ||||
| -rw-r--r-- | pygments/lexers/_mapping.py | 1 | ||||
| -rw-r--r-- | pygments/lexers/whiley.py | 110 | ||||
| -rw-r--r-- | tests/examplefiles/example.whiley | 296 |
4 files changed, 408 insertions, 0 deletions
# -*- coding: utf-8 -*-
"""
    pygments.lexers.whiley
    ~~~~~~~~~~~~~~~~~~~~~~

    Lexers for the Whiley language.

    :copyright: Copyright 2006-2016 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from pygments.lexer import RegexLexer, bygroups, words
from pygments.token import Comment, Keyword, Name, Number, Operator, \
    Punctuation, String, Text

__all__ = ['WhileyLexer']


class WhileyLexer(RegexLexer):
    """
    Lexer for the Whiley programming language.

    The rules in the ``root`` state are tried in order at the current
    position, so the keyword rules must stay ahead of the generic
    identifier rule at the end of the list.  Every keyword rule ends in
    ``\\b`` so that an identifier which merely starts with a keyword
    (for example ``iffy``) falls through to the identifier rule instead
    of being split in two.
    """
    name = 'Whiley'
    filenames = ['*.whiley']
    aliases = ['whiley']
    mimetypes = ['text/x-whiley']

    # See the language specification:
    # http://whiley.org/download/WhileyLanguageSpec.pdf

    tokens = {
        'root': [
            # Whitespace
            (r'\s+', Text),

            # Comments
            (r'//.*', Comment.Single),
            # don't parse empty comment as doc comment
            (r'/\*\*/', Comment.Multiline),
            # "/** ... */" is a documentation comment; this rule has to
            # come before the plain "/* ... */" rule, which would also
            # match it.
            (r'(?ms)/\*\*.*?\*/', String.Doc),
            (r'(?ms)/\*.*?\*/', Comment.Multiline),

            # Keywords
            (words((
                'if', 'else', 'while', 'for', 'do', 'return',
                'switch', 'case', 'default', 'break', 'continue',
                'requires', 'ensures', 'where', 'assert', 'assume',
                'all', 'no', 'some', 'in', 'is', 'new',
                'throw', 'try', 'catch', 'debug', 'skip', 'fail',
                'finite', 'total',
            ), suffix=r'\b'), Keyword.Reserved),
            (words((
                'function', 'method', 'public', 'private', 'protected',
                'export', 'native',
            ), suffix=r'\b'), Keyword.Declaration),
            # "constant" & "type" are not keywords unless used in declarations
            (r'(constant|type)(\s+)([a-zA-Z_]\w*)(\s+)(is)\b',
             bygroups(Keyword.Declaration, Text, Name, Text, Keyword)),
            (r'(true|false|null)\b', Keyword.Constant),
            (r'(bool|byte|int|real|any|void)\b', Keyword.Type),
            # "from" is not a keyword unless used with import
            (r'(import)(\s+)(\*)([^\S\n]+)(from)\b',
             bygroups(Keyword.Namespace, Text, Punctuation, Text, Keyword.Namespace)),
            (r'(import)(\s+)([a-zA-Z_]\w*)([^\S\n]+)(from)\b',
             bygroups(Keyword.Namespace, Text, Name, Text, Keyword.Namespace)),
            # The alternation is grouped so that the word boundary applies
            # to both keywords.  Written as r'package|import\b', the "\b"
            # binds only to "import", and an identifier such as "packages"
            # is split into the keyword "package" followed by the name "s".
            (r'(package|import)\b', Keyword.Namespace),

            # standard library: https://github.com/Whiley/WhileyLibs/
            (words((
                # types defined in whiley.lang.Int
                'i8', 'i16', 'i32', 'i64',
                'u8', 'u16', 'u32', 'u64',
                'uint', 'nat',

                # whiley.lang.Any
                'toString',
            ), suffix=r'\b'), Name.Builtin),

            # byte literal
            (r'[01]+b', Number.Bin),

            # decimal literal
            (r'[0-9]+\.[0-9]+', Number.Float),

            # integer literal (hex first, otherwise the leading "0" of
            # "0x..." would be consumed by the plain integer rule)
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            # character literal
            (r"""'[^\\]'""", String.Char),
            (r"""(')(\\['"\\btnfr])(')""",
             bygroups(String.Char, String.Escape, String.Char)),

            # string literal: the opening quote enters the 'string' state
            (r'"', String, 'string'),

            # operators and punctuation
            (r'[{}()\[\],.;]', Punctuation),
            (r'[+\-*/%&|<>^!~@=:?]', Operator),

            # identifier
            (r'[a-zA-Z_]\w*', Name),
        ],
        'string': [
            # closing quote returns to the previous state
            (r'"', String, '#pop'),
            # recognised escapes are highlighted separately ...
            (r'\\[btnfr]', String.Escape),
            # ... any other backslash pair (including \") is plain string
            # text, which keeps an escaped quote from ending the literal
            (r'\\.', String),
            (r'[^\\"]+', String),
        ],
    }
+ * https://github.com/Whiley/WyBench/blob/master/src/101_interpreter/Main.whiley + */ + +import whiley.lang.System +import whiley.lang.Int +import whiley.io.File +import string from whiley.lang.ASCII +import char from whiley.lang.ASCII + +// ==================================================== +// A simple calculator for expressions +// ==================================================== + +constant ADD is 0 +constant SUB is 1 +constant MUL is 2 +constant DIV is 3 + +// binary operation +type BOp is (int x) where ADD <= x && x <= DIV +type BinOp is { BOp op, Expr lhs, Expr rhs } + +// variables +type Var is { string id } + +// list access +type ListAccess is { + Expr src, + Expr index +} + +// expression tree +type Expr is int | // constant + Var | // variable + BinOp | // binary operator + Expr[] | // array constructor + ListAccess // list access + +// values +type Value is int | Value[] + +// stmts +type Print is { Expr rhs } +type Set is { string lhs, Expr rhs } +type Stmt is Print | Set + +// ==================================================== +// Expression Evaluator +// ==================================================== + +type RuntimeError is { string msg } +type Environment is [{string k, Value v}] + +// Evaluate an expression in a given environment reducing either to a +// value, or a runtime error. The latter occurs if evaluation gets +// "stuck" (e.g. 
expression is // not well-formed) +function evaluate(Expr e, Environment env) -> Value | RuntimeError: + // + if e is int: + return e + else if e is Var: + return env[e.id] + else if e is BinOp: + Value|RuntimeError lhs = evaluate(e.lhs, env) + Value|RuntimeError rhs = evaluate(e.rhs, env) + // check if stuck + if !(lhs is int && rhs is int): + return {msg: "arithmetic attempted on non-numeric value"} + // switch statement would be good + if e.op == ADD: + return lhs + rhs + else if e.op == SUB: + return lhs - rhs + else if e.op == MUL: + return lhs * rhs + else if rhs != 0: + return lhs / rhs + return {msg: "divide-by-zero"} + else if e is Expr[]: + [Value] r = [] + for i in e: + Value|RuntimeError v = evaluate(i, env) + if v is RuntimeError: + return v + else: + r = r ++ [v] + return r + else if e is ListAccess: + Value|RuntimeError src = evaluate(e.src, env) + Value|RuntimeError index = evaluate(e.index, env) + // santity checks + if src is [Value] && index is int && index >= 0 && index < |src|: + return src[index] + else: + return {msg: "invalid list access"} + else: + return 0 // dead-code + +// ==================================================== +// Expression Parser +// ==================================================== + +type State is { string input, int pos } +type SyntaxError is { string msg, int start, int end } + +function SyntaxError(string msg, int start, int end) -> SyntaxError: + return { msg: msg, start: start, end: end } + +// Top-level parse method +function parse(State st) -> (Stmt,State)|SyntaxError: + // + Var keyword, Var v + Expr e + int start = st.pos + // + keyword,st = parseIdentifier(st) + switch keyword.id: + case "print": + any r = parseAddSubExpr(st) + if !(r is SyntaxError): + e,st = r + return {rhs: e},st + else: + return r // error case + case "set": + st = parseWhiteSpace(st) + v,st = parseIdentifier(st) + any r = parseAddSubExpr(st) + if !(r is SyntaxError): + e,st = r + return {lhs: v.id, rhs: e},st + else: + return r // 
error case + default: + return SyntaxError("unknown statement",start,st.pos-1) + +function parseAddSubExpr(State st) -> (Expr, State)|SyntaxError: + // + Expr lhs, Expr rhs + // First, pass left-hand side + any r = parseMulDivExpr(st) + // + if r is SyntaxError: + return r + // + lhs,st = r + st = parseWhiteSpace(st) + // Second, see if there is a right-hand side + if st.pos < |st.input| && st.input[st.pos] == '+': + // add expression + st.pos = st.pos + 1 + r = parseAddSubExpr(st) + if !(r is SyntaxError): + rhs,st = r + return {op: ADD, lhs: lhs, rhs: rhs},st + else: + return r + else if st.pos < |st.input| && st.input[st.pos] == '-': + // subtract expression + st.pos = st.pos + 1 + r = parseAddSubExpr(st) + if !(r is SyntaxError): + rhs,st = r + return {op: SUB, lhs: lhs, rhs: rhs},st + else: + return r + // No right-hand side + return (lhs,st) + +function parseMulDivExpr(State st) -> (Expr, State)|SyntaxError: + // First, parse left-hand side + Expr lhs, Expr rhs + any r = parseTerm(st) + if r is SyntaxError: + return r + // + lhs,st = r + st = parseWhiteSpace(st) + // Second, see if there is a right-hand side + if st.pos < |st.input| && st.input[st.pos] == '*': + // add expression + st.pos = st.pos + 1 + r = parseMulDivExpr(st) + if !(r is SyntaxError): + rhs,st = r + return {op: MUL, lhs: lhs, rhs: rhs}, st + else: + return r + else if st.pos < |st.input| && st.input[st.pos] == '/': + // subtract expression + st.pos = st.pos + 1 + r = parseMulDivExpr(st) + if !(r is SyntaxError): + rhs,st = r + return {op: DIV, lhs: lhs, rhs: rhs}, st + else: + return r + // No right-hand side + return (lhs,st) + +function parseTerm(State st) -> (Expr, State)|SyntaxError: + // + st = parseWhiteSpace(st) + if st.pos < |st.input|: + if ASCII.isLetter(st.input[st.pos]): + return parseIdentifier(st) + else if ASCII.isDigit(st.input[st.pos]): + return parseNumber(st) + else if st.input[st.pos] == '[': + return parseList(st) + // + return SyntaxError("expecting number or 
variable",st.pos,st.pos) + +function parseIdentifier(State st) -> (Var, State): + // + string txt = "" + // inch forward until end of identifier reached + while st.pos < |st.input| && ASCII.isLetter(st.input[st.pos]): + txt = txt ++ [st.input[st.pos]] + st.pos = st.pos + 1 + return ({id:txt}, st) + +function parseNumber(State st) -> (Expr, State)|SyntaxError: + // inch forward until end of identifier reached + int start = st.pos + while st.pos < |st.input| && ASCII.isDigit(st.input[st.pos]): + st.pos = st.pos + 1 + // + int|null iv = Int.parse(st.input[start..st.pos]) + if iv == null: + return SyntaxError("Error parsing number",start,st.pos) + else: + return iv, st + +function parseList(State st) -> (Expr, State)|SyntaxError: + // + st.pos = st.pos + 1 // skip '[' + st = parseWhiteSpace(st) + [Expr] l = [] // initial list + bool firstTime = true + while st.pos < |st.input| && st.input[st.pos] != ']': + if !firstTime && st.input[st.pos] != ',': + return SyntaxError("expecting comma",st.pos,st.pos) + else if !firstTime: + st.pos = st.pos + 1 // skip ',' + firstTime = false + any r = parseAddSubExpr(st) + if r is SyntaxError: + return r + else: + Expr e + e,st = r + // perform annoying error check + l = l ++ [e] + st = parseWhiteSpace(st) + st.pos = st.pos + 1 + return l,st + +// Parse all whitespace upto end-of-file +function parseWhiteSpace(State st) -> State: + while st.pos < |st.input| && ASCII.isWhiteSpace(st.input[st.pos]): + st.pos = st.pos + 1 + return st + +// ==================================================== +// Main Method +// ==================================================== + +public method main(System.Console sys): + if(|sys.args| == 0): + sys.out.println("no parameter provided!") + else: + File.Reader file = File.Reader(sys.args[0]) + string input = ASCII.fromBytes(file.readAll()) + + Environment env = Environment() + State st = {pos: 0, input: input} + while st.pos < |st.input|: + Stmt s + any r = parse(st) + if r is SyntaxError: + 
sys.out.println("syntax error: " ++ r.msg) + return + s,st = r + Value|RuntimeError v = evaluate(s.rhs,env) + if v is RuntimeError: + sys.out.println("runtime error: " ++ v.msg) + return + if s is Set: + env[s.lhs] = v + else: + sys.out.println(r) + st = parseWhiteSpace(st) + |
