summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Schweizer <sebastian@schweizer.tel>2016-03-20 21:48:22 +1300
committerSebastian Schweizer <sebastian@schweizer.tel>2016-03-20 21:48:22 +1300
commitdc9c6a2eee8b62e50160c46d26e4eade377f7634 (patch)
treebb50ae14665eb418153c924674c1d1015089f1eb
parent7dc426c288c6b13147ad669fcfc95ecdbc77b6f2 (diff)
downloadpygments-git-dc9c6a2eee8b62e50160c46d26e4eade377f7634.tar.gz
Add a lexer for the Whiley programming language
-rw-r--r--AUTHORS1
-rw-r--r--pygments/lexers/_mapping.py1
-rw-r--r--pygments/lexers/whiley.py110
-rw-r--r--tests/examplefiles/example.whiley296
4 files changed, 408 insertions, 0 deletions
diff --git a/AUTHORS b/AUTHORS
index 204b4efe..e5f6df14 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -170,6 +170,7 @@ Other contributors, listed alphabetically, are:
* Joe Schafer -- Ada lexer
* Ken Schutte -- Matlab lexers
* Tassilo Schweyer -- Io, MOOCode lexers
+* Sebastian Schweizer -- Whiley lexer
* Ted Shaw -- AutoIt lexer
* Joerg Sieker -- ABAP lexer
* Robert Simmons -- Standard ML lexer
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index dba6d69a..5235f862 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -416,6 +416,7 @@ LEXERS = {
'VhdlLexer': ('pygments.lexers.hdl', 'vhdl', ('vhdl',), ('*.vhdl', '*.vhd'), ('text/x-vhdl',)),
'VimLexer': ('pygments.lexers.textedit', 'VimL', ('vim',), ('*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'), ('text/x-vim',)),
'WDiffLexer': ('pygments.lexers.diff', 'WDiff', ('wdiff',), ('*.wdiff',), ()),
+ 'WhileyLexer': ('pygments.lexers.whiley', 'Whiley', ('whiley',), ('*.whiley',), ('text/x-whiley',)),
'X10Lexer': ('pygments.lexers.x10', 'X10', ('x10', 'xten'), ('*.x10',), ('text/x-x10',)),
'XQueryLexer': ('pygments.lexers.webmisc', 'XQuery', ('xquery', 'xqy', 'xq', 'xql', 'xqm'), ('*.xqy', '*.xquery', '*.xq', '*.xql', '*.xqm'), ('text/xquery', 'application/xquery')),
'XmlDjangoLexer': ('pygments.lexers.templates', 'XML+Django/Jinja', ('xml+django', 'xml+jinja'), (), ('application/xml+django', 'application/xml+jinja')),
diff --git a/pygments/lexers/whiley.py b/pygments/lexers/whiley.py
new file mode 100644
index 00000000..d3c52949
--- /dev/null
+++ b/pygments/lexers/whiley.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers.whiley
+ ~~~~~~~~~~~~~~~~~~~~
+
+ Lexers for the Whiley language.
+
+ :copyright: Copyright 2006-2016 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+from pygments.lexer import RegexLexer, bygroups, words
+from pygments.token import Comment, Keyword, Name, Number, Operator, \
+ Punctuation, String, Text
+
+__all__ = ['WhileyLexer']
+
+
+class WhileyLexer(RegexLexer):
+ """
+ Lexer for the Whiley programming language.
+ """
+ name = 'Whiley'
+ filenames = ['*.whiley']
+ aliases = ['whiley']
+ mimetypes = ['text/x-whiley']
+
+ # See the language specification:
+ # http://whiley.org/download/WhileyLanguageSpec.pdf
+
+ tokens = {
+ 'root': [
+ # Whitespace
+ (r'\s+', Text),
+
+ # Comments
+ (r'//.*', Comment.Single),
+ # don't parse empty comment as doc comment
+ (r'/\*\*/', Comment.Multiline),
+ (r'(?ms)/\*\*.*?\*/', String.Doc),
+ (r'(?ms)/\*.*?\*/', Comment.Multiline),
+
+ # Keywords
+ (words((
+ 'if', 'else', 'while', 'for', 'do', 'return',
+ 'switch', 'case', 'default', 'break', 'continue',
+ 'requires', 'ensures', 'where', 'assert', 'assume',
+ 'all', 'no', 'some', 'in', 'is', 'new',
+ 'throw', 'try', 'catch', 'debug', 'skip', 'fail',
+ 'finite', 'total',
+ ), suffix=r'\b'), Keyword.Reserved),
+ (words((
+ 'function', 'method', 'public', 'private', 'protected',
+ 'export', 'native',
+ ), suffix=r'\b'), Keyword.Declaration),
+ # "constant" & "type" are not keywords unless used in declarations
+ (r'(constant|type)(\s+)([a-zA-Z_]\w*)(\s+)(is)\b',
+ bygroups(Keyword.Declaration, Text, Name, Text, Keyword)),
+ (r'(true|false|null)\b', Keyword.Constant),
+ (r'(bool|byte|int|real|any|void)\b', Keyword.Type),
+ # "from" is not a keyword unless used with import
+ (r'(import)(\s+)(\*)([^\S\n]+)(from)\b',
+ bygroups(Keyword.Namespace, Text, Punctuation, Text, Keyword.Namespace)),
+ (r'(import)(\s+)([a-zA-Z_]\w*)([^\S\n]+)(from)\b',
+ bygroups(Keyword.Namespace, Text, Name, Text, Keyword.Namespace)),
+ (r'package|import\b', Keyword.Namespace),
+
+ # standard library: https://github.com/Whiley/WhileyLibs/
+ (words((
+ # types defined in whiley.lang.Int
+ 'i8', 'i16', 'i32', 'i64',
+ 'u8', 'u16', 'u32', 'u64',
+ 'uint', 'nat',
+
+ # whiley.lang.Any
+ 'toString',
+ ), suffix=r'\b'), Name.Builtin),
+
+ # byte literal
+ (r'[01]+b', Number.Bin),
+
+ # decimal literal
+ (r'[0-9]+\.[0-9]+', Number.Float),
+
+ # integer literal
+ (r'0x[0-9a-fA-F]+', Number.Hex),
+ (r'[0-9]+', Number.Integer),
+
+ # character literal
+ (r"""'[^\\]'""", String.Char),
+ (r"""(')(\\['"\\btnfr])(')""",
+ bygroups(String.Char, String.Escape, String.Char)),
+
+ # string literal
+ (r'"', String, 'string'),
+
+ # operators and punctuation
+ (r'[{}()\[\],.;]', Punctuation),
+ (r'[+\-*/%&|<>^!~@=:?]', Operator),
+
+ # identifier
+ (r'[a-zA-Z_]\w*', Name),
+ ],
+ 'string': [
+ (r'"', String, '#pop'),
+ (r'\\[btnfr]', String.Escape),
+ (r'\\.', String),
+ (r'[^\\"]+', String),
+ ],
+ }
diff --git a/tests/examplefiles/example.whiley b/tests/examplefiles/example.whiley
new file mode 100644
index 00000000..74b39370
--- /dev/null
+++ b/tests/examplefiles/example.whiley
@@ -0,0 +1,296 @@
+/**
+ * Example Whiley program, taken from the Whiley benchmark suite.
+ * https://github.com/Whiley/WyBench/blob/master/src/101_interpreter/Main.whiley
+ */
+
+import whiley.lang.System
+import whiley.lang.Int
+import whiley.io.File
+import string from whiley.lang.ASCII
+import char from whiley.lang.ASCII
+
+// ====================================================
+// A simple calculator for expressions
+// ====================================================
+
+constant ADD is 0
+constant SUB is 1
+constant MUL is 2
+constant DIV is 3
+
+// binary operation
+type BOp is (int x) where ADD <= x && x <= DIV
+type BinOp is { BOp op, Expr lhs, Expr rhs }
+
+// variables
+type Var is { string id }
+
+// list access
+type ListAccess is {
+ Expr src,
+ Expr index
+}
+
+// expression tree
+type Expr is int | // constant
+ Var | // variable
+ BinOp | // binary operator
+ Expr[] | // array constructor
+ ListAccess // list access
+
+// values
+type Value is int | Value[]
+
+// stmts
+type Print is { Expr rhs }
+type Set is { string lhs, Expr rhs }
+type Stmt is Print | Set
+
+// ====================================================
+// Expression Evaluator
+// ====================================================
+
+type RuntimeError is { string msg }
+type Environment is [{string k, Value v}]
+
+// Evaluate an expression in a given environment reducing either to a
+// value, or a runtime error. The latter occurs if evaluation gets
+// "stuck" (e.g. expression is // not well-formed)
+function evaluate(Expr e, Environment env) -> Value | RuntimeError:
+ //
+ if e is int:
+ return e
+ else if e is Var:
+ return env[e.id]
+ else if e is BinOp:
+ Value|RuntimeError lhs = evaluate(e.lhs, env)
+ Value|RuntimeError rhs = evaluate(e.rhs, env)
+ // check if stuck
+ if !(lhs is int && rhs is int):
+ return {msg: "arithmetic attempted on non-numeric value"}
+ // switch statement would be good
+ if e.op == ADD:
+ return lhs + rhs
+ else if e.op == SUB:
+ return lhs - rhs
+ else if e.op == MUL:
+ return lhs * rhs
+ else if rhs != 0:
+ return lhs / rhs
+ return {msg: "divide-by-zero"}
+ else if e is Expr[]:
+ [Value] r = []
+ for i in e:
+ Value|RuntimeError v = evaluate(i, env)
+ if v is RuntimeError:
+ return v
+ else:
+ r = r ++ [v]
+ return r
+ else if e is ListAccess:
+ Value|RuntimeError src = evaluate(e.src, env)
+ Value|RuntimeError index = evaluate(e.index, env)
+ // santity checks
+ if src is [Value] && index is int && index >= 0 && index < |src|:
+ return src[index]
+ else:
+ return {msg: "invalid list access"}
+ else:
+ return 0 // dead-code
+
+// ====================================================
+// Expression Parser
+// ====================================================
+
+type State is { string input, int pos }
+type SyntaxError is { string msg, int start, int end }
+
+function SyntaxError(string msg, int start, int end) -> SyntaxError:
+ return { msg: msg, start: start, end: end }
+
+// Top-level parse method
+function parse(State st) -> (Stmt,State)|SyntaxError:
+ //
+ Var keyword, Var v
+ Expr e
+ int start = st.pos
+ //
+ keyword,st = parseIdentifier(st)
+ switch keyword.id:
+ case "print":
+ any r = parseAddSubExpr(st)
+ if !(r is SyntaxError):
+ e,st = r
+ return {rhs: e},st
+ else:
+ return r // error case
+ case "set":
+ st = parseWhiteSpace(st)
+ v,st = parseIdentifier(st)
+ any r = parseAddSubExpr(st)
+ if !(r is SyntaxError):
+ e,st = r
+ return {lhs: v.id, rhs: e},st
+ else:
+ return r // error case
+ default:
+ return SyntaxError("unknown statement",start,st.pos-1)
+
+function parseAddSubExpr(State st) -> (Expr, State)|SyntaxError:
+ //
+ Expr lhs, Expr rhs
+ // First, pass left-hand side
+ any r = parseMulDivExpr(st)
+ //
+ if r is SyntaxError:
+ return r
+ //
+ lhs,st = r
+ st = parseWhiteSpace(st)
+ // Second, see if there is a right-hand side
+ if st.pos < |st.input| && st.input[st.pos] == '+':
+ // add expression
+ st.pos = st.pos + 1
+ r = parseAddSubExpr(st)
+ if !(r is SyntaxError):
+ rhs,st = r
+ return {op: ADD, lhs: lhs, rhs: rhs},st
+ else:
+ return r
+ else if st.pos < |st.input| && st.input[st.pos] == '-':
+ // subtract expression
+ st.pos = st.pos + 1
+ r = parseAddSubExpr(st)
+ if !(r is SyntaxError):
+ rhs,st = r
+ return {op: SUB, lhs: lhs, rhs: rhs},st
+ else:
+ return r
+ // No right-hand side
+ return (lhs,st)
+
+function parseMulDivExpr(State st) -> (Expr, State)|SyntaxError:
+ // First, parse left-hand side
+ Expr lhs, Expr rhs
+ any r = parseTerm(st)
+ if r is SyntaxError:
+ return r
+ //
+ lhs,st = r
+ st = parseWhiteSpace(st)
+ // Second, see if there is a right-hand side
+ if st.pos < |st.input| && st.input[st.pos] == '*':
+ // add expression
+ st.pos = st.pos + 1
+ r = parseMulDivExpr(st)
+ if !(r is SyntaxError):
+ rhs,st = r
+ return {op: MUL, lhs: lhs, rhs: rhs}, st
+ else:
+ return r
+ else if st.pos < |st.input| && st.input[st.pos] == '/':
+ // subtract expression
+ st.pos = st.pos + 1
+ r = parseMulDivExpr(st)
+ if !(r is SyntaxError):
+ rhs,st = r
+ return {op: DIV, lhs: lhs, rhs: rhs}, st
+ else:
+ return r
+ // No right-hand side
+ return (lhs,st)
+
+function parseTerm(State st) -> (Expr, State)|SyntaxError:
+ //
+ st = parseWhiteSpace(st)
+ if st.pos < |st.input|:
+ if ASCII.isLetter(st.input[st.pos]):
+ return parseIdentifier(st)
+ else if ASCII.isDigit(st.input[st.pos]):
+ return parseNumber(st)
+ else if st.input[st.pos] == '[':
+ return parseList(st)
+ //
+ return SyntaxError("expecting number or variable",st.pos,st.pos)
+
+function parseIdentifier(State st) -> (Var, State):
+ //
+ string txt = ""
+ // inch forward until end of identifier reached
+ while st.pos < |st.input| && ASCII.isLetter(st.input[st.pos]):
+ txt = txt ++ [st.input[st.pos]]
+ st.pos = st.pos + 1
+ return ({id:txt}, st)
+
+function parseNumber(State st) -> (Expr, State)|SyntaxError:
+ // inch forward until end of identifier reached
+ int start = st.pos
+ while st.pos < |st.input| && ASCII.isDigit(st.input[st.pos]):
+ st.pos = st.pos + 1
+ //
+ int|null iv = Int.parse(st.input[start..st.pos])
+ if iv == null:
+ return SyntaxError("Error parsing number",start,st.pos)
+ else:
+ return iv, st
+
+function parseList(State st) -> (Expr, State)|SyntaxError:
+ //
+ st.pos = st.pos + 1 // skip '['
+ st = parseWhiteSpace(st)
+ [Expr] l = [] // initial list
+ bool firstTime = true
+ while st.pos < |st.input| && st.input[st.pos] != ']':
+ if !firstTime && st.input[st.pos] != ',':
+ return SyntaxError("expecting comma",st.pos,st.pos)
+ else if !firstTime:
+ st.pos = st.pos + 1 // skip ','
+ firstTime = false
+ any r = parseAddSubExpr(st)
+ if r is SyntaxError:
+ return r
+ else:
+ Expr e
+ e,st = r
+ // perform annoying error check
+ l = l ++ [e]
+ st = parseWhiteSpace(st)
+ st.pos = st.pos + 1
+ return l,st
+
+// Parse all whitespace upto end-of-file
+function parseWhiteSpace(State st) -> State:
+ while st.pos < |st.input| && ASCII.isWhiteSpace(st.input[st.pos]):
+ st.pos = st.pos + 1
+ return st
+
+// ====================================================
+// Main Method
+// ====================================================
+
+public method main(System.Console sys):
+ if(|sys.args| == 0):
+ sys.out.println("no parameter provided!")
+ else:
+ File.Reader file = File.Reader(sys.args[0])
+ string input = ASCII.fromBytes(file.readAll())
+
+ Environment env = Environment()
+ State st = {pos: 0, input: input}
+ while st.pos < |st.input|:
+ Stmt s
+ any r = parse(st)
+ if r is SyntaxError:
+ sys.out.println("syntax error: " ++ r.msg)
+ return
+ s,st = r
+ Value|RuntimeError v = evaluate(s.rhs,env)
+ if v is RuntimeError:
+ sys.out.println("runtime error: " ++ v.msg)
+ return
+ if s is Set:
+ env[s.lhs] = v
+ else:
+ sys.out.println(r)
+ st = parseWhiteSpace(st)
+