diff options
| author | Tim Hatch <tim@timhatch.com> | 2015-10-13 11:34:10 -0700 |
|---|---|---|
| committer | Tim Hatch <tim@timhatch.com> | 2015-10-13 11:34:10 -0700 |
| commit | fe5643e7d13809ca6eae7ec7e95c03bea6012226 (patch) | |
| tree | eb9cdcfe0a2dadff5a76f4bcc96acb2a72a1f160 | |
| parent | 483deba6920adf5c56a318acffb88c890656bc70 (diff) | |
| parent | 27e304015dc3882ddb59122e168765775fb0e014 (diff) | |
| download | pygments-fe5643e7d13809ca6eae7ec7e95c03bea6012226.tar.gz | |
Merged in hanachin/pygments-main/add-ruby-filename (pull request #450)
Add Gemfile to RubyLexer filenames
85 files changed, 5905 insertions, 678 deletions
@@ -1,15 +1,16 @@ syntax: glob +*.egg *.pyc *.pyo -*.egg -build/* -dist/* -doc/_build -Pygments.egg-info/* -.ropeproject -tests/examplefiles/output .idea/ +.ropeproject .tags +.tox +Pygments.egg-info/* TAGS +build/* +dist/* +doc/_build tests/.coverage tests/cover +tests/examplefiles/output @@ -7,7 +7,7 @@ Other contributors, listed alphabetically, are: * Sam Aaron -- Ioke lexer * Ali Afshar -- image formatter -* Thomas Aglassinger -- Rexx lexer +* Thomas Aglassinger -- Easytrieve, JCL and Rexx lexers * Kumar Appaiah -- Debian control lexer * Andreas Amann -- AppleScript lexer * Timothy Armstrong -- Dart lexer fixes @@ -21,6 +21,7 @@ Other contributors, listed alphabetically, are: * Tim Baumann -- (Literate) Agda lexer * Paul Baumgart, 280 North, Inc. -- Objective-J lexer * Michael Bayer -- Myghty lexers +* Thomas Beale -- Archetype lexers * John Benediktsson -- Factor lexer * Christopher Bertels -- Fancy lexer * Jarrett Billingsley -- MiniD lexer @@ -37,6 +38,7 @@ Other contributors, listed alphabetically, are: * Bryan Davis -- EBNF lexer * Owen Durni -- Haxe lexer * Alexander Dutton, Oxford University Computing Services -- SPARQL lexer +* James Edwards -- Terraform lexer * Nick Efford -- Python 3 lexer * Sven Efftinge -- Xtend lexer * Artem Egorkine -- terminal256 formatter @@ -57,11 +59,14 @@ Other contributors, listed alphabetically, are: * Patrick Gotthardt -- PHP namespaces support * Olivier Guibe -- Asymptote lexer * Jordi Gutiérrez Hermoso -- Octave lexer +* Florian Hahn -- Boogie lexer * Martin Harriman -- SNOBOL lexer * Matthew Harrison -- SVG formatter * Steven Hazel -- Tcl lexer +* Dan Michael Heggø -- Turtle lexer * Aslak Hellesøy -- Gherkin lexer * Greg Hendershott -- Racket lexer +* Justin Hendrick -- ParaSail lexer * David Hess, Fish Software, Inc. 
-- Objective-J lexer * Varun Hiremath -- Debian control lexer * Rob Hoelz -- Perl 6 lexer @@ -127,6 +132,7 @@ Other contributors, listed alphabetically, are: * Dominik Picheta -- Nimrod lexer * Andrew Pinkham -- RTF Formatter Refactoring * Clément Prévost -- UrbiScript lexer +* Elias Rabel -- Fortran fixed form lexer * raichoo -- Idris lexer * Kashif Rasul -- CUDA lexer * Justin Reidy -- MXML lexer @@ -136,6 +142,7 @@ Other contributors, listed alphabetically, are: * Andre Roberge -- Tango style * Konrad Rudolph -- LaTeX formatter enhancements * Mario Ruggier -- Evoque lexers +* Miikka Salminen -- Lovelace style, lexer enhancements * Stou Sandalski -- NumPy, FORTRAN, tcsh and XSLT lexers * Matteo Sasso -- Common Lisp lexer * Joe Schafer -- Ada lexer @@ -15,7 +15,28 @@ Version 2.1 * Emacs Lisp (PR#431) * Arduino (PR#442) + * Modula-2 with multi-dialect support (#1090) + * Fortran fixed format (PR#213) + * Archetype Definition language (PR#483) + * Terraform (PR#432) + * Jcl, Easytrieve (PR#208) + * ParaSail (PR#381) + * Boogie (PR#420) + * Turtle (PR#425) +- Added styles: + + * Lovelace (PR#456) + * Algol and Algol-nu (#1090) + +- Updated autopygmentize script (PR#445) + +- Fixed style inheritance for non-standard token types in HTML output. + +- Added support for async/await to Python 3 lexer. + +- Rewrote linenos option for TerminalFormatter (it's better, but slightly + different output than before). 
Version 2.0.3 ------------- @@ -54,3 +54,9 @@ test: test-coverage: @$(PYTHON) tests/run.py -d --with-coverage --cover-package=pygments --cover-erase $(TEST) + +tox-test: + @tox -- $(TEST) + +tox-test-coverage: + @tox -- --with-coverage --cover-package=pygments --cover-erase $(TEST) diff --git a/doc/docs/lexerdevelopment.rst b/doc/docs/lexerdevelopment.rst index 08069889..2c868440 100644 --- a/doc/docs/lexerdevelopment.rst +++ b/doc/docs/lexerdevelopment.rst @@ -145,7 +145,7 @@ Regex Flags You can either define regex flags locally in the regex (``r'(?x)foo bar'``) or globally by adding a `flags` attribute to your lexer class. If no attribute is -defined, it defaults to `re.MULTILINE`. For more informations about regular +defined, it defaults to `re.MULTILINE`. For more information about regular expression flags see the page about `regular expressions`_ in the Python documentation. @@ -345,15 +345,14 @@ There are a few more things you can do with states: `PythonLexer`'s string literal processing. - If you want your lexer to start lexing in a different state you can modify the - stack by overloading the `get_tokens_unprocessed()` method:: + stack by overriding the `get_tokens_unprocessed()` method:: from pygments.lexer import RegexLexer class ExampleLexer(RegexLexer): tokens = {...} - def get_tokens_unprocessed(self, text): - stack = ['root', 'otherstate'] + def get_tokens_unprocessed(self, text, stack=('root', 'otherstate')): for item in RegexLexer.get_tokens_unprocessed(text, stack): yield item diff --git a/doc/docs/tokens.rst b/doc/docs/tokens.rst index 194eb70f..6455a501 100644 --- a/doc/docs/tokens.rst +++ b/doc/docs/tokens.rst @@ -297,6 +297,10 @@ Comments `Comment` Token type for any comment. +`Comment.Hashbang` + Token type for hashbang comments (i.e. first lines of files that start with + ``#!``). + `Comment.Multiline` Token type for multiline comments. 
diff --git a/doc/faq.rst b/doc/faq.rst index f040e053..aeba9259 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -102,7 +102,6 @@ This is an (incomplete) list of projects and sites known to use the Pygments hig * `BzrFruit <http://repo.or.cz/w/bzrfruit.git>`_, a Bazaar branch viewer * `QBzr <http://bazaar-vcs.org/QBzr>`_, a cross-platform Qt-based GUI front end for Bazaar * `BitBucket <http://bitbucket.org/>`_, a Mercurial and Git hosting site -* `GitHub <http://github.com/>`_, a site offering secure Git hosting and collaborative development * `Review Board <http://www.review-board.org/>`_, a collaborative code reviewing tool * `skeletonz <http://orangoo.com/skeletonz/>`_, a Python powered content management system * `Diamanda <http://code.google.com/p/diamanda/>`_, a Django powered wiki system with support for Pygments diff --git a/doc/languages.rst b/doc/languages.rst index 1d5c3155..13555ccf 100644 --- a/doc/languages.rst +++ b/doc/languages.rst @@ -86,7 +86,6 @@ Programming languages * Visual FoxPro * XQuery * Zephir - </ul> Template languages ------------------ diff --git a/external/autopygmentize b/external/autopygmentize index 964c138f..d2f969a1 100755 --- a/external/autopygmentize +++ b/external/autopygmentize @@ -1,6 +1,6 @@ #!/bin/bash # Best effort auto-pygmentization with transparent decompression -# (c) Reuben Thomas 2012-2013 +# by Reuben Thomas 2008-2015 # This program is in the public domain. 
# Strategy: first see if pygmentize can find a lexer; if not, ask file; if that finds nothing, fail @@ -18,6 +18,7 @@ if [[ "$lexer" == text ]]; then unset lexer case $(file --mime-type --uncompress $file_common_opts "$file") in application/xml|image/svg+xml) lexer=xml;; + application/javascript) lexer=javascript;; text/html) lexer=html;; text/troff) lexer=nroff;; text/x-asm) lexer=nasm;; @@ -43,11 +44,13 @@ if [[ "$lexer" == text ]]; then text/x-tcl) lexer=tcl;; text/x-tex|text/x-texinfo) lexer=latex;; # FIXME: texinfo really needs its own lexer - # Types that file outputs which pygmentize didn't support as of file 5.11, pygments 1.6rc1 + # Types that file outputs which pygmentize didn't support as of file 5.20, pygments 2.0 # text/calendar + # text/inf # text/PGP # text/rtf # text/texmacs + # text/vnd.graphviz # text/x-bcpl # text/x-info # text/x-m4 @@ -74,8 +77,7 @@ if [[ -n "$lexer" ]]; then application/x-bzip2) concat=bzcat;; application/x-xz) concat=xzcat;; esac - # FIXME: Specify input encoding rather than output encoding https://bitbucket.org/birkenfeld/pygments-main/issue/800 - exec $concat "$file" | pygmentize -f terminal256 -O style=native,encoding=$encoding,outencoding=UTF-8 $PYGMENTIZE_OPTS $options -l $lexer + exec $concat "$file" | pygmentize -O inencoding=$encoding $PYGMENTIZE_OPTS $options -l $lexer fi exit 1 diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py index 55548d30..b22be54f 100644 --- a/pygments/formatters/html.py +++ b/pygments/formatters/html.py @@ -140,7 +140,7 @@ class HtmlFormatter(Formatter): When `tagsfile` is set to the path of a ctags index file, it is used to generate hyperlinks from names to their definition. You must enable - `anchorlines` and run ctags with the `-n` option for this to work. The + `lineanchors` and run ctags with the `-n` option for this to work. The `python-ctags` module from PyPI must be installed to use this feature; otherwise a `RuntimeError` will be raised. 
@@ -428,6 +428,15 @@ class HtmlFormatter(Formatter): return self.classprefix + ttypeclass return '' + def _get_css_classes(self, ttype): + """Return the css classes of this token type prefixed with + the classprefix option.""" + cls = self._get_css_class(ttype) + while ttype not in STANDARD_TYPES: + ttype = ttype.parent + cls = self._get_css_class(ttype) + ' ' + cls + return cls + def _create_stylesheet(self): t2c = self.ttype2class = {Token: ''} c2s = self.class2style = {} @@ -711,7 +720,7 @@ class HtmlFormatter(Formatter): cclass = getcls(ttype) cspan = cclass and '<span style="%s">' % c2s[cclass][0] or '' else: - cls = self._get_css_class(ttype) + cls = self._get_css_classes(ttype) cspan = cls and '<span class="%s">' % cls or '' parts = value.translate(escape_table).split('\n') diff --git a/pygments/formatters/terminal.py b/pygments/formatters/terminal.py index 3c4b025f..a6eb48a4 100644 --- a/pygments/formatters/terminal.py +++ b/pygments/formatters/terminal.py @@ -101,51 +101,35 @@ class TerminalFormatter(Formatter): def _write_lineno(self, outfile): self._lineno += 1 - outfile.write("\n%04d: " % self._lineno) - - def _format_unencoded_with_lineno(self, tokensource, outfile): - self._write_lineno(outfile) - - for ttype, value in tokensource: - if value.endswith("\n"): - self._write_lineno(outfile) - value = value[:-1] - color = self.colorscheme.get(ttype) - while color is None: - ttype = ttype[:-1] - color = self.colorscheme.get(ttype) - if color: - color = color[self.darkbg] - spl = value.split('\n') - for line in spl[:-1]: - self._write_lineno(outfile) - if line: - outfile.write(ansiformat(color, line[:-1])) - if spl[-1]: - outfile.write(ansiformat(color, spl[-1])) - else: - outfile.write(value) - - outfile.write("\n") + outfile.write("%s%04d: " % (self._lineno != 1 and '\n' or '', self._lineno)) + + def _get_color(self, ttype): + # self.colorscheme is a dict containing usually generic types, so we + # have to walk the tree of dots. 
The base Token type must be a key, + # even if it's empty string, as in the default above. + colors = self.colorscheme.get(ttype) + while colors is None: + ttype = ttype.parent + colors = self.colorscheme.get(ttype) + return colors[self.darkbg] def format_unencoded(self, tokensource, outfile): if self.linenos: - self._format_unencoded_with_lineno(tokensource, outfile) - return + self._write_lineno(outfile) for ttype, value in tokensource: - color = self.colorscheme.get(ttype) - while color is None: - ttype = ttype[:-1] - color = self.colorscheme.get(ttype) - if color: - color = color[self.darkbg] - spl = value.split('\n') - for line in spl[:-1]: - if line: - outfile.write(ansiformat(color, line)) - outfile.write('\n') - if spl[-1]: - outfile.write(ansiformat(color, spl[-1])) - else: - outfile.write(value) + color = self._get_color(ttype) + + for line in value.splitlines(True): + if color: + outfile.write(ansiformat(color, line.rstrip('\n'))) + else: + outfile.write(line.rstrip('\n')) + if line.endswith('\n'): + if self.linenos: + self._write_lineno(outfile) + else: + outfile.write('\n') + + if self.linenos: + outfile.write("\n") diff --git a/pygments/lexer.py b/pygments/lexer.py index 07e81033..581508b0 100644 --- a/pygments/lexer.py +++ b/pygments/lexer.py @@ -14,7 +14,6 @@ from __future__ import print_function import re import sys import time -import itertools from pygments.filter import apply_filters, Filter from pygments.filters import get_filter_by_name diff --git a/pygments/lexers/__init__.py b/pygments/lexers/__init__.py index 333ff630..7d0b89d4 100644 --- a/pygments/lexers/__init__.py +++ b/pygments/lexers/__init__.py @@ -88,7 +88,7 @@ def get_lexer_by_name(_alias, **options): return _lexer_cache[name](**options) # continue with lexers from setuptools entrypoints for cls in find_plugin_lexers(): - if _alias in cls.aliases: + if _alias.lower() in cls.aliases: return cls(**options) raise ClassNotFound('no lexer for alias %r found' % _alias) diff --git 
a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 99461c55..2b4124a6 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -21,6 +21,7 @@ LEXERS = { 'ActionScript3Lexer': ('pygments.lexers.actionscript', 'ActionScript 3', ('as3', 'actionscript3'), ('*.as',), ('application/x-actionscript3', 'text/x-actionscript3', 'text/actionscript3')), 'ActionScriptLexer': ('pygments.lexers.actionscript', 'ActionScript', ('as', 'actionscript'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')), 'AdaLexer': ('pygments.lexers.pascal', 'Ada', ('ada', 'ada95', 'ada2005'), ('*.adb', '*.ads', '*.ada'), ('text/x-ada',)), + 'AdlLexer': ('pygments.lexers.archetype', 'ADL', ('adl',), ('*.adl', '*.adls', '*.adlf', '*.adlx'), ()), 'AgdaLexer': ('pygments.lexers.haskell', 'Agda', ('agda',), ('*.agda',), ('text/x-agda',)), 'AlloyLexer': ('pygments.lexers.dsls', 'Alloy', ('alloy',), ('*.als',), ('text/x-alloy',)), 'AmbientTalkLexer': ('pygments.lexers.ambient', 'AmbientTalk', ('at', 'ambienttalk', 'ambienttalk/2'), ('*.at',), ('text/x-ambienttalk',)), @@ -50,6 +51,7 @@ LEXERS = { 'BlitzBasicLexer': ('pygments.lexers.basic', 'BlitzBasic', ('blitzbasic', 'b3d', 'bplus'), ('*.bb', '*.decls'), ('text/x-bb',)), 'BlitzMaxLexer': ('pygments.lexers.basic', 'BlitzMax', ('blitzmax', 'bmax'), ('*.bmx',), ('text/x-bmx',)), 'BooLexer': ('pygments.lexers.dotnet', 'Boo', ('boo',), ('*.boo',), ('text/x-boo',)), + 'BoogieLexer': ('pygments.lexers.esoteric', 'Boogie', ('boogie',), ('*.bpl',), ()), 'BrainfuckLexer': ('pygments.lexers.esoteric', 'Brainfuck', ('brainfuck', 'bf'), ('*.bf', '*.b'), ('application/x-brainfuck',)), 'BroLexer': ('pygments.lexers.dsls', 'Bro', ('bro',), ('*.bro',), ()), 'BugsLexer': ('pygments.lexers.modeling', 'BUGS', ('bugs', 'winbugs', 'openbugs'), ('*.bug',), ()), @@ -59,6 +61,7 @@ LEXERS = { 'CSharpAspxLexer': ('pygments.lexers.dotnet', 'aspx-cs', ('aspx-cs',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', 
'*.asmx', '*.axd'), ()), 'CSharpLexer': ('pygments.lexers.dotnet', 'C#', ('csharp', 'c#'), ('*.cs',), ('text/x-csharp',)), 'Ca65Lexer': ('pygments.lexers.asm', 'ca65 assembler', ('ca65',), ('*.s',), ()), + 'CadlLexer': ('pygments.lexers.archetype', 'cADL', ('cadl',), ('*.cadl',), ()), 'CbmBasicV2Lexer': ('pygments.lexers.basic', 'CBM BASIC V2', ('cbmbas',), ('*.bas',), ()), 'CeylonLexer': ('pygments.lexers.jvm', 'Ceylon', ('ceylon',), ('*.ceylon',), ('text/x-ceylon',)), 'Cfengine3Lexer': ('pygments.lexers.configs', 'CFEngine3', ('cfengine3', 'cf3'), ('*.cf',), ()), @@ -110,6 +113,7 @@ LEXERS = { 'DylanLidLexer': ('pygments.lexers.dylan', 'DylanLID', ('dylan-lid', 'lid'), ('*.lid', '*.hdp'), ('text/x-dylan-lid',)), 'ECLLexer': ('pygments.lexers.ecl', 'ECL', ('ecl',), ('*.ecl',), ('application/x-ecl',)), 'ECLexer': ('pygments.lexers.c_like', 'eC', ('ec',), ('*.ec', '*.eh'), ('text/x-echdr', 'text/x-ecsrc')), + 'EasytrieveLexer': ('pygments.lexers.scripting', 'Easytrieve', ('easytrieve',), ('*.ezt', '*.mac'), ('text/x-easytrieve',)), 'EbnfLexer': ('pygments.lexers.parsers', 'EBNF', ('ebnf',), ('*.ebnf',), ('text/x-ebnf',)), 'EiffelLexer': ('pygments.lexers.eiffel', 'Eiffel', ('eiffel',), ('*.e',), ('text/x-eiffel',)), 'ElixirConsoleLexer': ('pygments.lexers.erlang', 'Elixir iex session', ('iex',), (), ('text/x-elixir-shellsession',)), @@ -126,7 +130,8 @@ LEXERS = { 'FancyLexer': ('pygments.lexers.ruby', 'Fancy', ('fancy', 'fy'), ('*.fy', '*.fancypack'), ('text/x-fancysrc',)), 'FantomLexer': ('pygments.lexers.fantom', 'Fantom', ('fan',), ('*.fan',), ('application/x-fantom',)), 'FelixLexer': ('pygments.lexers.felix', 'Felix', ('felix', 'flx'), ('*.flx', '*.flxh'), ('text/x-felix',)), - 'FortranLexer': ('pygments.lexers.fortran', 'Fortran', ('fortran',), ('*.f', '*.f90', '*.F', '*.F90'), ('text/x-fortran',)), + 'FortranFixedLexer': ('pygments.lexers.fortran', 'FortranFixed', ('fortranfixed',), ('*.f', '*.F'), ()), + 'FortranLexer': ('pygments.lexers.fortran', 'Fortran', 
('fortran',), ('*.f03', '*.f90', '*.F03', '*.F90'), ('text/x-fortran',)), 'FoxProLexer': ('pygments.lexers.foxpro', 'FoxPro', ('foxpro', 'vfp', 'clipper', 'xbase'), ('*.PRG', '*.prg'), ()), 'GAPLexer': ('pygments.lexers.algebra', 'GAP', ('gap',), ('*.g', '*.gd', '*.gi', '*.gap'), ()), 'GLShaderLexer': ('pygments.lexers.graphics', 'GLSL', ('glsl',), ('*.vert', '*.frag', '*.geo'), ('text/x-glslsrc',)), @@ -142,7 +147,7 @@ LEXERS = { 'GosuLexer': ('pygments.lexers.jvm', 'Gosu', ('gosu',), ('*.gs', '*.gsx', '*.gsp', '*.vark'), ('text/x-gosu',)), 'GosuTemplateLexer': ('pygments.lexers.jvm', 'Gosu Template', ('gst',), ('*.gst',), ('text/x-gosu-template',)), 'GroffLexer': ('pygments.lexers.markup', 'Groff', ('groff', 'nroff', 'man'), ('*.[1234567]', '*.man'), ('application/x-troff', 'text/troff')), - 'GroovyLexer': ('pygments.lexers.jvm', 'Groovy', ('groovy',), ('*.groovy',), ('text/x-groovy',)), + 'GroovyLexer': ('pygments.lexers.jvm', 'Groovy', ('groovy',), ('*.groovy', '*.gradle'), ('text/x-groovy',)), 'HamlLexer': ('pygments.lexers.html', 'Haml', ('haml',), ('*.haml',), ('text/x-haml',)), 'HandlebarsHtmlLexer': ('pygments.lexers.templates', 'HTML+Handlebars', ('html+handlebars',), ('*.handlebars', '*.hbs'), ('text/html+handlebars', 'text/x-handlebars-template')), 'HandlebarsLexer': ('pygments.lexers.templates', 'Handlebars', ('handlebars',), (), ()), @@ -175,9 +180,10 @@ LEXERS = { 'JavascriptDjangoLexer': ('pygments.lexers.templates', 'JavaScript+Django/Jinja', ('js+django', 'javascript+django', 'js+jinja', 'javascript+jinja'), (), ('application/x-javascript+django', 'application/x-javascript+jinja', 'text/x-javascript+django', 'text/x-javascript+jinja', 'text/javascript+django', 'text/javascript+jinja')), 'JavascriptErbLexer': ('pygments.lexers.templates', 'JavaScript+Ruby', ('js+erb', 'javascript+erb', 'js+ruby', 'javascript+ruby'), (), ('application/x-javascript+ruby', 'text/x-javascript+ruby', 'text/javascript+ruby')), 'JavascriptGenshiLexer': 
('pygments.lexers.templates', 'JavaScript+Genshi Text', ('js+genshitext', 'js+genshi', 'javascript+genshitext', 'javascript+genshi'), (), ('application/x-javascript+genshi', 'text/x-javascript+genshi', 'text/javascript+genshi')), - 'JavascriptLexer': ('pygments.lexers.javascript', 'JavaScript', ('js', 'javascript'), ('*.js',), ('application/javascript', 'application/x-javascript', 'text/x-javascript', 'text/javascript')), + 'JavascriptLexer': ('pygments.lexers.javascript', 'JavaScript', ('js', 'javascript'), ('*.js', '*.jsm'), ('application/javascript', 'application/x-javascript', 'text/x-javascript', 'text/javascript')), 'JavascriptPhpLexer': ('pygments.lexers.templates', 'JavaScript+PHP', ('js+php', 'javascript+php'), (), ('application/x-javascript+php', 'text/x-javascript+php', 'text/javascript+php')), 'JavascriptSmartyLexer': ('pygments.lexers.templates', 'JavaScript+Smarty', ('js+smarty', 'javascript+smarty'), (), ('application/x-javascript+smarty', 'text/x-javascript+smarty', 'text/javascript+smarty')), + 'JclLexer': ('pygments.lexers.scripting', 'JCL', ('jcl',), ('*.jcl',), ('text/x-jcl',)), 'JsonLdLexer': ('pygments.lexers.data', 'JSON-LD', ('jsonld', 'json-ld'), ('*.jsonld',), ('application/ld+json',)), 'JsonLexer': ('pygments.lexers.data', 'JSON', ('json',), ('*.json',), ('application/json',)), 'JspLexer': ('pygments.lexers.templates', 'Java Server Page', ('jsp',), ('*.jsp',), ('application/x-jsp',)), @@ -194,6 +200,7 @@ LEXERS = { 'LassoLexer': ('pygments.lexers.javascript', 'Lasso', ('lasso', 'lassoscript'), ('*.lasso', '*.lasso[89]'), ('text/x-lasso',)), 'LassoXmlLexer': ('pygments.lexers.templates', 'XML+Lasso', ('xml+lasso',), (), ('application/xml+lasso',)), 'LeanLexer': ('pygments.lexers.theorem', 'Lean', ('lean',), ('*.lean',), ('text/x-lean',)), + 'LessCssLexer': ('pygments.lexers.css', 'LessCss', ('less',), ('*.less',), ('text/x-less-css',)), 'LighttpdConfLexer': ('pygments.lexers.configs', 'Lighttpd configuration file', ('lighty', 'lighttpd'), 
(), ('text/x-lighttpd-conf',)), 'LimboLexer': ('pygments.lexers.inferno', 'Limbo', ('limbo',), ('*.b',), ('text/limbo',)), 'LiquidLexer': ('pygments.lexers.templates', 'liquid', ('liquid',), ('*.liquid',), ()), @@ -221,7 +228,7 @@ LEXERS = { 'MatlabSessionLexer': ('pygments.lexers.matlab', 'Matlab session', ('matlabsession',), (), ()), 'MiniDLexer': ('pygments.lexers.d', 'MiniD', ('minid',), (), ('text/x-minidsrc',)), 'ModelicaLexer': ('pygments.lexers.modeling', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)), - 'Modula2Lexer': ('pygments.lexers.pascal', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)), + 'Modula2Lexer': ('pygments.lexers.modula2', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)), 'MoinWikiLexer': ('pygments.lexers.markup', 'MoinMoin/Trac Wiki markup', ('trac-wiki', 'moin'), (), ('text/x-trac-wiki',)), 'MonkeyLexer': ('pygments.lexers.basic', 'Monkey', ('monkey',), ('*.monkey',), ('text/x-monkey',)), 'MoonScriptLexer': ('pygments.lexers.scripting', 'MoonScript', ('moon', 'moonscript'), ('*.moon',), ('text/x-moonscript', 'application/x-moonscript')), @@ -258,10 +265,12 @@ LEXERS = { 'ObjectiveJLexer': ('pygments.lexers.javascript', 'Objective-J', ('objective-j', 'objectivej', 'obj-j', 'objj'), ('*.j',), ('text/x-objective-j',)), 'OcamlLexer': ('pygments.lexers.ml', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)), 'OctaveLexer': ('pygments.lexers.matlab', 'Octave', ('octave',), ('*.m',), ('text/octave',)), + 'OdinLexer': ('pygments.lexers.archetype', 'ODIN', ('odin',), ('*.odin',), ('text/odin',)), 'OocLexer': ('pygments.lexers.ooc', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)), 'OpaLexer': ('pygments.lexers.ml', 'Opa', ('opa',), ('*.opa',), ('text/x-opa',)), 'OpenEdgeLexer': ('pygments.lexers.business', 'OpenEdge ABL', ('openedge', 'abl', 'progress'), ('*.p', '*.cls'), ('text/x-openedge', 'application/x-openedge')), 'PanLexer': ('pygments.lexers.dsls', 'Pan', 
('pan',), ('*.pan',), ()), + 'ParaSailLexer': ('pygments.lexers.parasail', 'ParaSail', ('parasail',), ('*.psi', '*.psl'), ('text/x-parasail',)), 'PawnLexer': ('pygments.lexers.pawn', 'Pawn', ('pawn',), ('*.p', '*.pwn', '*.inc'), ('text/x-pawn',)), 'Perl6Lexer': ('pygments.lexers.perl', 'Perl6', ('perl6', 'pl6'), ('*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm', '*.p6m', '*.pm6', '*.t'), ('text/x-perl6', 'application/x-perl6')), 'PerlLexer': ('pygments.lexers.perl', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm', '*.t'), ('text/x-perl', 'application/x-perl')), @@ -285,7 +294,7 @@ LEXERS = { 'PythonLexer': ('pygments.lexers.python', 'Python', ('python', 'py', 'sage'), ('*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac', '*.sage'), ('text/x-python', 'application/x-python')), 'PythonTracebackLexer': ('pygments.lexers.python', 'Python Traceback', ('pytb',), ('*.pytb',), ('text/x-python-traceback',)), 'QBasicLexer': ('pygments.lexers.basic', 'QBasic', ('qbasic', 'basic'), ('*.BAS', '*.bas'), ('text/basic',)), - 'QmlLexer': ('pygments.lexers.webmisc', 'QML', ('qml',), ('*.qml',), ('application/x-qml',)), + 'QmlLexer': ('pygments.lexers.webmisc', 'QML', ('qml', 'qbs'), ('*.qml', '*.qbs'), ('application/x-qml', 'application/x-qt.qbs+qml')), 'RConsoleLexer': ('pygments.lexers.r', 'RConsole', ('rconsole', 'rout'), ('*.Rout',), ()), 'RPMSpecLexer': ('pygments.lexers.installers', 'RPMSpec', ('spec',), ('*.spec',), ('text/x-rpm-spec',)), 'RacketLexer': ('pygments.lexers.lisp', 'Racket', ('racket', 'rkt'), ('*.rkt', '*.rktd', '*.rktl'), ('text/x-racket', 'application/x-racket')), @@ -312,7 +321,7 @@ LEXERS = { 'RstLexer': ('pygments.lexers.markup', 'reStructuredText', ('rst', 'rest', 'restructuredtext'), ('*.rst', '*.rest'), ('text/x-rst', 'text/prs.fallenstein.rst')), 'RubyConsoleLexer': ('pygments.lexers.ruby', 'Ruby irb session', ('rbcon', 'irb'), (), ('text/x-ruby-shellsession',)), 'RubyLexer': ('pygments.lexers.ruby', 'Ruby', ('rb', 'ruby', 'duby'), 
('*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx', '*.duby'), ('text/x-ruby', 'application/x-ruby')), - 'RustLexer': ('pygments.lexers.rust', 'Rust', ('rust',), ('*.rs',), ('text/x-rustsrc',)), + 'RustLexer': ('pygments.lexers.rust', 'Rust', ('rust',), ('*.rs',), ('text/rust',)), 'SLexer': ('pygments.lexers.r', 'S', ('splus', 's', 'r'), ('*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron'), ('text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r', 'text/x-R', 'text/x-r-history', 'text/x-r-profile')), 'SMLLexer': ('pygments.lexers.ml', 'Standard ML', ('sml',), ('*.sml', '*.sig', '*.fun'), ('text/x-standardml', 'application/x-standardml')), 'SassLexer': ('pygments.lexers.css', 'Sass', ('sass',), ('*.sass',), ('text/x-sass',)), @@ -335,6 +344,7 @@ LEXERS = { 'SquidConfLexer': ('pygments.lexers.configs', 'SquidConf', ('squidconf', 'squid.conf', 'squid'), ('squid.conf',), ('text/x-squidconf',)), 'SspLexer': ('pygments.lexers.templates', 'Scalate Server Page', ('ssp',), ('*.ssp',), ('application/x-ssp',)), 'StanLexer': ('pygments.lexers.modeling', 'Stan', ('stan',), ('*.stan',), ()), + 'SuperColliderLexer': ('pygments.lexers.supercollider', 'SuperCollider', ('sc', 'supercollider'), ('*.sc', '*.scd'), ('application/supercollider', 'text/supercollider')), 'SwiftLexer': ('pygments.lexers.objective', 'Swift', ('swift',), ('*.swift',), ('text/x-swift',)), 'SwigLexer': ('pygments.lexers.c_like', 'SWIG', ('swig',), ('*.swg', '*.i'), ('text/swig',)), 'SystemVerilogLexer': ('pygments.lexers.hdl', 'systemverilog', ('systemverilog', 'sv'), ('*.sv', '*.svh'), ('text/x-systemverilog',)), @@ -342,10 +352,12 @@ LEXERS = { 'TclLexer': ('pygments.lexers.tcl', 'Tcl', ('tcl',), ('*.tcl', '*.rvt'), ('text/x-tcl', 'text/x-script.tcl', 'application/x-tcl')), 'TcshLexer': ('pygments.lexers.shell', 'Tcsh', ('tcsh', 'csh'), ('*.tcsh', '*.csh'), ('application/x-csh',)), 'TeaTemplateLexer': ('pygments.lexers.templates', 'Tea', ('tea',), ('*.tea',), ('text/x-tea',)), + 'TerraformLexer': 
('pygments.lexers.configs', 'Terraform', ('terraform', 'tf'), ('*.tf',), ('application/x-tf', 'application/x-terraform')), 'TexLexer': ('pygments.lexers.markup', 'TeX', ('tex', 'latex'), ('*.tex', '*.aux', '*.toc'), ('text/x-tex', 'text/x-latex')), 'TextLexer': ('pygments.lexers.special', 'Text only', ('text',), ('*.txt',), ('text/plain',)), 'TodotxtLexer': ('pygments.lexers.textfmts', 'Todotxt', ('todotxt',), ('todo.txt', '*.todotxt'), ('text/x-todo',)), 'TreetopLexer': ('pygments.lexers.parsers', 'Treetop', ('treetop',), ('*.treetop', '*.tt'), ()), + 'TurtleLexer': ('pygments.lexers.rdf', 'Turtle', ('turtle',), ('*.ttl',), ('text/turtle', 'application/x-turtle')), 'TwigHtmlLexer': ('pygments.lexers.templates', 'HTML+Twig', ('html+twig',), ('*.twig',), ('text/html+twig',)), 'TwigLexer': ('pygments.lexers.templates', 'Twig', ('twig',), (), ('application/x-twig',)), 'TypeScriptLexer': ('pygments.lexers.javascript', 'TypeScript', ('ts',), ('*.ts',), ('text/x-typescript',)), diff --git a/pygments/lexers/_stan_builtins.py b/pygments/lexers/_stan_builtins.py index 0a225eba..6bf44574 100644 --- a/pygments/lexers/_stan_builtins.py +++ b/pygments/lexers/_stan_builtins.py @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This file contains the names of functions for Stan used by - ``pygments.lexers.math.StanLexer. This is for Stan language version 2.5.0. + ``pygments.lexers.math.StanLexer. This is for Stan language version 2.7.0 :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. 
@@ -35,6 +35,7 @@ TYPES = ( 'positive_ordered', 'real', 'row_vector', + 'row_vectormatrix', 'simplex', 'unit_vector', 'vector', @@ -200,7 +201,6 @@ FUNCTIONS = ( 'lkj_corr_cholesky_rng', 'lkj_corr_log', 'lkj_corr_rng', - 'lkj_cov_log', 'lmgamma', 'log', 'log10', @@ -214,6 +214,7 @@ FUNCTIONS = ( 'log_diff_exp', 'log_falling_factorial', 'log_inv_logit', + 'log_mix', 'log_rising_factorial', 'log_softmax', 'log_sum_exp', @@ -236,6 +237,7 @@ FUNCTIONS = ( 'min', 'modified_bessel_first_kind', 'modified_bessel_second_kind', + 'multi_gp_cholesky_log', 'multi_gp_log', 'multi_normal_cholesky_log', 'multi_normal_cholesky_rng', @@ -248,6 +250,9 @@ FUNCTIONS = ( 'multinomial_rng', 'multiply_log', 'multiply_lower_tri_self_transpose', + 'neg_binomial_2_ccdf_log', + 'neg_binomial_2_cdf', + 'neg_binomial_2_cdf_log', 'neg_binomial_2_log', 'neg_binomial_2_log_log', 'neg_binomial_2_log_rng', @@ -284,6 +289,7 @@ FUNCTIONS = ( 'poisson_cdf_log', 'poisson_log', 'poisson_log_log', + 'poisson_log_rng', 'poisson_rng', 'positive_infinity', 'pow', @@ -371,6 +377,7 @@ FUNCTIONS = ( 'weibull_cdf_log', 'weibull_log', 'weibull_rng', + 'wiener_log', 'wishart_log', 'wishart_rng' ) @@ -400,10 +407,10 @@ DISTRIBUTIONS = ( 'inv_wishart', 'lkj_corr', 'lkj_corr_cholesky', - 'lkj_cov', 'logistic', 'lognormal', 'multi_gp', + 'multi_gp_cholesky', 'multi_normal', 'multi_normal_cholesky', 'multi_normal_prec', @@ -425,6 +432,7 @@ DISTRIBUTIONS = ( 'uniform', 'von_mises', 'weibull', + 'wiener', 'wishart' ) diff --git a/pygments/lexers/archetype.py b/pygments/lexers/archetype.py new file mode 100644 index 00000000..4f1b2645 --- /dev/null +++ b/pygments/lexers/archetype.py @@ -0,0 +1,316 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.archetype + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexer for Archetype-related syntaxes, including: + + - ODIN syntax <https://github.com/openEHR/odin> + - ADL syntax <http://www.openehr.org/releases/trunk/architecture/am/adl2.pdf> + - cADL sub-syntax of ADL + + For uses of this syntax, 
see the openEHR archetypes <http://www.openEHR.org/ckm> + + Contributed by Thomas Beale <https://github.com/wolandscat>, + <https://bitbucket.org/thomas_beale>. + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import RegexLexer, include, bygroups, using, default +from pygments.token import Text, Comment, Name, Literal, Number, String, \ + Punctuation, Keyword, Operator, Generic + +__all__ = ['OdinLexer', 'CadlLexer', 'AdlLexer'] + + +class AtomsLexer(RegexLexer): + """ + Lexer for Values used in ADL and ODIN. + + .. versionadded:: 2.1 + """ + + tokens = { + # ----- pseudo-states for inclusion ----- + 'whitespace': [ + (r'\n', Text), + (r'\s+', Text), + (r'[ \t]*--.*$', Comment), + ], + 'archetype_id': [ + (r'[ \t]*([a-zA-Z]\w+(\.[a-zA-Z]\w+)*::)?[a-zA-Z]\w+(-[a-zA-Z]\w+){2}' + r'\.\w+[\w-]*\.v\d+(\.\d+){,2}((-[a-z]+)(\.\d+)?)?', Name.Decorator), + ], + 'date_constraints': [ + # ISO 8601-based date/time constraints + (r'[Xx?YyMmDdHhSs\d]{2,4}([:-][Xx?YyMmDdHhSs\d]{2}){2}', Literal.Date), + # ISO 8601-based duration constraints + optional trailing slash + (r'(P[YyMmWwDd]+(T[HhMmSs]+)?|PT[HhMmSs]+)/?', Literal.Date), + ], + 'ordered_values': [ + # ISO 8601 date with optional 'T' ligature + (r'\d{4}-\d{2}-\d{2}T?', Literal.Date), + # ISO 8601 time + (r'\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{4}|Z)?', Literal.Date), + # ISO 8601 duration + (r'P((\d*(\.\d+)?[YyMmWwDd]){1,3}(T(\d*(\.\d+)?[HhMmSs]){,3})?|' + r'T(\d*(\.\d+)?[HhMmSs]){,3})', Literal.Date), + (r'[+-]?(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float), + (r'[+-]?(\d+)*\.\d+%?', Number.Float), + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'[+-]?\d+%?', Number.Integer), + ], + 'values': [ + include('ordered_values'), + (r'([Tt]rue|[Ff]alse)', Literal), + (r'"', String, 'string'), + (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), + (r'[a-z][a-z0-9+.-]*:', Literal, 'uri'), + # term code + 
(r'(\[)(\w[\w-]*(?:\([^)\n]+\))?)(::)(\w[\w-]*)(\])', + bygroups(Punctuation, Name.Decorator, Punctuation, Name.Decorator, Punctuation)), + (r'\|', Punctuation, 'interval'), + # list continuation + (r'\.\.\.', Punctuation), + ], + 'constraint_values': [ + (r'(\[)(\w[\w-]*(?:\([^)\n]+\))?)(::)', + bygroups(Punctuation, Name.Decorator, Punctuation), 'adl14_code_constraint'), + # ADL 1.4 ordinal constraint + (r'(\d*)(\|)(\[\w[\w-]*::\w[\w-]*\])((?:[,;])?)', + bygroups(Number, Punctuation, Name.Decorator, Punctuation)), + include('date_constraints'), + include('values'), + ], + + # ----- real states ----- + 'string': [ + ('"', String, '#pop'), + (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|' + r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape), + # all other characters + (r'[^\\"]+', String), + # stray backslash + (r'\\', String), + ], + 'uri': [ + # effective URI terminators + (r'[,>\s]', Punctuation, '#pop'), + (r'[^>\s,]+', Literal), + ], + 'interval': [ + (r'\|', Punctuation, '#pop'), + include('ordered_values'), + (r'\.\.', Punctuation), + (r'[<>=] *', Punctuation), + # handle +/- + (r'\+/-', Punctuation), + (r'\s+', Text), + ], + 'any_code': [ + include('archetype_id'), + # if it is a code + (r'[a-z_]\w*[0-9.]+(@[^\]]+)?', Name.Decorator), + # if it is tuple with attribute names + (r'[a-z_]\w*', Name.Class), + # if it is an integer, i.e. Xpath child index + (r'[0-9]+', Text), + (r'\|', Punctuation, 'code_rubric'), + (r'\]', Punctuation, '#pop'), + # handle use_archetype statement + (r'\s*,\s*', Punctuation), + ], + 'code_rubric': [ + (r'\|', Punctuation, '#pop'), + (r'[^|]+', String), + ], + 'adl14_code_constraint': [ + (r'\]', Punctuation, '#pop'), + (r'\|', Punctuation, 'code_rubric'), + (r'(\w[\w-]*)([;,]?)', bygroups(Name.Decorator, Punctuation)), + include('whitespace'), + ], + } + + +class OdinLexer(AtomsLexer): + """ + Lexer for ODIN syntax. + + .. 
versionadded:: 2.1 + """ + name = 'ODIN' + aliases = ['odin'] + filenames = ['*.odin'] + mimetypes = ['text/odin'] + + tokens = { + 'path': [ + (r'>', Punctuation, '#pop'), + # attribute name + (r'[a-z_]\w*', Name.Class), + (r'/', Punctuation), + (r'\[', Punctuation, 'key'), + (r'\s*,\s*', Punctuation, '#pop'), + (r'\s+', Text, '#pop'), + ], + 'key': [ + include('values'), + (r'\]', Punctuation, '#pop'), + ], + 'type_cast': [ + (r'\)', Punctuation, '#pop'), + (r'[^)]+', Name.Class), + ], + 'root': [ + include('whitespace'), + (r'([Tt]rue|[Ff]alse)', Literal), + include('values'), + # x-ref path + (r'/', Punctuation, 'path'), + # x-ref path starting with key + (r'\[', Punctuation, 'key'), + # attribute name + (r'[a-z_]\w*', Name.Class), + (r'=', Operator), + (r'\(', Punctuation, 'type_cast'), + (r',', Punctuation), + (r'<', Punctuation), + (r'>', Punctuation), + (r';', Punctuation), + ], + } + + +class CadlLexer(AtomsLexer): + """ + Lexer for cADL syntax. + + .. versionadded:: 2.1 + """ + name = 'cADL' + aliases = ['cadl'] + filenames = ['*.cadl'] + + tokens = { + 'path': [ + # attribute name + (r'[a-z_]\w*', Name.Class), + (r'/', Punctuation), + (r'\[', Punctuation, 'any_code'), + (r'\s+', Punctuation, '#pop'), + ], + 'root': [ + include('whitespace'), + (r'(cardinality|existence|occurrences|group|include|exclude|' + r'allow_archetype|use_archetype|use_node)\W', Keyword.Type), + (r'(and|or|not|there_exists|xor|implies|for_all)\W', Keyword.Type), + (r'(after|before|closed)\W', Keyword.Type), + (r'(not)\W', Operator), + (r'(matches|is_in)\W', Operator), + # is_in / not is_in char + (u'(\u2208|\u2209)', Operator), + # there_exists / not there_exists / for_all / and / or + (u'(\u2203|\u2204|\u2200|\u2227|\u2228|\u22BB|\223C)', + Operator), + # regex in slot or as string constraint + (r'(\{)(\s*/[^}]+/\s*)(\})', + bygroups(Punctuation, String.Regex, Punctuation)), + # regex in slot or as string constraint + (r'(\{)(\s*\^[^}]+\^\s*)(\})', + bygroups(Punctuation, 
String.Regex, Punctuation)), + (r'/', Punctuation, 'path'), + # for cardinality etc + (r'(\{)((?:\d+\.\.)?(?:\d+|\*))((?:\s*;\s*(?:ordered|unordered|unique)){,2})(\})', + bygroups(Punctuation, Number, Number, Punctuation)), + # [{ is start of a tuple value + (r'\[\{', Punctuation), + (r'\}\]', Punctuation), + (r'\{', Punctuation), + (r'\}', Punctuation), + include('constraint_values'), + # type name + (r'[A-Z]\w+(<[A-Z]\w+([A-Za-z_<>]*)>)?', Name.Class), + # attribute name + (r'[a-z_]\w*', Name.Class), + (r'\[', Punctuation, 'any_code'), + (r'(~|//|\\\\|\+|-|/|\*|\^|!=|=|<=|>=|<|>]?)', Operator), + (r'\(', Punctuation), + (r'\)', Punctuation), + # for lists of values + (r',', Punctuation), + (r'"', String, 'string'), + # for assumed value + (r';', Punctuation), + ], + } + + +class AdlLexer(AtomsLexer): + """ + Lexer for ADL syntax. + + .. versionadded:: 2.1 + """ + + name = 'ADL' + aliases = ['adl'] + filenames = ['*.adl', '*.adls', '*.adlf', '*.adlx'] + + tokens = { + 'whitespace': [ + # blank line ends + (r'\s*\n', Text), + # comment-only line + (r'^[ \t]*--.*$', Comment), + ], + 'odin_section': [ + # repeating the following two rules from the root state enable multi-line strings + # that start in the first column to be dealt with + (r'^(language|description|ontology|terminology|annotations|' + r'component_terminologies|revision_history)[ \t]*\n', Generic.Heading), + (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'), + (r'^([ \t]*|[ \t]+.*)\n', using(OdinLexer)), + (r'^([^"]*")(>[ \t]*\n)', bygroups(String, Punctuation)), + # template overlay delimiter + (r'^----------*\n', Text, '#pop'), + (r'^.*\n', String), + default('#pop'), + ], + 'cadl_section': [ + (r'^([ \t]*|[ \t]+.*)\n', using(CadlLexer)), + default('#pop'), + ], + 'rules_section': [ + (r'^[ \t]+.*\n', using(CadlLexer)), + default('#pop'), + ], + 'metadata': [ + (r'\)', Punctuation, '#pop'), + (r';', Punctuation), + (r'([Tt]rue|[Ff]alse)', Literal), + # numbers and version ids + 
(r'\d+(\.\d+)*', Literal), + # Guids + (r'(\d|[a-fA-F])+(-(\d|[a-fA-F])+){3,}', Literal), + (r'\w+', Name.Class), + (r'"', String, 'string'), + (r'=', Operator), + (r'[ \t]+', Text), + default('#pop'), + ], + 'root': [ + (r'^(archetype|template_overlay|operational_template|template|' + r'speciali[sz]e)', Generic.Heading), + (r'^(language|description|ontology|terminology|annotations|' + r'component_terminologies|revision_history)[ \t]*\n', + Generic.Heading, 'odin_section'), + (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'), + (r'^(rules)[ \t]*\n', Generic.Heading, 'rules_section'), + include('archetype_id'), + (r'[ \t]*\(', Punctuation, 'metadata'), + include('whitespace'), + ], + } diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py index c308f7fc..918ed83b 100644 --- a/pygments/lexers/asm.py +++ b/pygments/lexers/asm.py @@ -286,7 +286,8 @@ class LlvmLexer(RegexLexer): r'|lshr|ashr|and|or|xor|icmp|fcmp' r'|phi|call|trunc|zext|sext|fptrunc|fpext|uitofp|sitofp|fptoui' - r'|fptosi|inttoptr|ptrtoint|bitcast|select|va_arg|ret|br|switch' + r'|fptosi|inttoptr|ptrtoint|bitcast|addrspacecast' + r'|select|va_arg|ret|br|switch' r'|invoke|unwind|unreachable' r'|indirectbr|landingpad|resume' diff --git a/pygments/lexers/automation.py b/pygments/lexers/automation.py index c8e6b0b8..2ebc4d24 100644 --- a/pygments/lexers/automation.py +++ b/pygments/lexers/automation.py @@ -316,7 +316,8 @@ class AutoItLexer(RegexLexer): tokens = { 'root': [ (r';.*\n', Comment.Single), - (r'(#comments-start|#cs).*?(#comments-end|#ce)', Comment.Multiline), + (r'(#comments-start|#cs)(.|\n)*?(#comments-end|#ce)', + Comment.Multiline), (r'[\[\]{}(),;]', Punctuation), (r'(and|or|not)\b', Operator.Word), (r'[$|@][a-zA-Z_]\w*', Name.Variable), diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py index b565b97b..35ea517f 100644 --- a/pygments/lexers/c_cpp.py +++ b/pygments/lexers/c_cpp.py @@ -28,8 +28,10 @@ class CFamilyLexer(RegexLexer): #: optional Comment or Whitespace 
_ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' + + # The trailing ?, rather than *, avoids a geometric performance drop here. #: only one /* */ style comment - _ws1 = r'\s*(?:/[*].*?[*]/\s*)*' + _ws1 = r'\s*(?:/[*].*?[*]/\s*)?' tokens = { 'whitespace': [ @@ -202,7 +204,7 @@ class CppLexer(CFamilyLexer): 'export', 'friend', 'mutable', 'namespace', 'new', 'operator', 'private', 'protected', 'public', 'reinterpret_cast', 'restrict', 'static_cast', 'template', 'this', 'throw', 'throws', - 'typeid', 'typename', 'using', 'virtual', + 'try', 'typeid', 'typename', 'using', 'virtual', 'constexpr', 'nullptr', 'decltype', 'thread_local', 'alignas', 'alignof', 'static_assert', 'noexcept', 'override', 'final'), suffix=r'\b'), Keyword), diff --git a/pygments/lexers/c_like.py b/pygments/lexers/c_like.py index a08d86a3..27736bff 100644 --- a/pygments/lexers/c_like.py +++ b/pygments/lexers/c_like.py @@ -414,7 +414,7 @@ class MqlLexer(CppLexer): class ArduinoLexer(CppLexer): """ - For `Arduino® <https://arduino.cc/>`_ source. + For `Arduino(tm) <https://arduino.cc/>`_ source. 
This is an extension of the CppLexer, as the Arduino® Language is a superset of C++ diff --git a/pygments/lexers/chapel.py b/pygments/lexers/chapel.py index 520be37b..5b7be4dd 100644 --- a/pygments/lexers/chapel.py +++ b/pygments/lexers/chapel.py @@ -46,9 +46,10 @@ class ChapelLexer(RegexLexer): 'continue', 'delete', 'dmapped', 'do', 'domain', 'else', 'enum', 'export', 'extern', 'for', 'forall', 'if', 'index', 'inline', 'iter', 'label', 'lambda', 'let', 'local', 'new', 'noinit', 'on', - 'otherwise', 'pragma', 'reduce', 'return', 'scan', 'select', - 'serial', 'single', 'sparse', 'subdomain', 'sync', 'then', 'use', - 'when', 'where', 'while', 'with', 'yield', 'zip'), suffix=r'\b'), + 'otherwise', 'pragma', 'private', 'public', 'reduce', + 'require', 'return', 'scan', 'select', 'serial', 'single', + 'sparse', 'subdomain', 'sync', 'then', 'use', 'when', 'where', + 'while', 'with', 'yield', 'zip'), suffix=r'\b'), Keyword), (r'(proc)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'procname'), (r'(class|module|record|union)(\s+)', bygroups(Keyword, Text), diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py index 1bd8f55a..6b00e5f4 100644 --- a/pygments/lexers/configs.py +++ b/pygments/lexers/configs.py @@ -18,7 +18,8 @@ from pygments.lexers.shell import BashLexer __all__ = ['IniLexer', 'RegeditLexer', 'PropertiesLexer', 'KconfigLexer', 'Cfengine3Lexer', 'ApacheConfLexer', 'SquidConfLexer', - 'NginxConfLexer', 'LighttpdConfLexer', 'DockerLexer'] + 'NginxConfLexer', 'LighttpdConfLexer', 'DockerLexer', + 'TerraformLexer'] class IniLexer(RegexLexer): @@ -544,3 +545,75 @@ class DockerLexer(RegexLexer): (r'(.*\\\n)*.+', using(BashLexer)), ], } + + +class TerraformLexer(RegexLexer): + """ + Lexer for `terraform .tf files <https://www.terraform.io/>`_ + + .. 
versionadded:: 2.1 + """ + + name = 'Terraform' + aliases = ['terraform', 'tf'] + filenames = ['*.tf'] + mimetypes = ['application/x-tf', 'application/x-terraform'] + + tokens = { + 'root': [ + include('string'), + include('punctuation'), + include('curly'), + include('basic'), + include('whitespace'), + (r'[0-9]+', Number), + ], + 'basic': [ + (words(('true', 'false'), prefix=r'\b', suffix=r'\b'), Keyword.Type), + (r'\s*/\*', Comment.Multiline, 'comment'), + (r'\s*#.*\n', Comment.Single), + (r'(.*?)(\s*)(=)', bygroups(Name.Attribute, Text, Operator)), + (words(('variable', 'resource', 'provider', 'provisioner', 'module'), + prefix=r'\b', suffix=r'\b'), Keyword.Reserved, 'function'), + (words(('ingress', 'egress', 'listener', 'default', 'connection'), + prefix=r'\b', suffix=r'\b'), Keyword.Declaration), + ('\$\{', String.Interpol, 'var_builtin'), + ], + 'function': [ + (r'(\s+)(".*")(\s+)', bygroups(Text, String, Text)), + include('punctuation'), + include('curly'), + ], + 'var_builtin': [ + (r'\$\{', String.Interpol, '#push'), + (words(('concat', 'file', 'join', 'lookup', 'element'), + prefix=r'\b', suffix=r'\b'), Name.Builtin), + include('string'), + include('punctuation'), + (r'\s+', Text), + (r'\}', String.Interpol, '#pop'), + ], + 'string':[ + (r'(".*")', bygroups(String.Double)), + ], + 'punctuation':[ + (r'[\[\]\(\),.]', Punctuation), + ], + # Keep this separate from punctuation - we sometimes want to use different + # Tokens for { } + 'curly':[ + (r'\{', Text.Punctuation), + (r'\}', Text.Punctuation), + ], + 'comment': [ + (r'[^*/]', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline) + ], + 'whitespace': [ + (r'\n', Text), + (r'\s+', Text), + (r'\\\n', Text), + ], + } diff --git a/pygments/lexers/css.py b/pygments/lexers/css.py index c11e7ec0..4165bcc1 100644 --- a/pygments/lexers/css.py +++ b/pygments/lexers/css.py @@ -13,12 +13,12 @@ import re import copy from pygments.lexer 
import ExtendedRegexLexer, RegexLexer, include, bygroups, \ - default, words + default, words, inherit from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation from pygments.util import iteritems -__all__ = ['CssLexer', 'SassLexer', 'ScssLexer'] +__all__ = ['CssLexer', 'SassLexer', 'ScssLexer', 'LessCssLexer'] class CssLexer(RegexLexer): @@ -484,6 +484,7 @@ class ScssLexer(RegexLexer): (r'[^\s:="\[]+', Name.Attribute), (r'#\{', String.Interpol, 'interpolation'), (r'[ \t]*:', Operator, 'value'), + default('#pop'), ], 'inline-comment': [ @@ -496,3 +497,27 @@ class ScssLexer(RegexLexer): tokens[group] = copy.copy(common) tokens['value'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')]) tokens['selector'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')]) + + +class LessCssLexer(CssLexer): + """ + For `LESS <http://lesscss.org/>`_ stylesheets. + + .. versionadded:: 2.1 + """ + + name = 'LessCss' + aliases = ['less'] + filenames = ['*.less'] + mimetypes = ['text/x-less-css'] + + tokens = { + 'root': [ + (r'@\w+', Name.Variable), + inherit, + ], + 'content': [ + (r'{', Punctuation, '#push'), + inherit, + ], + } diff --git a/pygments/lexers/esoteric.py b/pygments/lexers/esoteric.py index f61b292d..1f317260 100644 --- a/pygments/lexers/esoteric.py +++ b/pygments/lexers/esoteric.py @@ -9,11 +9,11 @@ :license: BSD, see LICENSE for details. 
""" -from pygments.lexer import RegexLexer, include +from pygments.lexer import RegexLexer, include, words from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation, Error + Number, Punctuation, Error, Whitespace -__all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer'] +__all__ = ['BrainfuckLexer', 'BefungeLexer', 'BoogieLexer', 'RedcodeLexer'] class BrainfuckLexer(RegexLexer): @@ -112,3 +112,48 @@ class RedcodeLexer(RegexLexer): (r'[-+]?\d+', Number.Integer), ], } + + +class BoogieLexer(RegexLexer): + """ + For `Boogie <https://boogie.codeplex.com/>`_ source code. + + .. versionadded:: 2.0 + """ + name = 'Boogie' + aliases = ['boogie'] + filenames = ['*.bpl'] + + tokens = { + 'root': [ + # Whitespace and Comments + (r'\n', Whitespace), + (r'\s+', Whitespace), + (r'//[/!](.*?)\n', Comment.Doc), + (r'//(.*?)\n', Comment.Single), + (r'/\*', Comment.Multiline, 'comment'), + + (words(( + 'axiom', 'break', 'call', 'ensures', 'else', 'exists', 'function', + 'forall', 'if', 'invariant', 'modifies', 'procedure', 'requires', + 'then', 'var', 'while'), + suffix=r'\b'), Keyword), + (words(('const',), suffix=r'\b'), Keyword.Reserved), + + (words(('bool', 'int', 'ref'), suffix=r'\b'), Keyword.Type), + include('numbers'), + (r"(>=|<=|:=|!=|==>|&&|\|\||[+/\-=>*<\[\]])", Operator), + (r"([{}():;,.])", Punctuation), + # Identifier + (r'[a-zA-Z_]\w*', Name), + ], + 'comment': [ + (r'[^*/]+', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline), + ], + 'numbers': [ + (r'[0-9]+', Number.Integer), + ], + } diff --git a/pygments/lexers/fortran.py b/pygments/lexers/fortran.py index 8ba54aff..d822160f 100644 --- a/pygments/lexers/fortran.py +++ b/pygments/lexers/fortran.py @@ -11,11 +11,11 @@ import re -from pygments.lexer import RegexLexer, include, words +from pygments.lexer import RegexLexer, bygroups, include, words, using from pygments.token import Text, Comment, 
Operator, Keyword, Name, String, \ - Number, Punctuation + Number, Punctuation, Generic -__all__ = ['FortranLexer'] +__all__ = ['FortranLexer', 'FortranFixedLexer'] class FortranLexer(RegexLexer): @@ -26,7 +26,7 @@ class FortranLexer(RegexLexer): """ name = 'Fortran' aliases = ['fortran'] - filenames = ['*.f', '*.f90', '*.F', '*.F90'] + filenames = ['*.f03', '*.f90', '*.F03', '*.F90'] mimetypes = ['text/x-fortran'] flags = re.IGNORECASE | re.MULTILINE @@ -159,3 +159,47 @@ class FortranLexer(RegexLexer): (r'[+-]?\d+\.\d*(e[-+]?\d+)?(_[a-z]\w+)?', Number.Float), ], } + + +class FortranFixedLexer(RegexLexer): + """ + Lexer for fixed format Fortran. + + .. versionadded:: 2.1 + """ + name = 'FortranFixed' + aliases = ['fortranfixed'] + filenames = ['*.f', '*.F'] + + flags = re.IGNORECASE + + def _lex_fortran(self, match, ctx=None): + """Lex a line just as free form fortran without line break.""" + lexer = FortranLexer() + text = match.group(0) + "\n" + for index, token, value in lexer.get_tokens_unprocessed(text): + value = value.replace('\n', '') + if value != '': + yield index, token, value + + tokens = { + 'root': [ + (r'[C*].*\n', Comment), + (r'#.*\n', Comment.Preproc), + (r' {0,4}!.*\n', Comment), + (r'(.{5})', Name.Label, 'cont-char'), + (r'.*\n', using(FortranLexer)), + ], + + 'cont-char': [ + (' ', Text, 'code'), + ('0', Comment, 'code'), + ('.', Generic.Strong, 'code') + ], + + 'code': [ + (r'(.{66})(.*)(\n)', + bygroups(_lex_fortran, Comment, Text), 'root'), + (r'(.*)(\n)', bygroups(_lex_fortran, Text), 'root'), + (r'', Text, 'root')] + } diff --git a/pygments/lexers/html.py b/pygments/lexers/html.py index 1c35325f..7893952f 100644 --- a/pygments/lexers/html.py +++ b/pygments/lexers/html.py @@ -46,12 +46,19 @@ class HtmlLexer(RegexLexer): ('<!--', Comment, 'comment'), (r'<\?.*?\?>', Comment.Preproc), ('<![^>]*>', Comment.Preproc), - (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')), - (r'<\s*style\s*', Name.Tag, ('style-content', 'tag')), + 
(r'(<)(\s*)(script)(\s*)', + bygroups(Punctuation, Text, Name.Tag, Text), + ('script-content', 'tag')), + (r'(<)(\s*)(style)(\s*)', + bygroups(Punctuation, Text, Name.Tag, Text), + ('style-content', 'tag')), # note: this allows tag names not used in HTML like <x:with-dash>, # this is to support yet-unknown template engines and the like - (r'<\s*[\w:.-]+', Name.Tag, 'tag'), - (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag), + (r'(<)(\s*)([\w:.-]+)', + bygroups(Punctuation, Text, Name.Tag), 'tag'), + (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)', + bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, + Punctuation)), ], 'comment': [ ('[^-]+', Comment), @@ -60,16 +67,21 @@ class HtmlLexer(RegexLexer): ], 'tag': [ (r'\s+', Text), - (r'([\w:-]+\s*=)(\s*)', bygroups(Name.Attribute, Text), 'attr'), + (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text), + 'attr'), (r'[\w:-]+', Name.Attribute), - (r'/?\s*>', Name.Tag, '#pop'), + (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'), ], 'script-content': [ - (r'<\s*/\s*script\s*>', Name.Tag, '#pop'), + (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)', + bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, + Punctuation), '#pop'), (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)), ], 'style-content': [ - (r'<\s*/\s*style\s*>', Name.Tag, '#pop'), + (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)', + bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, + Punctuation),'#pop'), (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)), ], 'attr': [ diff --git a/pygments/lexers/igor.py b/pygments/lexers/igor.py index f558b80a..b0eaf6aa 100644 --- a/pygments/lexers/igor.py +++ b/pygments/lexers/igor.py @@ -35,16 +35,17 @@ class IgorLexer(RegexLexer): flowControl = ( 'if', 'else', 'elseif', 'endif', 'for', 'endfor', 'strswitch', 'switch', 'case', 'default', 'endswitch', 'do', 'while', 'try', 'catch', 'endtry', - 'break', 'continue', 'return', + 'break', 'continue', 'return', 'AbortOnRTE', 'AbortOnValue' ) types = ( 
'variable', 'string', 'constant', 'strconstant', 'NVAR', 'SVAR', 'WAVE', - 'STRUCT', 'dfref' + 'STRUCT', 'dfref', 'funcref', 'char', 'uchar', 'int16', 'uint16', 'int32', + 'uint32', 'float', 'double' ) keywords = ( - 'override', 'ThreadSafe', 'static', 'FuncFit', 'Proc', 'Picture', - 'Prompt', 'DoPrompt', 'macro', 'window', 'graph', 'function', 'end', - 'Structure', 'EndStructure', 'EndMacro', 'Menu', 'SubMenu', + 'override', 'ThreadSafe', 'MultiThread', 'static', 'Proc', + 'Picture', 'Prompt', 'DoPrompt', 'macro', 'window', 'function', 'end', + 'Structure', 'EndStructure', 'EndMacro', 'Menu', 'SubMenu' ) operations = ( 'Abort', 'AddFIFOData', 'AddFIFOVectData', 'AddMovieAudio', @@ -161,7 +162,7 @@ class IgorLexer(RegexLexer): 'CreationDate', 'csc', 'DataFolderExists', 'DataFolderRefsEqual', 'DataFolderRefStatus', 'date2secs', 'datetime', 'DateToJulian', 'Dawson', 'DDEExecute', 'DDEInitiate', 'DDEPokeString', 'DDEPokeWave', - 'DDERequestWave', 'DDEStatus', 'DDETerminate', 'deltax', 'digamma', + 'DDERequestWave', 'DDEStatus', 'DDETerminate', 'defined', 'deltax', 'digamma', 'DimDelta', 'DimOffset', 'DimSize', 'ei', 'enoise', 'equalWaves', 'erf', 'erfc', 'exists', 'exp', 'expInt', 'expNoise', 'factorial', 'fakedata', 'faverage', 'faverageXY', 'FindDimLabel', 'FindListItem', 'floor', @@ -223,7 +224,7 @@ class IgorLexer(RegexLexer): 'ThreadGroupWait', 'ThreadProcessorCount', 'ThreadReturnValue', 'ticks', 'trunc', 'Variance', 'vcsr', 'WaveCRC', 'WaveDims', 'WaveExists', 'WaveMax', 'WaveMin', 'WaveRefsEqual', 'WaveType', 'WhichListItem', - 'WinType', 'WNoise', 'x', 'x2pnt', 'xcsr', 'y', 'z', 'zcsr', 'ZernikeR', + 'WinType', 'WNoise', 'x2pnt', 'xcsr', 'zcsr', 'ZernikeR', ) functions += ( 'AddListItem', 'AnnotationInfo', 'AnnotationList', 'AxisInfo', diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py index fa7dca41..7dcfbb4b 100644 --- a/pygments/lexers/javascript.py +++ b/pygments/lexers/javascript.py @@ -36,7 +36,7 @@ class 
JavascriptLexer(RegexLexer): name = 'JavaScript' aliases = ['js', 'javascript'] - filenames = ['*.js', ] + filenames = ['*.js', '*.jsm', ] mimetypes = ['application/javascript', 'application/x-javascript', 'text/x-javascript', 'text/javascript', ] @@ -60,7 +60,7 @@ class JavascriptLexer(RegexLexer): (r'\n', Text, '#pop') ], 'root': [ - (r'\A#! ?/.*?\n', Comment), # shebang lines are recognized by node.js + (r'\A#! ?/.*?\n', Comment.Hashbang), # recognized by node.js (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), include('commentsandwhitespace'), (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' diff --git a/pygments/lexers/julia.py b/pygments/lexers/julia.py index 1b7d543a..cf7c7d61 100644 --- a/pygments/lexers/julia.py +++ b/pygments/lexers/julia.py @@ -14,7 +14,7 @@ import re from pygments.lexer import Lexer, RegexLexer, bygroups, combined, do_insertions from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic -from pygments.util import shebang_matches +from pygments.util import shebang_matches, unirange __all__ = ['JuliaLexer', 'JuliaConsoleLexer'] @@ -30,6 +30,8 @@ class JuliaLexer(RegexLexer): filenames = ['*.jl'] mimetypes = ['text/x-julia', 'application/x-julia'] + flags = re.MULTILINE | re.UNICODE + builtins = [ 'exit', 'whos', 'edit', 'load', 'is', 'isa', 'isequal', 'typeof', 'tuple', 'ntuple', 'uid', 'hash', 'finalizer', 'convert', 'promote', 'subtype', @@ -89,7 +91,8 @@ class JuliaLexer(RegexLexer): # names (r'@[\w.]+', Name.Decorator), - (r'[a-zA-Z_]\w*', Name), + (u'(?:[a-zA-Z_\u00A1-\uffff]|%s)(?:[a-zA-Z_0-9\u00A1-\uffff]|%s)*!*' % + ((unirange(0x10000, 0x10ffff),)*2), Name), # numbers (r'(\d+(_\d+)+\.\d*|\d*\.\d+(_\d+)+)([eEf][+-]?[0-9]+)?', Number.Float), diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 6b302c7e..4d3c9159 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -457,7 +457,7 @@ class GroovyLexer(RegexLexer): name = 'Groovy' aliases = ['groovy'] - filenames = 
['*.groovy'] + filenames = ['*.groovy','*.gradle'] mimetypes = ['text/x-groovy'] flags = re.MULTILINE | re.DOTALL diff --git a/pygments/lexers/modeling.py b/pygments/lexers/modeling.py index 43194436..ec99543f 100644 --- a/pygments/lexers/modeling.py +++ b/pygments/lexers/modeling.py @@ -284,8 +284,8 @@ class StanLexer(RegexLexer): """Pygments Lexer for Stan models. The Stan modeling language is specified in the *Stan Modeling Language - User's Guide and Reference Manual, v2.5.0*, - `pdf <https://github.com/stan-dev/stan/releases/download/v2.5.0/stan-reference-2.5.0.pdf>`__. + User's Guide and Reference Manual, v2.7.0*, + `pdf <https://github.com/stan-dev/stan/releases/download/v2.7.0/stan-reference-2.7.0.pdf>`__. .. versionadded:: 1.6 """ diff --git a/pygments/lexers/modula2.py b/pygments/lexers/modula2.py new file mode 100644 index 00000000..d32bb5bb --- /dev/null +++ b/pygments/lexers/modula2.py @@ -0,0 +1,1566 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.modula2 + ~~~~~~~~~~~~~~~~~~~~~~~ + + Multi-Dialect Lexer for Modula-2. + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, include +from pygments.util import get_bool_opt, get_list_opt +from pygments.token import Text, Comment, Operator, Keyword, Name, \ + String, Number, Punctuation, Error + +__all__ = ['Modula2Lexer'] + + +# Multi-Dialect Modula-2 Lexer +class Modula2Lexer(RegexLexer): + """ + For `Modula-2 <http://www.modula2.org/>`_ source code. + + The Modula-2 lexer supports several dialects. By default, it operates in + fallback mode, recognising the *combined* literals, punctuation symbols + and operators of all supported dialects, and the *combined* reserved words + and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not + differentiating between library defined identifiers. 
+ + To select a specific dialect, a dialect option may be passed + or a dialect tag may be embedded into a source file. + + Dialect Options: + + `m2pim` + Select PIM Modula-2 dialect. + `m2iso` + Select ISO Modula-2 dialect. + `m2r10` + Select Modula-2 R10 dialect. + `objm2` + Select Objective Modula-2 dialect. + + The PIM and ISO dialect options may be qualified with a language extension. + + Language Extensions: + + `+aglet` + Select Aglet Modula-2 extensions, available with m2iso. + `+gm2` + Select GNU Modula-2 extensions, available with m2pim. + `+p1` + Select p1 Modula-2 extensions, available with m2iso. + `+xds` + Select XDS Modula-2 extensions, available with m2iso. + + + Passing a Dialect Option via Unix Commandline Interface + + Dialect options may be passed to the lexer using the `dialect` key. + Only one such option should be passed. If multiple dialect options are + passed, the first valid option is used, any subsequent options are ignored. + + Examples: + + `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input` + Use ISO dialect to render input to HTML output + `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input` + Use ISO dialect with p1 extensions to render input to RTF output + + + Embedding a Dialect Option within a source file + + A dialect option may be embedded in a source file in form of a dialect + tag, a specially formatted comment that specifies a dialect option. + + Dialect Tag EBNF: + + dialectTag : + OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ; + + dialectOption : + 'm2pim' | 'm2iso' | 'm2r10' | 'objm2' | + 'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ; + + Prefix : '!' ; + + OpeningCommentDelim : '(*' ; + + ClosingCommentDelim : '*)' ; + + No whitespace is permitted between the tokens of a dialect tag. 
+ + In the event that a source file contains multiple dialect tags, the first + tag that contains a valid dialect option will be used and any subsequent + dialect tags will be ignored. Ideally, a dialect tag should be placed + at the beginning of a source file. + + An embedded dialect tag overrides a dialect option set via command line. + + Examples: + + `(*!m2r10*) DEFINITION MODULE Foobar; ...` + Use Modula2 R10 dialect to render this source file. + `(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...` + Use PIM dialect with GNU extensions to render this source file. + + + Algol Publication Mode: + + In Algol publication mode, source text is rendered for publication of + algorithms in scientific papers and academic texts, following the format + of the Revised Algol-60 Language Report. It is activated by passing + one of two corresponding styles as an option: + + `algol` + render reserved words lowercase underline boldface + and builtins lowercase boldface italic + `algol_nu` + render reserved words lowercase boldface (no underlining) + and builtins lowercase boldface italic + + The lexer automatically performs the required lowercase conversion when + this mode is activated. + + Example: + + `$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input` + Render input file in Algol publication mode to LaTeX output. + + + Rendering Mode of First Class ADT Identifiers: + + The rendering of standard library first class ADT identifiers is controlled + by option flag "treat_stdlib_adts_as_builtins". + + When this option is turned on, standard library ADT identifiers are rendered + as builtins. When it is turned off, they are rendered as ordinary library + identifiers. + + `treat_stdlib_adts_as_builtins` (default: On) + + The option is useful for dialects that support ADTs as first class objects + and provide ADTs in the standard library that would otherwise be built-in. 
+ + At present, only Modula-2 R10 supports library ADTs as first class objects + and therefore, no ADT identifiers are defined for any other dialects. + + Example: + + `$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...` + Render standard library ADTs as ordinary library types. + + .. versionadded:: 1.3 + + .. versionchanged:: 2.1 + Added multi-dialect support. + """ + name = 'Modula-2' + aliases = ['modula2', 'm2'] + filenames = ['*.def', '*.mod'] + mimetypes = ['text/x-modula2'] + + flags = re.MULTILINE | re.DOTALL + + tokens = { + 'whitespace': [ + (r'\n+', Text), # blank lines + (r'\s+', Text), # whitespace + ], + 'dialecttags': [ + # PIM Dialect Tag + (r'\(\*!m2pim\*\)', Comment.Special), + # ISO Dialect Tag + (r'\(\*!m2iso\*\)', Comment.Special), + # M2R10 Dialect Tag + (r'\(\*!m2r10\*\)', Comment.Special), + # ObjM2 Dialect Tag + (r'\(\*!objm2\*\)', Comment.Special), + # Aglet Extensions Dialect Tag + (r'\(\*!m2iso\+aglet\*\)', Comment.Special), + # GNU Extensions Dialect Tag + (r'\(\*!m2pim\+gm2\*\)', Comment.Special), + # p1 Extensions Dialect Tag + (r'\(\*!m2iso\+p1\*\)', Comment.Special), + # XDS Extensions Dialect Tag + (r'\(\*!m2iso\+xds\*\)', Comment.Special), + ], + 'identifiers': [ + (r'([a-zA-Z_$][\w$]*)', Name), + ], + 'prefixed_number_literals': [ + # + # Base-2, whole number + (r'0b[01]+(\'[01]+)*', Number.Bin), + # + # Base-16, whole number + (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex), + ], + 'plain_number_literals': [ + # + # Base-10, real number with exponent + (r'[0-9]+(\'[0-9]+)*' # integral part \ + r'\.[0-9]+(\'[0-9]+)*' # fractional part \ + r'[eE][+-]?[0-9]+(\'[0-9]+)*', # exponent \ + Number.Float), + # + # Base-10, real number without exponent + (r'[0-9]+(\'[0-9]+)*' # integral part \ + r'\.[0-9]+(\'[0-9]+)*', # fractional part \ + Number.Float), + # + # Base-10, whole number + (r'[0-9]+(\'[0-9]+)*', Number.Integer), + ], + 'suffixed_number_literals': [ + # + # Base-8, whole number + (r'[0-7]+B', Number.Oct), 
+ # + # Base-8, character code + (r'[0-7]+C', Number.Oct), + # + # Base-16, number + (r'[0-9A-F]+H', Number.Hex), + ], + 'string_literals': [ + (r"'(\\\\|\\'|[^'])*'", String), # single quoted string + (r'"(\\\\|\\"|[^"])*"', String), # double quoted string + ], + 'digraph_operators': [ + # Dot Product Operator + (r'\*\.', Operator), + # Array Concatenation Operator + (r'\+>', Operator), # M2R10 + ObjM2 + # Inequality Operator + (r'<>', Operator), # ISO + PIM + # Less-Or-Equal, Subset + (r'<=', Operator), + # Greater-Or-Equal, Superset + (r'>=', Operator), + # Identity Operator + (r'==', Operator), # M2R10 + ObjM2 + # Type Conversion Operator + (r'::', Operator), # M2R10 + ObjM2 + # Assignment Symbol + (r':=', Operator), + # Postfix Increment Mutator + (r'\+\+', Operator), # M2R10 + ObjM2 + # Postfix Decrement Mutator + (r'--', Operator), # M2R10 + ObjM2 + ], + 'unigraph_operators': [ + # Arithmetic Operators + (r'[+-]', Operator), + (r'[*/]', Operator), + # ISO 80000-2 compliant Set Difference Operator + (r'\\', Operator), # M2R10 + ObjM2 + # Relational Operators + (r'[=#<>]', Operator), + # Dereferencing Operator + (r'\^', Operator), + # Dereferencing Operator Synonym + (r'@', Operator), # ISO + # Logical AND Operator Synonym + (r'&', Operator), # PIM + ISO + # Logical NOT Operator Synonym + (r'~', Operator), # PIM + ISO + # Smalltalk Message Prefix + (r'`', Operator), # ObjM2 + ], + 'digraph_punctuation': [ + # Range Constructor + (r'\.\.', Punctuation), + # Opening Chevron Bracket + (r'<<', Punctuation), # M2R10 + ISO + # Closing Chevron Bracket + (r'>>', Punctuation), # M2R10 + ISO + # Blueprint Punctuation + (r'->', Punctuation), # M2R10 + ISO + # Distinguish |# and # in M2 R10 + (r'\|#', Punctuation), + # Distinguish ## and # in M2 R10 + (r'##', Punctuation), + # Distinguish |* and * in M2 R10 + (r'\|\*', Punctuation), + ], + 'unigraph_punctuation': [ + # Common Punctuation + (r'[\(\)\[\]{},.:;\|]', Punctuation), + # Case Label Separator Synonym + (r'!', 
Punctuation), # ISO + # Blueprint Punctuation + (r'\?', Punctuation), # M2R10 + ObjM2 + ], + 'comments': [ + # Single Line Comment + (r'^//.*?\n', Comment.Single), # M2R10 + ObjM2 + # Block Comment + (r'\(\*([^$].*?)\*\)', Comment.Multiline), + # Template Block Comment + (r'/\*(.*?)\*/', Comment.Multiline), # M2R10 + ObjM2 + ], + 'pragmas': [ + # ISO Style Pragmas + (r'<\*.*?\*>', Comment.Preproc), # ISO, M2R10 + ObjM2 + # Pascal Style Pragmas + (r'\(\*\$.*?\*\)', Comment.Preproc), # PIM + ], + 'root': [ + include('whitespace'), + include('dialecttags'), + include('pragmas'), + include('comments'), + include('identifiers'), + include('suffixed_number_literals'), # PIM + ISO + include('prefixed_number_literals'), # M2R10 + ObjM2 + include('plain_number_literals'), + include('string_literals'), + include('digraph_punctuation'), + include('digraph_operators'), + include('unigraph_punctuation'), + include('unigraph_operators'), + ] + } + +# C o m m o n D a t a s e t s + + # Common Reserved Words Dataset + common_reserved_words = ( + # 37 common reserved words + 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', + 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF', + 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT', + 'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN', + 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE', + ) + + # Common Builtins Dataset + common_builtins = ( + # 16 common builtins + 'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER', + 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL', + 'TRUE', + ) + + # Common Pseudo-Module Builtins Dataset + common_pseudo_builtins = ( + # 4 common pseudo builtins + 'ADDRESS', 'BYTE', 'WORD', 'ADR' + ) + +# P I M M o d u l a - 2 D a t a s e t s + + # Lexemes to Mark as Error Tokens for PIM Modula-2 + pim_lexemes_to_reject = ( + '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', + '+>', '->', '<<', '>>', '|#', '##', + 
) + + # PIM Modula-2 Additional Reserved Words Dataset + pim_additional_reserved_words = ( + # 3 additional reserved words + 'EXPORT', 'QUALIFIED', 'WITH', + ) + + # PIM Modula-2 Additional Builtins Dataset + pim_additional_builtins = ( + # 16 additional builtins + 'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH', + 'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL', + ) + + # PIM Modula-2 Additional Pseudo-Module Builtins Dataset + pim_additional_pseudo_builtins = ( + # 5 additional pseudo builtins + 'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER', + ) + +# I S O M o d u l a - 2 D a t a s e t s + + # Lexemes to Mark as Error Tokens for ISO Modula-2 + iso_lexemes_to_reject = ( + '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->', + '<<', '>>', '|#', '##', + ) + + # ISO Modula-2 Additional Reserved Words Dataset + iso_additional_reserved_words = ( + # 9 additional reserved words (ISO 10514-1) + 'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED', + 'REM', 'RETRY', 'WITH', + # 10 additional reserved words (ISO 10514-2 & ISO 10514-3) + 'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY', + 'REVEAL', 'TRACED', 'UNSAFEGUARDED', + ) + + # ISO Modula-2 Additional Builtins Dataset + iso_additional_builtins = ( + # 26 additional builtins (ISO 10514-1) + 'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', + 'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH', + 'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE', + 'TRUNC', 'UNINTERRUBTIBLE', 'VAL', + # 5 additional builtins (ISO 10514-2 & ISO 10514-3) + 'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF', + ) + + # ISO Modula-2 Additional Pseudo-Module Builtins Dataset + iso_additional_pseudo_builtins = ( + # 14 additional builtins (SYSTEM) + 'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC', + 'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR', + 'ROTATE', 'SHIFT', 'CAST', 'TSIZE', + # 
13 additional builtins (COROUTINES) + 'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER', + 'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN', + 'NEWCOROUTINE', 'PROT', 'TRANSFER', + # 9 additional builtins (EXCEPTIONS) + 'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber', + 'ExceptionSource', 'GetMessage', 'IsCurrentSource', + 'IsExceptionalExecution', 'RAISE', + # 3 additional builtins (TERMINATION) + 'TERMINATION', 'IsTerminating', 'HasHalted', + # 4 additional builtins (M2EXCEPTION) + 'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception', + 'indexException', 'rangeException', 'caseSelectException', + 'invalidLocation', 'functionException', 'wholeValueException', + 'wholeDivException', 'realValueException', 'realDivException', + 'complexValueException', 'complexDivException', 'protException', + 'sysException', 'coException', 'exException', + ) + +# M o d u l a - 2 R 1 0 D a t a s e t s + + # Lexemes to Mark as Error Tokens for Modula-2 R10 + m2r10_lexemes_to_reject = ( + '!', '`', '@', '$', '%', '&', '<>', + ) + + # Modula-2 R10 reserved words in addition to the common set + m2r10_additional_reserved_words = ( + # 12 additional reserved words + 'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE', + 'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN', + # 2 additional reserved words with symbolic assembly option + 'ASM', 'REG', + ) + + # Modula-2 R10 builtins in addition to the common set + m2r10_additional_builtins = ( + # 26 additional builtins + 'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD', + 'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT', + 'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE', + 'UNICHAR', 'WRITE', 'WRITEF', + ) + + # Modula-2 R10 Additional Pseudo-Module Builtins Dataset + m2r10_additional_pseudo_builtins = ( + # 13 additional builtins (TPROPERTIES) + 'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 
    'TLITERAL',
    'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION',
    'TMAXEXP', 'TMINEXP',
    # 4 additional builtins (CONVERSION)
    'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL',
    # 35 additional builtins (UNSAFE)
    'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC',
    'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC',
    'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR',
    'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT',
    'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC',
    # 11 additional builtins (ATOMIC)
    # Several names repeat entries of the UNSAFE group above.
    'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND',
    'BWNAND', 'BWOR', 'BWXOR',
    # 7 additional builtins (COMPILER)
    'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT',
    'HASH',
    # 5 additional builtins (ASSEMBLER)
    'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE',
)

#  O b j e c t i v e   M o d u l a - 2   D a t a s e t s

# Lexemes to Mark as Error Tokens for Objective Modula-2
objm2_lexemes_to_reject = (
    '!', '$', '%', '&', '<>',
)

# Objective Modula-2 Extensions
# reserved words in addition to Modula-2 R10
objm2_additional_reserved_words = (
    # 16 additional reserved words
    'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
    'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
    'SUPER', 'TRY',
)

# Objective Modula-2 Extensions
# builtins in addition to Modula-2 R10
objm2_additional_builtins = (
    # 3 additional builtins
    'OBJECT', 'NO', 'YES',
)

# Objective Modula-2 Extensions
# pseudo-module builtins in addition to Modula-2 R10
# (deliberately an empty tuple)
objm2_additional_pseudo_builtins = (
    # None
)

#  A g l e t   M o d u l a - 2   D a t a s e t s

# Aglet Extensions
# reserved words in addition to ISO Modula-2
# (deliberately an empty tuple)
aglet_additional_reserved_words = (
    # None
)

# Aglet Extensions
# builtins in addition to ISO Modula-2
aglet_additional_builtins = (
    # 9 additional builtins
    'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
    'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32',
)

# Aglet Modula-2 Extensions
# pseudo-module builtins in addition to ISO Modula-2
# (deliberately an empty tuple)
aglet_additional_pseudo_builtins = (
    # None
)

#  G N U   M o d u l a - 2   D a t a s e t s

# GNU Extensions
# reserved words in addition to PIM Modula-2
gm2_additional_reserved_words = (
    # 10 additional reserved words
    'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
    '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
)

# GNU Extensions
# builtins in addition to PIM Modula-2
gm2_additional_builtins = (
    # 21 additional builtins
    'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
    'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
    'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
    'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
)

# GNU Extensions
# pseudo-module builtins in addition to PIM Modula-2
# (deliberately an empty tuple)
gm2_additional_pseudo_builtins = (
    # None
)

#  p 1   M o d u l a - 2   D a t a s e t s

# p1 Extensions
# reserved words in addition to ISO Modula-2
# (deliberately an empty tuple)
p1_additional_reserved_words = (
    # None
)

# p1 Extensions
# builtins in addition to ISO Modula-2
# (deliberately an empty tuple)
p1_additional_builtins = (
    # None
)

# p1 Modula-2 Extensions
# pseudo-module builtins in addition to ISO Modula-2
p1_additional_pseudo_builtins = (
    # 1 additional builtin
    'BCD',
)

#  X D S   M o d u l a - 2   D a t a s e t s

# XDS Extensions
# reserved words in addition to ISO Modula-2
xds_additional_reserved_words = (
    # 1 additional reserved word
    'SEQ',
)

# XDS Extensions
# builtins in addition to ISO Modula-2
xds_additional_builtins = (
    # 9 additional builtins
    'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN',
    'LONGCARD', 'SHORTCARD', 'SHORTINT',
)

# XDS Modula-2 Extensions
# pseudo-module builtins in addition to ISO Modula-2
xds_additional_pseudo_builtins = (
    # 21 additional builtins (SYSTEM)
    'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8',
    'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE',
    'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void',
    # 3 additional builtins (COMPILER)
    # The comma after 'void' above is required: without it Python joins
    # the adjacent literals into the single bogus entry 'voidCOMPILER'.
    'COMPILER', 'OPTION', 'EQUATION',
)

#  P I M   S t a n d a r d   L i b r a r y   D a t a s e t s

# PIM Modula-2 Standard Library Modules Dataset
pim_stdlib_module_identifiers = (
    'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage',
)

# PIM Modula-2 Standard Library Types Dataset
pim_stdlib_type_identifiers = (
    'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission',
    'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand',
    'DirectoryCommand',
)

# PIM Modula-2 Standard Library Procedures Dataset
pim_stdlib_proc_identifiers = (
    'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn',
    'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite',
    'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset',
    'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar',
    'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName',
    'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput',
    'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd',
    'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd',
    'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp',
    'ln', 'sin', 'cos', 'arctan', 'entier', 'ALLOCATE', 'DEALLOCATE',
)

# PIM Modula-2 Standard Library Variables Dataset
pim_stdlib_var_identifiers = (
    'Done', 'termCH', 'in', 'out'
)

# PIM Modula-2 Standard Library Constants Dataset
pim_stdlib_const_identifiers = (
    'EOL',
)

#  I S O   S t a n d a r d   L i b r a r y   D a t a s e t s

# ISO Modula-2 Standard Library Modules Dataset
iso_stdlib_module_identifiers = (
    # TO DO
)

# ISO Modula-2 Standard
# Library Types Dataset
# (still an empty tuple, marked TO DO)
iso_stdlib_type_identifiers = (
    # TO DO
)

# ISO Modula-2 Standard Library Procedures Dataset
# (still an empty tuple, marked TO DO)
iso_stdlib_proc_identifiers = (
    # TO DO
)

# ISO Modula-2 Standard Library Variables Dataset
# (still an empty tuple, marked TO DO)
iso_stdlib_var_identifiers = (
    # TO DO
)

# ISO Modula-2 Standard Library Constants Dataset
# (still an empty tuple, marked TO DO)
iso_stdlib_const_identifiers = (
    # TO DO
)

#  M 2 R 1 0   S t a n d a r d   L i b r a r y   D a t a s e t s

# Modula-2 R10 Standard Library ADTs Dataset
m2r10_stdlib_adt_identifiers = (
    'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET',
    'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD',
    'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT',
    'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64',
    'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8',
    'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8',
    'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16',
    'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32',
    'INT64', 'INT128', 'STRING', 'UNISTRING',
)

# Modula-2 R10 Standard Library Blueprints Dataset
m2r10_stdlib_blueprint_identifiers = (
    'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar',
    'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal',
    'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray',
    'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet',
    'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet',
    'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension',
    'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath',
)

# Modula-2 R10 Standard Library Modules Dataset
m2r10_stdlib_module_identifiers = (
    'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO',
    'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO',
    'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath',
    'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath',
    'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport',
)

# Modula-2 R10 Standard Library Types Dataset
m2r10_stdlib_type_identifiers = (
    'File', 'Status',
    # TO BE COMPLETED
)

# Modula-2 R10 Standard Library Procedures Dataset
m2r10_stdlib_proc_identifiers = (
    'ALLOCATE', 'DEALLOCATE', 'SIZE',
    # TO BE COMPLETED
)

# Modula-2 R10 Standard Library Variables Dataset
m2r10_stdlib_var_identifiers = (
    'stdIn', 'stdOut', 'stdErr',
)

# Modula-2 R10 Standard Library Constants Dataset
m2r10_stdlib_const_identifiers = (
    'pi', 'tau',
)

#  D i a l e c t s

# Dialect modes
# Index 0 is the fallback 'unknown' dialect; every later entry is a
# selectable dialect and is also a key of each *_db dictionary.
dialects = (
    'unknown',
    'm2pim', 'm2iso', 'm2r10', 'objm2',
    'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds',
)

#  D a t a b a s e s

# Each database maps a dialect name to a tuple of datasets (tuples of
# strings); extension dialects reuse the datasets of their base dialect.

# Lexemes to Mark as Errors Database
lexemes_to_reject_db = {
    # Lexemes to reject for unknown dialect
    'unknown' : (
        # LEAVE THIS EMPTY
    ),
    # Lexemes to reject for PIM Modula-2
    'm2pim' : (
        pim_lexemes_to_reject,
    ),
    # Lexemes to reject for ISO Modula-2
    'm2iso' : (
        iso_lexemes_to_reject,
    ),
    # Lexemes to reject for Modula-2 R10
    'm2r10' : (
        m2r10_lexemes_to_reject,
    ),
    # Lexemes to reject for Objective Modula-2
    'objm2' : (
        objm2_lexemes_to_reject,
    ),
    # Lexemes to reject for Aglet Modula-2
    'm2iso+aglet' : (
        iso_lexemes_to_reject,
    ),
    # Lexemes to reject for GNU Modula-2
    'm2pim+gm2' : (
        pim_lexemes_to_reject,
    ),
    # Lexemes to reject for p1 Modula-2
    'm2iso+p1' : (
        iso_lexemes_to_reject,
    ),
    # Lexemes to reject for XDS Modula-2
    'm2iso+xds' : (
        iso_lexemes_to_reject,
    ),
}

# Reserved Words Database
reserved_words_db = {
    # Reserved words for unknown dialect
    # (union of the PIM, ISO and R10 sets)
    'unknown' : (
        common_reserved_words,
        pim_additional_reserved_words,
        iso_additional_reserved_words,
        m2r10_additional_reserved_words,
    ),

    # Reserved words for PIM Modula-2
    'm2pim' : (
        common_reserved_words,
        pim_additional_reserved_words,
    ),

    # Reserved words for ISO Modula-2
    'm2iso' : (
        common_reserved_words,
        iso_additional_reserved_words,
    ),

    # Reserved words for Modula-2 R10
    'm2r10' : (
        common_reserved_words,
        m2r10_additional_reserved_words,
    ),

    # Reserved words for Objective Modula-2
    'objm2' : (
        common_reserved_words,
        m2r10_additional_reserved_words,
        objm2_additional_reserved_words,
    ),

    # Reserved words for Aglet Modula-2 Extensions
    'm2iso+aglet' : (
        common_reserved_words,
        iso_additional_reserved_words,
        aglet_additional_reserved_words,
    ),

    # Reserved words for GNU Modula-2 Extensions
    'm2pim+gm2' : (
        common_reserved_words,
        pim_additional_reserved_words,
        gm2_additional_reserved_words,
    ),

    # Reserved words for p1 Modula-2 Extensions
    'm2iso+p1' : (
        common_reserved_words,
        iso_additional_reserved_words,
        p1_additional_reserved_words,
    ),

    # Reserved words for XDS Modula-2 Extensions
    'm2iso+xds' : (
        common_reserved_words,
        iso_additional_reserved_words,
        xds_additional_reserved_words,
    ),
}

# Builtins Database
builtins_db = {
    # Builtins for unknown dialect
    # (union of the PIM, ISO and R10 sets)
    'unknown' : (
        common_builtins,
        pim_additional_builtins,
        iso_additional_builtins,
        m2r10_additional_builtins,
    ),

    # Builtins for PIM Modula-2
    'm2pim' : (
        common_builtins,
        pim_additional_builtins,
    ),

    # Builtins for ISO Modula-2
    'm2iso' : (
        common_builtins,
        iso_additional_builtins,
    ),

    # Builtins for Modula-2 R10
    'm2r10' : (
        common_builtins,
        m2r10_additional_builtins,
    ),

    # Builtins for Objective Modula-2
    'objm2' : (
        common_builtins,
        m2r10_additional_builtins,
        objm2_additional_builtins,
    ),

    # Builtins for Aglet Modula-2 Extensions
    'm2iso+aglet' : (
        common_builtins,
        iso_additional_builtins,
        aglet_additional_builtins,
    ),

    # Builtins for GNU Modula-2 Extensions
    'm2pim+gm2' : (
        common_builtins,
        pim_additional_builtins,
        gm2_additional_builtins,
    ),

    # Builtins for p1 Modula-2 Extensions
    'm2iso+p1' : (
        common_builtins,
        iso_additional_builtins,
        p1_additional_builtins,
    ),

    # Builtins for XDS Modula-2 Extensions
    'm2iso+xds' : (
        common_builtins,
        iso_additional_builtins,
        xds_additional_builtins,
    ),
}

# Pseudo-Module Builtins Database
pseudo_builtins_db = {
    # Pseudo-module builtins for unknown dialect
    # (union of the PIM, ISO and R10 sets)
    'unknown' : (
        common_pseudo_builtins,
        pim_additional_pseudo_builtins,
        iso_additional_pseudo_builtins,
        m2r10_additional_pseudo_builtins,
    ),

    # Pseudo-module builtins for PIM Modula-2
    'm2pim' : (
        common_pseudo_builtins,
        pim_additional_pseudo_builtins,
    ),

    # Pseudo-module builtins for ISO Modula-2
    'm2iso' : (
        common_pseudo_builtins,
        iso_additional_pseudo_builtins,
    ),

    # Pseudo-module builtins for Modula-2 R10
    'm2r10' : (
        common_pseudo_builtins,
        m2r10_additional_pseudo_builtins,
    ),

    # Pseudo-module builtins for Objective Modula-2
    'objm2' : (
        common_pseudo_builtins,
        m2r10_additional_pseudo_builtins,
        objm2_additional_pseudo_builtins,
    ),

    # Pseudo-module builtins for Aglet Modula-2 Extensions
    'm2iso+aglet' : (
        common_pseudo_builtins,
        iso_additional_pseudo_builtins,
        aglet_additional_pseudo_builtins,
    ),

    # Pseudo-module builtins for GNU Modula-2 Extensions
    'm2pim+gm2' : (
        common_pseudo_builtins,
        pim_additional_pseudo_builtins,
        gm2_additional_pseudo_builtins,
    ),

    # Pseudo-module builtins for p1 Modula-2 Extensions
    'm2iso+p1' : (
        common_pseudo_builtins,
        iso_additional_pseudo_builtins,
        p1_additional_pseudo_builtins,
    ),

    # Pseudo-module builtins for XDS Modula-2 Extensions
    'm2iso+xds' : (
        common_pseudo_builtins,
        iso_additional_pseudo_builtins,
        xds_additional_pseudo_builtins,
    ),
}

# Standard Library ADTs Database
stdlib_adts_db = {
    # Empty entry for unknown dialect
    'unknown' : (
        # LEAVE THIS EMPTY
    ),
    # Standard Library ADTs for PIM Modula-2
    'm2pim' : (
        # No first class library types
    ),

    # Standard Library ADTs for ISO Modula-2
    'm2iso' : (
        # No first class library types
    ),

    # Standard Library ADTs for Modula-2 R10
    'm2r10' : (
        m2r10_stdlib_adt_identifiers,
    ),

    # Standard Library ADTs for Objective Modula-2
    'objm2' : (
        m2r10_stdlib_adt_identifiers,
    ),

    # Standard Library ADTs for Aglet Modula-2
    'm2iso+aglet' : (
        # No first class library types
    ),

    # Standard Library ADTs for GNU Modula-2
    'm2pim+gm2' : (
        # No first class library types
    ),

    # Standard Library ADTs for p1 Modula-2
    'm2iso+p1' : (
        # No first class library types
    ),

    # Standard Library ADTs for XDS Modula-2
    'm2iso+xds' : (
        # No first class library types
    ),
}

# Standard Library Modules Database
stdlib_modules_db = {
    # Empty entry for unknown dialect
    'unknown' : (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Modules for PIM Modula-2
    'm2pim' : (
        pim_stdlib_module_identifiers,
    ),

    # Standard Library Modules for ISO Modula-2
    'm2iso' : (
        iso_stdlib_module_identifiers,
    ),

    # Standard Library Modules for Modula-2 R10
    # NOTE(review): the ADT identifiers are listed here for 'm2r10' but
    # not for 'objm2' below -- confirm whether that asymmetry is intended.
    'm2r10' : (
        m2r10_stdlib_blueprint_identifiers,
        m2r10_stdlib_module_identifiers,
        m2r10_stdlib_adt_identifiers,
    ),

    # Standard Library Modules for Objective Modula-2
    'objm2' : (
        m2r10_stdlib_blueprint_identifiers,
        m2r10_stdlib_module_identifiers,
    ),

    # Standard Library Modules for Aglet Modula-2
    'm2iso+aglet' : (
        iso_stdlib_module_identifiers,
    ),

    # Standard Library Modules for GNU Modula-2
    'm2pim+gm2' : (
        pim_stdlib_module_identifiers,
    ),

    # Standard Library Modules for p1 Modula-2
    'm2iso+p1' : (
        iso_stdlib_module_identifiers,
    ),

    # Standard Library Modules for XDS Modula-2
    'm2iso+xds' : (
        iso_stdlib_module_identifiers,
    ),
}

# Standard Library Types Database
stdlib_types_db = {
    # Empty entry for unknown dialect
    'unknown' : (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Types for PIM Modula-2
    'm2pim' : (
        pim_stdlib_type_identifiers,
    ),

    # Standard Library Types for ISO Modula-2
    'm2iso' : (
        iso_stdlib_type_identifiers,
    ),

    # Standard Library Types for Modula-2 R10
    'm2r10' : (
        m2r10_stdlib_type_identifiers,
    ),

    # Standard Library Types for Objective Modula-2
    'objm2' : (
        m2r10_stdlib_type_identifiers,
    ),

    # Standard Library Types for Aglet Modula-2
    'm2iso+aglet' : (
        iso_stdlib_type_identifiers,
    ),

    # Standard Library Types for GNU Modula-2
    'm2pim+gm2' : (
        pim_stdlib_type_identifiers,
    ),

    # Standard Library Types for p1 Modula-2
    'm2iso+p1' : (
        iso_stdlib_type_identifiers,
    ),

    # Standard Library Types for XDS Modula-2
    'm2iso+xds' : (
        iso_stdlib_type_identifiers,
    ),
}

# Standard Library Procedures Database
stdlib_procedures_db = {
    # Empty entry for unknown dialect
    'unknown' : (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Procedures for PIM Modula-2
    'm2pim' : (
        pim_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for ISO Modula-2
    'm2iso' : (
        iso_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for Modula-2 R10
    'm2r10' : (
        m2r10_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for Objective Modula-2
    'objm2' : (
        m2r10_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for Aglet Modula-2
    'm2iso+aglet' : (
        iso_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for GNU Modula-2
    'm2pim+gm2' : (
        pim_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for p1 Modula-2
    'm2iso+p1' : (
        iso_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for XDS Modula-2
    'm2iso+xds' : (
        iso_stdlib_proc_identifiers,
    ),
}

# Standard Library Variables Database
stdlib_variables_db = {
    # Empty entry for unknown dialect
    'unknown' : (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Variables for PIM Modula-2
    'm2pim' : (
        pim_stdlib_var_identifiers,
    ),

    # Standard Library Variables for ISO Modula-2
    'm2iso' : (
        iso_stdlib_var_identifiers,
    ),

    # Standard Library Variables for Modula-2 R10
    'm2r10' : (
m2r10_stdlib_var_identifiers, + ), + + # Standard Library Variables for Objective Modula-2 + 'objm2' : ( + m2r10_stdlib_var_identifiers, + ), + + # Standard Library Variables for Aglet Modula-2 + 'm2iso+aglet' : ( + iso_stdlib_var_identifiers, + ), + + # Standard Library Variables for GNU Modula-2 + 'm2pim+gm2' : ( + pim_stdlib_var_identifiers, + ), + + # Standard Library Variables for p1 Modula-2 + 'm2iso+p1' : ( + iso_stdlib_var_identifiers, + ), + + # Standard Library Variables for XDS Modula-2 + 'm2iso+xds' : ( + iso_stdlib_var_identifiers, + ), + } + + # Standard Library Constants Database + stdlib_constants_db = { + # Empty entry for unknown dialect + 'unknown' : ( + # LEAVE THIS EMPTY + ), + # Standard Library Constants for PIM Modula-2 + 'm2pim' : ( + pim_stdlib_const_identifiers, + ), + + # Standard Library Constants for ISO Modula-2 + 'm2iso' : ( + iso_stdlib_const_identifiers, + ), + + # Standard Library Constants for Modula-2 R10 + 'm2r10' : ( + m2r10_stdlib_const_identifiers, + ), + + # Standard Library Constants for Objective Modula-2 + 'objm2' : ( + m2r10_stdlib_const_identifiers, + ), + + # Standard Library Constants for Aglet Modula-2 + 'm2iso+aglet' : ( + iso_stdlib_const_identifiers, + ), + + # Standard Library Constants for GNU Modula-2 + 'm2pim+gm2' : ( + pim_stdlib_const_identifiers, + ), + + # Standard Library Constants for p1 Modula-2 + 'm2iso+p1' : ( + iso_stdlib_const_identifiers, + ), + + # Standard Library Constants for XDS Modula-2 + 'm2iso+xds' : ( + iso_stdlib_const_identifiers, + ), + } + +# M e t h o d s + + # initialise a lexer instance + def __init__(self, **options): + # + # Alias for unknown dialect + global UNKNOWN + UNKNOWN = self.dialects[0] + # + # check dialect options + # + dialects = get_list_opt(options, 'dialect', []) + # + for dialect_option in dialects: + if dialect_option in self.dialects[1:-1]: + # valid dialect option found + self.set_dialect(dialect_option) + break + # + # Fallback Mode (DEFAULT) + else: + # no 
valid dialect option + self.set_dialect(UNKNOWN) + # + self.dialect_set_by_tag = False + # + # check style options + # + styles = get_list_opt(options, 'style', []) + # + # use lowercase mode for Algol style + if 'algol' in styles or 'algol_nu' in styles: + self.algol_publication_mode = True + else: + self.algol_publication_mode = False + # + # Check option flags + # + self.treat_stdlib_adts_as_builtins = \ + get_bool_opt(options, 'treat_stdlib_adts_as_builtins', True) + # + # call superclass initialiser + RegexLexer.__init__(self, **options) + + # Set lexer to a specified dialect + def set_dialect(self, dialect_id): + # + #if __debug__: + # print 'entered set_dialect with arg: ', dialect_id + # + # check dialect name against known dialects + if dialect_id not in self.dialects: + dialect = UNKNOWN # default + else: + dialect = dialect_id + # + # compose lexemes to reject set + lexemes_to_reject_set = set() + # add each list of reject lexemes for this dialect + for list in self.lexemes_to_reject_db[dialect]: + lexemes_to_reject_set.update(set(list)) + # + # compose reserved words set + reswords_set = set() + # add each list of reserved words for this dialect + for list in self.reserved_words_db[dialect]: + reswords_set.update(set(list)) + # + # compose builtins set + builtins_set = set() + # add each list of builtins for this dialect excluding reserved words + for list in self.builtins_db[dialect]: + builtins_set.update(set(list).difference(reswords_set)) + # + # compose pseudo-builtins set + pseudo_builtins_set = set() + # add each list of builtins for this dialect excluding reserved words + for list in self.pseudo_builtins_db[dialect]: + pseudo_builtins_set.update(set(list).difference(reswords_set)) + # + # compose ADTs set + adts_set = set() + # add each list of ADTs for this dialect excluding reserved words + for list in self.stdlib_adts_db[dialect]: + adts_set.update(set(list).difference(reswords_set)) + # + # compose modules set + modules_set = set() + # add 
each list of builtins for this dialect excluding builtins + for list in self.stdlib_modules_db[dialect]: + modules_set.update(set(list).difference(builtins_set)) + # + # compose types set + types_set = set() + # add each list of types for this dialect excluding builtins + for list in self.stdlib_types_db[dialect]: + types_set.update(set(list).difference(builtins_set)) + # + # compose procedures set + procedures_set = set() + # add each list of procedures for this dialect excluding builtins + for list in self.stdlib_procedures_db[dialect]: + procedures_set.update(set(list).difference(builtins_set)) + # + # compose variables set + variables_set = set() + # add each list of variables for this dialect excluding builtins + for list in self.stdlib_variables_db[dialect]: + variables_set.update(set(list).difference(builtins_set)) + # + # compose constants set + constants_set = set() + # add each list of constants for this dialect excluding builtins + for list in self.stdlib_constants_db[dialect]: + constants_set.update(set(list).difference(builtins_set)) + # + # update lexer state + self.dialect = dialect + self.lexemes_to_reject = lexemes_to_reject_set + self.reserved_words = reswords_set + self.builtins = builtins_set + self.pseudo_builtins = pseudo_builtins_set + self.adts = adts_set + self.modules = modules_set + self.types = types_set + self.procedures = procedures_set + self.variables = variables_set + self.constants = constants_set + # + #if __debug__: + # print 'exiting set_dialect' + # print ' self.dialect: ', self.dialect + # print ' self.lexemes_to_reject: ', self.lexemes_to_reject + # print ' self.reserved_words: ', self.reserved_words + # print ' self.builtins: ', self.builtins + # print ' self.pseudo_builtins: ', self.pseudo_builtins + # print ' self.adts: ', self.adts + # print ' self.modules: ', self.modules + # print ' self.types: ', self.types + # print ' self.procedures: ', self.procedures + # print ' self.variables: ', self.variables + # print ' 
self.types: ', self.types + # print ' self.constants: ', self.constants + + # Extracts a dialect name from a dialect tag comment string and checks + # the extracted name against known dialects. If a match is found, the + # matching name is returned, otherwise dialect id 'unknown' is returned + def get_dialect_from_dialect_tag(self, dialect_tag): + # + #if __debug__: + # print 'entered get_dialect_from_dialect_tag with arg: ', dialect_tag + # + # constants + left_tag_delim = '(*!' + right_tag_delim = '*)' + left_tag_delim_len = len(left_tag_delim) + right_tag_delim_len = len(right_tag_delim) + indicator_start = left_tag_delim_len + indicator_end = -(right_tag_delim_len) + # + # check comment string for dialect indicator + if len(dialect_tag) > (left_tag_delim_len + right_tag_delim_len) \ + and dialect_tag.startswith(left_tag_delim) \ + and dialect_tag.endswith(right_tag_delim): + # + #if __debug__: + # print 'dialect tag found' + # + # extract dialect indicator + indicator = dialect_tag[indicator_start:indicator_end] + # + #if __debug__: + # print 'extracted: ', indicator + # + # check against known dialects + for index in range(1, len(self.dialects)): + # + #if __debug__: + # print 'dialects[', index, ']: ', self.dialects[index] + # + if indicator == self.dialects[index]: + # + #if __debug__: + # print 'matching dialect found' + # + # indicator matches known dialect + return indicator + else: + # indicator does not match any dialect + return UNKNOWN # default + else: + # invalid indicator string + return UNKNOWN # default + + # intercept the token stream, modify token attributes and return them + def get_tokens_unprocessed(self, text): + for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): + # + # check for dialect tag if dialect has not been set by tag + if not self.dialect_set_by_tag and token == Comment.Special: + indicated_dialect = self.get_dialect_from_dialect_tag(value) + if indicated_dialect != UNKNOWN: + # token is a dialect indicator 
+ # reset reserved words and builtins + self.set_dialect(indicated_dialect) + self.dialect_set_by_tag = True + # + # check for reserved words, predefined and stdlib identifiers + if token is Name: + if value in self.reserved_words: + token = Keyword.Reserved + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.builtins: + token = Name.Builtin + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.pseudo_builtins: + token = Name.Builtin.Pseudo + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.adts: + if not self.treat_stdlib_adts_as_builtins: + token = Name.Namespace + else: + token = Name.Builtin.Pseudo + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.modules: + token = Name.Namespace + # + elif value in self.types: + token = Name.Class + # + elif value in self.procedures: + token = Name.Function + # + elif value in self.variables: + token = Name.Variable + # + elif value in self.constants: + token = Name.Constant + # + elif token in Number: + # + # mark prefix number literals as error for PIM and ISO dialects + if self.dialect not in (UNKNOWN, 'm2r10', 'objm2'): + if "'" in value or value[0:2] in ('0b', '0x', '0u'): + token = Error + # + elif self.dialect in ('m2r10', 'objm2'): + # mark base-8 number literals as errors for M2 R10 and ObjM2 + if token is Number.Oct: + token = Error + # mark suffix base-16 literals as errors for M2 R10 and ObjM2 + elif token is Number.Hex and 'H' in value: + token = Error + # mark real numbers with E as errors for M2 R10 and ObjM2 + elif token is Number.Float and 'E' in value: + token = Error + # + elif token in Comment: + # + # mark single line comment as error for PIM and ISO dialects + if token is Comment.Single: + if self.dialect not in [UNKNOWN, 'm2r10', 'objm2']: + token = Error + # + if token is Comment.Preproc: + # mark ISO pragma as error for PIM dialects + if value.startswith('<*') and \ + 
self.dialect.startswith('m2pim'): + token = Error + # mark PIM pragma as comment for other dialects + elif value.startswith('(*$') and \ + self.dialect != UNKNOWN and \ + not self.dialect.startswith('m2pim'): + token = Comment.Multiline + # + else: # token is neither Name nor Comment + # + # mark lexemes matching the dialect's error token set as errors + if value in self.lexemes_to_reject: + token = Error + # + # substitute lexemes when in Algol mode + if self.algol_publication_mode: + if value == '#': + value = u'≠' + elif value == '<=': + value = u'≤' + elif value == '>=': + value = u'≥' + elif value == '==': + value = u'≡' + elif value == '*.': + value = u'•' + + # return result + yield index, token, value diff --git a/pygments/lexers/parasail.py b/pygments/lexers/parasail.py new file mode 100644 index 00000000..3cfffbee --- /dev/null +++ b/pygments/lexers/parasail.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.parasail + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexer for ParaSail. + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ + this, combined, inherit, do_insertions, default +from pygments.util import get_bool_opt, get_list_opt +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Literal + +__all__ = ['ParaSailLexer'] + + +class ParaSailLexer(RegexLexer): + """ + For `ParaSail <http://www.parasail-lang.org>`_ source code. + + .. 
versionadded:: 2.1 + """ + + name = 'ParaSail' + aliases = ['parasail'] + filenames = ['*.psi', '*.psl'] + mimetypes = ['text/x-parasail'] + + flags = re.MULTILINE + + tokens = { + 'root': [ + (r'[^\S\n]+', Text), + (r'//.*?\n', Comment.Single), + (r'\b(and|or|xor)=', Operator.Word), + (r'\b(and(\s+then)?|or(\s+else)?|xor|rem|mod|' + r'(is|not)\s+null)\b', + Operator.Word), + # Keywords + (r'\b(abs|abstract|all|block|class|concurrent|const|continue|' + r'each|end|exit|extends|exports|forward|func|global|implements|' + r'import|in|interface|is|lambda|locked|new|not|null|of|op|' + r'optional|private|queued|ref|return|reverse|separate|some|' + r'type|until|var|with|' + # Control flow + r'if|then|else|elsif|case|for|while|loop)\b', + Keyword.Reserved), + (r'(abstract\s+)?(interface|class|op|func|type)', + Keyword.Declaration), + # Literals + (r'"[^"]*"', String), + (r'\\[\'ntrf"0]', String.Escape), + (r'#[a-zA-Z]\w*', Literal), #Enumeration + include('numbers'), + (r"'[^']'", String.Char), + (r'[a-zA-Z]\w*', Name), + # Operators and Punctuation + (r'(<==|==>|<=>|\*\*=|<\|=|<<=|>>=|==|!=|=\?|<=|>=|' + r'\*\*|<<|>>|=>|:=|\+=|-=|\*=|\||\|=|/=|\+|-|\*|/|' + r'\.\.|<\.\.|\.\.<|<\.\.<)', + Operator), + (r'(<|>|\[|\]|\(|\)|\||:|;|,|.|\{|\}|->)', + Punctuation), + (r'\n+', Text), + ], + 'numbers' : [ + (r'\d[0-9_]*#[0-9a-fA-F][0-9a-fA-F_]*#', Number.Hex), # any base + (r'0[xX][0-9a-fA-F][0-9a-fA-F_]*', Number.Hex), # C-like hex + (r'0[bB][01][01_]*', Number.Bin), # C-like bin + (r'\d[0-9_]*\.\d[0-9_]*[eE][+-]\d[0-9_]*', # float exp + Number.Float), + (r'\d[0-9_]*\.\d[0-9_]*', Number.Float), # float + (r'\d[0-9_]*', Number.Integer), # integer + ], + } diff --git a/pygments/lexers/pascal.py b/pygments/lexers/pascal.py index 2895fba7..d3ce6a3a 100644 --- a/pygments/lexers/pascal.py +++ b/pygments/lexers/pascal.py @@ -18,7 +18,9 @@ from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Error from pygments.scanner import Scanner -__all__ = 
['DelphiLexer', 'Modula2Lexer', 'AdaLexer'] +from pygments.lexers.modula2 import Modula2Lexer + +__all__ = ['DelphiLexer', 'AdaLexer'] class DelphiLexer(Lexer): @@ -505,198 +507,6 @@ class DelphiLexer(Lexer): yield scanner.start_pos, token, scanner.match or '' -class Modula2Lexer(RegexLexer): - """ - For `Modula-2 <http://www.modula2.org/>`_ source code. - - Additional options that determine which keywords are highlighted: - - `pim` - Select PIM Modula-2 dialect (default: True). - `iso` - Select ISO Modula-2 dialect (default: False). - `objm2` - Select Objective Modula-2 dialect (default: False). - `gm2ext` - Also highlight GNU extensions (default: False). - - .. versionadded:: 1.3 - """ - name = 'Modula-2' - aliases = ['modula2', 'm2'] - filenames = ['*.def', '*.mod'] - mimetypes = ['text/x-modula2'] - - flags = re.MULTILINE | re.DOTALL - - tokens = { - 'whitespace': [ - (r'\n+', Text), # blank lines - (r'\s+', Text), # whitespace - ], - 'identifiers': [ - (r'([a-zA-Z_$][\w$]*)', Name), - ], - 'numliterals': [ - (r'[01]+B', Number.Bin), # binary number (ObjM2) - (r'[0-7]+B', Number.Oct), # octal number (PIM + ISO) - (r'[0-7]+C', Number.Oct), # char code (PIM + ISO) - (r'[0-9A-F]+C', Number.Hex), # char code (ObjM2) - (r'[0-9A-F]+H', Number.Hex), # hexadecimal number - (r'[0-9]+\.[0-9]+E[+-][0-9]+', Number.Float), # real number - (r'[0-9]+\.[0-9]+', Number.Float), # real number - (r'[0-9]+', Number.Integer), # decimal whole number - ], - 'strings': [ - (r"'(\\\\|\\'|[^'])*'", String), # single quoted string - (r'"(\\\\|\\"|[^"])*"', String), # double quoted string - ], - 'operators': [ - (r'[*/+=#~&<>\^-]', Operator), - (r':=', Operator), # assignment - (r'@', Operator), # pointer deref (ISO) - (r'\.\.', Operator), # ellipsis or range - (r'`', Operator), # Smalltalk message (ObjM2) - (r'::', Operator), # type conversion (ObjM2) - ], - 'punctuation': [ - (r'[()\[\]{},.:;|]', Punctuation), - ], - 'comments': [ - (r'//.*?\n', Comment.Single), # ObjM2 - 
(r'/\*(.*?)\*/', Comment.Multiline), # ObjM2 - (r'\(\*([^$].*?)\*\)', Comment.Multiline), - # TO DO: nesting of (* ... *) comments - ], - 'pragmas': [ - (r'\(\*\$(.*?)\*\)', Comment.Preproc), # PIM - (r'<\*(.*?)\*>', Comment.Preproc), # ISO + ObjM2 - ], - 'root': [ - include('whitespace'), - include('comments'), - include('pragmas'), - include('identifiers'), - include('numliterals'), - include('strings'), - include('operators'), - include('punctuation'), - ] - } - - pim_reserved_words = [ - # 40 reserved words - 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', - 'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'EXPORT', 'FOR', - 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', - 'MODULE', 'NOT', 'OF', 'OR', 'POINTER', 'PROCEDURE', 'QUALIFIED', - 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', - 'UNTIL', 'VAR', 'WHILE', 'WITH', - ] - - pim_pervasives = [ - # 31 pervasives - 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'DEC', - 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'INC', 'INCL', - 'INTEGER', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD', - 'ORD', 'PROC', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'VAL', - ] - - iso_reserved_words = [ - # 46 reserved words - 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', - 'DO', 'ELSE', 'ELSIF', 'END', 'EXCEPT', 'EXIT', 'EXPORT', 'FINALLY', - 'FOR', 'FORWARD', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', - 'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'PACKEDSET', 'POINTER', - 'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'REM', 'RETRY', - 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE', - 'WITH', - ] - - iso_pervasives = [ - # 42 pervasives - 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'CMPLX', - 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', - 'IM', 'INC', 'INCL', 'INT', 'INTEGER', 'INTERRUPTIBLE', 'LENGTH', - 'LFLOAT', 'LONGCOMPLEX', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', - 'NIL', 
'ODD', 'ORD', 'PROC', 'PROTECTION', 'RE', 'REAL', 'SIZE', - 'TRUE', 'TRUNC', 'UNINTERRUBTIBLE', 'VAL', - ] - - objm2_reserved_words = [ - # base language, 42 reserved words - 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', - 'DO', 'ELSE', 'ELSIF', 'END', 'ENUM', 'EXIT', 'FOR', 'FROM', 'IF', - 'IMMUTABLE', 'IMPLEMENTATION', 'IMPORT', 'IN', 'IS', 'LOOP', 'MOD', - 'MODULE', 'NOT', 'OF', 'OPAQUE', 'OR', 'POINTER', 'PROCEDURE', - 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', - 'UNTIL', 'VAR', 'VARIADIC', 'WHILE', - # OO extensions, 16 reserved words - 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD', - 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC', - 'SUPER', 'TRY', - ] - - objm2_pervasives = [ - # base language, 38 pervasives - 'ABS', 'BITSET', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'DISPOSE', - 'FALSE', 'HALT', 'HIGH', 'INTEGER', 'INRANGE', 'LENGTH', 'LONGCARD', - 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEG', 'NEW', 'NEXTV', 'NIL', - 'OCTET', 'ODD', 'ORD', 'PRED', 'PROC', 'READ', 'REAL', 'SUCC', 'TMAX', - 'TMIN', 'TRUE', 'TSIZE', 'UNICHAR', 'VAL', 'WRITE', 'WRITEF', - # OO extensions, 3 pervasives - 'OBJECT', 'NO', 'YES', - ] - - gnu_reserved_words = [ - # 10 additional reserved words - 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__', - '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE', - ] - - gnu_pervasives = [ - # 21 identifiers, actually from pseudo-module SYSTEM - # but we will highlight them as if they were pervasives - 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16', - 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96', - 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64', - 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW', - ] - - def __init__(self, **options): - self.reserved_words = set() - self.pervasives = set() - # ISO Modula-2 - if get_bool_opt(options, 'iso', False): - 
self.reserved_words.update(self.iso_reserved_words) - self.pervasives.update(self.iso_pervasives) - # Objective Modula-2 - elif get_bool_opt(options, 'objm2', False): - self.reserved_words.update(self.objm2_reserved_words) - self.pervasives.update(self.objm2_pervasives) - # PIM Modula-2 (DEFAULT) - else: - self.reserved_words.update(self.pim_reserved_words) - self.pervasives.update(self.pim_pervasives) - # GNU extensions - if get_bool_opt(options, 'gm2ext', False): - self.reserved_words.update(self.gnu_reserved_words) - self.pervasives.update(self.gnu_pervasives) - # initialise - RegexLexer.__init__(self, **options) - - def get_tokens_unprocessed(self, text): - for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): - # check for reserved words and pervasives - if token is Name: - if value in self.reserved_words: - token = Keyword.Reserved - elif value in self.pervasives: - token = Keyword.Pervasive - # return result - yield index, token, value - - class AdaLexer(RegexLexer): """ For Ada source code. 
diff --git a/pygments/lexers/perl.py b/pygments/lexers/perl.py index 7e70b3ee..b78963d0 100644 --- a/pygments/lexers/perl.py +++ b/pygments/lexers/perl.py @@ -46,6 +46,7 @@ class PerlLexer(RegexLexer): (r'\$(\\\\|\\[^\\]|[^\\$])*\$[egimosx]*', String.Regex, '#pop'), ], 'root': [ + (r'\A\#!.+?$', Comment.Hashbang), (r'\#.*?$', Comment.Single), (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline), (words(( diff --git a/pygments/lexers/prolog.py b/pygments/lexers/prolog.py index 2b1c7634..7d32d7f6 100644 --- a/pygments/lexers/prolog.py +++ b/pygments/lexers/prolog.py @@ -155,11 +155,11 @@ class LogtalkLexer(RegexLexer): # Term creation and decomposition (r'(functor|arg|copy_term|numbervars|term_variables)(?=[(])', Keyword), # Evaluable functors - (r'(rem|m(ax|in|od)|abs|sign)(?=[(])', Keyword), + (r'(div|rem|m(ax|in|od)|abs|sign)(?=[(])', Keyword), (r'float(_(integer|fractional)_part)?(?=[(])', Keyword), - (r'(floor|truncate|round|ceiling)(?=[(])', Keyword), + (r'(floor|t(an|runcate)|round|ceiling)(?=[(])', Keyword), # Other arithmetic functors - (r'(cos|a(cos|sin|tan)|exp|log|s(in|qrt))(?=[(])', Keyword), + (r'(cos|a(cos|sin|tan|tan2)|exp|log|s(in|qrt)|xor)(?=[(])', Keyword), # Term testing (r'(var|atom(ic)?|integer|float|c(allable|ompound)|n(onvar|umber)|' r'ground|acyclic_term)(?=[(])', Keyword), @@ -212,7 +212,7 @@ class LogtalkLexer(RegexLexer): (r'(==|\\==|@=<|@<|@>=|@>)', Operator), # Evaluable functors (r'(//|[-+*/])', Operator), - (r'\b(e|pi|mod|rem)\b', Operator), + (r'\b(e|pi|div|mod|rem)\b', Operator), # Other arithemtic functors (r'\b\*\*\b', Operator), # DCG rules diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 3c1aff56..ea97b855 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -41,7 +41,8 @@ class PythonLexer(RegexLexer): (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)), (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)), (r'[^\S\n]+', Text), - (r'#.*$', Comment), + 
(r'\A#!.+$', Comment.Hashbang), + (r'#.*$', Comment.Single), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), @@ -155,10 +156,11 @@ class PythonLexer(RegexLexer): r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'strings': [ + # the old style '%s' % (...) string formatting (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%\n]+', String), - # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), # unhandled string formatting sign (r'%', String) @@ -213,11 +215,14 @@ class Python3Lexer(RegexLexer): tokens = PythonLexer.tokens.copy() tokens['keywords'] = [ (words(( - 'assert', 'break', 'continue', 'del', 'elif', 'else', 'except', - 'finally', 'for', 'global', 'if', 'lambda', 'pass', 'raise', - 'nonlocal', 'return', 'try', 'while', 'yield', 'yield from', 'as', - 'with', 'True', 'False', 'None'), suffix=r'\b'), + 'assert', 'async', 'await', 'break', 'continue', 'del', 'elif', + 'else', 'except', 'finally', 'for', 'global', 'if', 'lambda', 'pass', + 'raise', 'nonlocal', 'return', 'try', 'while', 'yield', 'yield from', + 'as', 'with'), suffix=r'\b'), Keyword), + (words(( + 'True', 'False', 'None'), suffix=r'\b'), + Keyword.Constant), ] tokens['builtins'] = [ (words(( @@ -241,7 +246,7 @@ class Python3Lexer(RegexLexer): 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'NotImplementedError', 'OSError', 'OverflowError', - 'PendingDeprecationWarning', 'ReferenceError', + 'PendingDeprecationWarning', 'ReferenceError', 'ResourceWarning', 'RuntimeError', 'RuntimeWarning', 'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError', @@ -267,6 +272,7 @@ class Python3Lexer(RegexLexer): tokens['backtick'] = [] tokens['name'] = [ 
(r'@\w+', Name.Decorator), + (r'@', Operator), # new matrix multiplication operator (uni_name, Name), ] tokens['funcname'] = [ @@ -288,13 +294,21 @@ class Python3Lexer(RegexLexer): (uni_name, Name.Namespace), default('#pop'), ] - # don't highlight "%s" substitutions tokens['strings'] = [ - (r'[^\\\'"%\n]+', String), - # quotes, percents and backslashes must be parsed one at a time + # the old style '%s' % (...) string formatting (still valid in Py3) + (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + # the new style '{}'.format(...) string formatting + (r'\{' + '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name + '(\![sra])?' # conversion + '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[bcdeEfFgGnosxX%]?)?' + '\}', String.Interpol), + # backslashes, quotes and formatting signs must be parsed one at a time + (r'[^\\\'"%\{\n]+', String), (r'[\'"\\]', String), # unhandled string formatting sign - (r'%', String) + (r'%|(\{{1,2})', String) # newlines are an error (use "nl" state) ] diff --git a/pygments/lexers/rdf.py b/pygments/lexers/rdf.py index fb14629a..4f2037bf 100644 --- a/pygments/lexers/rdf.py +++ b/pygments/lexers/rdf.py @@ -12,10 +12,10 @@ import re from pygments.lexer import RegexLexer, bygroups, default -from pygments.token import Keyword, Punctuation, String, Number, Operator, \ +from pygments.token import Keyword, Punctuation, String, Number, Operator, Generic, \ Whitespace, Name, Literal, Comment, Text -__all__ = ['SparqlLexer'] +__all__ = ['SparqlLexer', 'TurtleLexer'] class SparqlLexer(RegexLexer): @@ -97,3 +97,97 @@ class SparqlLexer(RegexLexer): default('#pop:2'), ], } + + +class TurtleLexer(RegexLexer): + """ + Lexer for `Turtle <http://www.w3.org/TR/turtle/>`_ data language. + + .. 
versionadded:: 2.0 + """ + name = 'Turtle' + aliases = ['turtle'] + filenames = ['*.ttl'] + mimetypes = ['text/turtle', 'application/x-turtle'] + + flags = re.IGNORECASE + + patterns = { + 'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)', # Simplified character range + 'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)' + } + + # PNAME_NS PN_LOCAL (with simplified character range) + patterns['PrefixedName'] = r'%(PNAME_NS)s([a-z][\w-]*)' % patterns + + tokens = { + 'root': [ + (r'\s+', Whitespace), + + # Base / prefix + (r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns, + bygroups(Keyword, Whitespace, Name.Variable, Whitespace, + Punctuation)), + (r'(@prefix|PREFIX)(\s+)%(PNAME_NS)s(\s+)%(IRIREF)s(\s*)(\.?)' % patterns, + bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, + Name.Variable, Whitespace, Punctuation)), + + # The shorthand predicate 'a' + (r'(?<=\s)a(?=\s)', Keyword.Type), + + # IRIREF + (r'%(IRIREF)s' % patterns, Name.Variable), + + # PrefixedName + (r'%(PrefixedName)s' % patterns, + bygroups(Name.Namespace, Name.Tag)), + + # Comment + (r'#[^\n]+', Comment), + + (r'\b(true|false)\b', Literal), + (r'[+\-]?\d*\.\d+', Number.Float), + (r'[+\-]?\d*(:?\.\d+)?E[+\-]?\d+', Number.Float), + (r'[+\-]?\d+', Number.Integer), + (r'[\[\](){}.;,:^]', Punctuation), + + (r'"""', String, 'triple-double-quoted-string'), + (r'"', String, 'single-double-quoted-string'), + (r"'''", String, 'triple-single-quoted-string'), + (r"'", String, 'single-single-quoted-string'), + ], + 'triple-double-quoted-string': [ + (r'"""', String, 'end-of-string'), + (r'[^\\]+', String), + (r'\\', String, 'string-escape'), + ], + 'single-double-quoted-string': [ + (r'"', String, 'end-of-string'), + (r'[^"\\\n]+', String), + (r'\\', String, 'string-escape'), + ], + 'triple-single-quoted-string': [ + (r"'''", String, 'end-of-string'), + (r'[^\\]+', String), + (r'\\', String, 'string-escape'), + ], + 'single-single-quoted-string': [ + (r"'", String, 'end-of-string'), + (r"[^'\\\n]+", String), + (r'\\', 
String, 'string-escape'), + ], + 'string-escape': [ + (r'.', String, '#pop'), + ], + 'end-of-string': [ + + (r'(@)([a-zA-Z]+(:?-[a-zA-Z0-9]+)*)', + bygroups(Operator, Generic.Emph), '#pop:2'), + + (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'), + (r'(\^\^)%(PrefixedName)s' % patterns, bygroups(Operator, Generic.Emph, Generic.Emph), '#pop:2'), + + default('#pop:2'), + + ], + } diff --git a/pygments/lexers/robotframework.py b/pygments/lexers/robotframework.py index 56996fa8..eab06efe 100644 --- a/pygments/lexers/robotframework.py +++ b/pygments/lexers/robotframework.py @@ -87,7 +87,7 @@ class RobotFrameworkLexer(Lexer): class VariableTokenizer(object): def tokenize(self, string, token): - var = VariableSplitter(string, identifiers='$@%') + var = VariableSplitter(string, identifiers='$@%&') if var.start < 0 or token in (COMMENT, ERROR): yield string, token return @@ -205,7 +205,7 @@ class Tokenizer(object): def _is_assign(self, value): if value.endswith('='): value = value[:-1].strip() - var = VariableSplitter(value, identifiers='$@') + var = VariableSplitter(value, identifiers='$@&') return var.start == 0 and var.end == len(value) @@ -262,7 +262,7 @@ class TestCaseSetting(Setting): class KeywordSetting(TestCaseSetting): _keyword_settings = ('teardown',) - _other_settings = ('documentation', 'arguments', 'return', 'timeout') + _other_settings = ('documentation', 'arguments', 'return', 'timeout', 'tags') class Variable(Tokenizer): @@ -465,13 +465,13 @@ class VariableSplitter: self.identifier = self._variable_chars[0] self.base = ''.join(self._variable_chars[2:-1]) self.end = self.start + len(self._variable_chars) - if self._has_list_variable_index(): - self.index = ''.join(self._list_variable_index_chars[1:-1]) - self.end += len(self._list_variable_index_chars) + if self._has_list_or_dict_variable_index(): + self.index = ''.join(self._list_and_dict_variable_index_chars[1:-1]) + self.end += len(self._list_and_dict_variable_index_chars) - 
def _has_list_variable_index(self): - return self._list_variable_index_chars\ - and self._list_variable_index_chars[-1] == ']' + def _has_list_or_dict_variable_index(self): + return self._list_and_dict_variable_index_chars\ + and self._list_and_dict_variable_index_chars[-1] == ']' def _split(self, string): start_index, max_index = self._find_variable(string) @@ -479,7 +479,7 @@ class VariableSplitter: self._open_curly = 1 self._state = self._variable_state self._variable_chars = [string[start_index], '{'] - self._list_variable_index_chars = [] + self._list_and_dict_variable_index_chars = [] self._string = string start_index += 2 for index, char in enumerate(string[start_index:]): @@ -530,14 +530,14 @@ class VariableSplitter: if char == '}' and not self._is_escaped(self._string, index): self._open_curly -= 1 if self._open_curly == 0: - if not self._is_list_variable(): + if not self._is_list_or_dict_variable(): raise StopIteration self._state = self._waiting_list_variable_index_state elif char in self._identifiers: self._state = self._internal_variable_start_state - def _is_list_variable(self): - return self._variable_chars[0] == '@' + def _is_list_or_dict_variable(self): + return self._variable_chars[0] in ('@','&') def _internal_variable_start_state(self, char, index): self._state = self._variable_state @@ -551,10 +551,10 @@ class VariableSplitter: def _waiting_list_variable_index_state(self, char, index): if char != '[': raise StopIteration - self._list_variable_index_chars.append(char) + self._list_and_dict_variable_index_chars.append(char) self._state = self._list_variable_index_state def _list_variable_index_state(self, char, index): - self._list_variable_index_chars.append(char) + self._list_and_dict_variable_index_chars.append(char) if char == ']': raise StopIteration diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py index 74edd7ae..e81d6ecf 100644 --- a/pygments/lexers/ruby.py +++ b/pygments/lexers/ruby.py @@ -190,6 +190,7 @@ class 
RubyLexer(ExtendedRegexLexer): tokens = { 'root': [ + (r'\A#!.+?$', Comment.Hashbang), (r'#.*?$', Comment.Single), (r'=begin\s.*?\n=end.*?$', Comment.Multiline), # keywords @@ -256,13 +257,13 @@ class RubyLexer(ExtendedRegexLexer): r'(?<=(?:\s|;)when\s)|' r'(?<=(?:\s|;)or\s)|' r'(?<=(?:\s|;)and\s)|' - r'(?<=(?:\s|;|\.)index\s)|' - r'(?<=(?:\s|;|\.)scan\s)|' - r'(?<=(?:\s|;|\.)sub\s)|' - r'(?<=(?:\s|;|\.)sub!\s)|' - r'(?<=(?:\s|;|\.)gsub\s)|' - r'(?<=(?:\s|;|\.)gsub!\s)|' - r'(?<=(?:\s|;|\.)match\s)|' + r'(?<=\.index\s)|' + r'(?<=\.scan\s)|' + r'(?<=\.sub\s)|' + r'(?<=\.sub!\s)|' + r'(?<=\.gsub\s)|' + r'(?<=\.gsub!\s)|' + r'(?<=\.match\s)|' r'(?<=(?:\s|;)if\s)|' r'(?<=(?:\s|;)elsif\s)|' r'(?<=^when\s)|' diff --git a/pygments/lexers/rust.py b/pygments/lexers/rust.py index 2ca860d6..d8939678 100644 --- a/pygments/lexers/rust.py +++ b/pygments/lexers/rust.py @@ -10,7 +10,7 @@ """ from pygments.lexer import RegexLexer, include, bygroups, words, default -from pygments.token import Comment, Operator, Keyword, Name, String, \ +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Whitespace __all__ = ['RustLexer'] @@ -18,33 +18,40 @@ __all__ = ['RustLexer'] class RustLexer(RegexLexer): """ - Lexer for the Rust programming language (version 0.9). + Lexer for the Rust programming language (version 1.0). .. versionadded:: 1.6 """ name = 'Rust' filenames = ['*.rs'] aliases = ['rust'] - mimetypes = ['text/x-rustsrc'] + mimetypes = ['text/rust'] tokens = { 'root': [ + # rust allows a file to start with a shebang, but if the first line + # starts with #![ then it’s not a shebang but a crate attribute. 
+ (r'#![^[\r\n].*$', Comment.Preproc), + default('base'), + ], + 'base': [ # Whitespace and Comments (r'\n', Whitespace), (r'\s+', Whitespace), - (r'//[/!](.*?)\n', Comment.Doc), + (r'//!.*?\n', String.Doc), + (r'///(\n|[^/].*?\n)', String.Doc), (r'//(.*?)\n', Comment.Single), + (r'/\*\*(\n|[^/*])', String.Doc, 'doccomment'), + (r'/\*!', String.Doc, 'doccomment'), (r'/\*', Comment.Multiline, 'comment'), - # Lifetime - (r"""'[a-zA-Z_]\w*""", Name.Label), # Macro parameters (r"""\$([a-zA-Z_]\w*|\(,?|\),?|,?)""", Comment.Preproc), # Keywords (words(( - 'as', 'box', 'break', 'continue', 'do', 'else', 'enum', 'extern', + 'as', 'box', 'crate', 'do', 'else', 'enum', 'extern', # break and continue are in labels 'fn', 'for', 'if', 'impl', 'in', 'loop', 'match', 'mut', 'priv', - 'proc', 'pub', 'ref', 'return', 'static', '\'static', 'struct', + 'proc', 'pub', 'ref', 'return', 'static', 'struct', 'trait', 'true', 'type', 'unsafe', 'while'), suffix=r'\b'), Keyword), (words(('alignof', 'be', 'const', 'offsetof', 'pure', 'sizeof', @@ -53,44 +60,45 @@ class RustLexer(RegexLexer): (r'(mod|use)\b', Keyword.Namespace), (r'(true|false)\b', Keyword.Constant), (r'let\b', Keyword.Declaration), - (words(('u8', 'u16', 'u32', 'u64', 'i8', 'i16', 'i32', 'i64', 'uint', - 'int', 'f32', 'f64', 'str', 'bool'), suffix=r'\b'), + (words(('u8', 'u16', 'u32', 'u64', 'i8', 'i16', 'i32', 'i64', 'usize', + 'isize', 'f32', 'f64', 'str', 'bool'), suffix=r'\b'), Keyword.Type), (r'self\b', Name.Builtin.Pseudo), - # Prelude + # Prelude (taken from Rust’s src/libstd/prelude.rs) (words(( - 'Freeze', 'Pod', 'Send', 'Sized', 'Add', 'Sub', 'Mul', 'Div', 'Rem', 'Neg', 'Not', 'BitAnd', - 'BitOr', 'BitXor', 'Drop', 'Shl', 'Shr', 'Index', 'Option', 'Some', 'None', 'Result', - 'Ok', 'Err', 'from_str', 'range', 'print', 'println', 'Any', 'AnyOwnExt', 'AnyRefExt', - 'AnyMutRefExt', 'Ascii', 'AsciiCast', 'OnwedAsciiCast', 'AsciiStr', - 'IntoBytes', 'Bool', 'ToCStr', 'Char', 'Clone', 'DeepClone', 'Eq', 'ApproxEq', - 
'Ord', 'TotalEq', 'Ordering', 'Less', 'Equal', 'Greater', 'Equiv', 'Container', - 'Mutable', 'Map', 'MutableMap', 'Set', 'MutableSet', 'Default', 'FromStr', - 'Hash', 'FromIterator', 'Extendable', 'Iterator', 'DoubleEndedIterator', - 'RandomAccessIterator', 'CloneableIterator', 'OrdIterator', - 'MutableDoubleEndedIterator', 'ExactSize', 'Times', 'Algebraic', - 'Trigonometric', 'Exponential', 'Hyperbolic', 'Bitwise', 'BitCount', - 'Bounded', 'Integer', 'Fractional', 'Real', 'RealExt', 'Num', 'NumCast', - 'CheckedAdd', 'CheckedSub', 'CheckedMul', 'Orderable', 'Signed', - 'Unsigned', 'Round', 'Primitive', 'Int', 'Float', 'ToStrRadix', - 'ToPrimitive', 'FromPrimitive', 'GenericPath', 'Path', 'PosixPath', - 'WindowsPath', 'RawPtr', 'Buffer', 'Writer', 'Reader', 'Seek', - 'SendStr', 'SendStrOwned', 'SendStrStatic', 'IntoSendStr', 'Str', - 'StrVector', 'StrSlice', 'OwnedStr', 'IterBytes', 'ToStr', 'IntoStr', - 'CopyableTuple', 'ImmutableTuple', 'ImmutableEqVector', 'ImmutableTotalOrdVector', - 'ImmutableCopyableVector', 'OwnedVector', 'OwnedCopyableVector', - 'OwnedEqVector', 'MutableVector', 'MutableTotalOrdVector', - 'Vector', 'VectorVector', 'CopyableVector', 'ImmutableVector', - 'Port', 'Chan', 'SharedChan', 'spawn', 'drop'), suffix=r'\b'), + # Reexported core operators + 'Copy', 'Send', 'Sized', 'Sync', + 'Drop', 'Fn', 'FnMut', 'FnOnce', + + # Reexported functions + 'drop', + + # Reexported types and traits + 'Box', + 'ToOwned', + 'Clone', + 'PartialEq', 'PartialOrd', 'Eq', 'Ord', + 'AsRef', 'AsMut', 'Into', 'From', + 'Default', + 'Iterator', 'Extend', 'IntoIterator', + 'DoubleEndedIterator', 'ExactSizeIterator', + 'Option', + 'Some', 'None', + 'Result', + 'Ok', 'Err', + 'SliceConcatExt', + 'String', 'ToString', + 'Vec', + ), suffix=r'\b'), Name.Builtin), - (r'(ImmutableTuple\d+|Tuple\d+)\b', Name.Builtin), - # Borrowed pointer - (r'(&)(\'[A-Za-z_]\w*)?', bygroups(Operator, Name)), # Labels - (r'\'[A-Za-z_]\w*:', Name.Label), + 
(r'(break|continue)(\s*)(\'[A-Za-z_]\w*)?', bygroups(Keyword, Text.Whitespace, Name.Label)), # Character Literal - (r"""'(\\['"\\nrt]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}""" - r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|.)'""", + (r"""'(\\['"\\nrt]|\\x[0-7][0-9a-fA-F]|\\0""" + r"""|\\u\{[0-9a-fA-F]{1,6}\}|.)'""", + String.Char), + (r"""b'(\\['"\\nrt]|\\x[0-9a-fA-F]{2}|\\0""" + r"""|\\u\{[0-9a-fA-F]{1,6}\}|.)'""", String.Char), # Binary Literal (r'0b[01_]+', Number.Bin, 'number_lit'), @@ -100,11 +108,16 @@ class RustLexer(RegexLexer): (r'0[xX][0-9a-fA-F_]+', Number.Hex, 'number_lit'), # Decimal Literal (r'[0-9][0-9_]*(\.[0-9_]+[eE][+\-]?[0-9_]+|' - r'\.[0-9_]*|[eE][+\-]?[0-9_]+)', Number.Float, 'number_lit'), + r'\.[0-9_]*(?!\.)|[eE][+\-]?[0-9_]+)', Number.Float, 'number_lit'), (r'[0-9][0-9_]*', Number.Integer, 'number_lit'), # String Literal + (r'b"', String, 'bytestring'), (r'"', String, 'string'), - (r'r(#*)".*?"\1', String.Raw), + (r'b?r(#*)".*?"\1', String), + + # Lifetime + (r"""'static""", Name.Builtin), + (r"""'[a-zA-Z_]\w*""", Name.Attribute), # Operators and Punctuation (r'[{}()\[\],.;]', Punctuation), @@ -129,18 +142,28 @@ class RustLexer(RegexLexer): (r'\*/', Comment.Multiline, '#pop'), (r'[*/]', Comment.Multiline), ], + 'doccomment': [ + (r'[^*/]+', String.Doc), + (r'/\*', String.Doc, '#push'), + (r'\*/', String.Doc, '#pop'), + (r'[*/]', String.Doc), + ], 'number_lit': [ - (r'[ui](8|16|32|64)', Keyword, '#pop'), + (r'[ui](8|16|32|64|size)', Keyword, '#pop'), (r'f(32|64)', Keyword, '#pop'), default('#pop'), ], 'string': [ (r'"', String, '#pop'), - (r"""\\['"\\nrt]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}""" - r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}""", String.Escape), + (r"""\\['"\\nrt]|\\x[0-7][0-9a-fA-F]|\\0""" + r"""|\\u\{[0-9a-fA-F]{1,6}\}""", String.Escape), (r'[^\\"]+', String), (r'\\', String), ], + 'bytestring': [ + (r"""\\x[89a-fA-F][0-9a-fA-F]""", String.Escape), + include('string'), + ], 'macro{': [ (r'\{', Operator, '#push'), (r'\}', Operator, '#pop'), diff 
--git a/pygments/lexers/scripting.py b/pygments/lexers/scripting.py index 473ea7eb..c09c5ba9 100644 --- a/pygments/lexers/scripting.py +++ b/pygments/lexers/scripting.py @@ -14,11 +14,12 @@ import re from pygments.lexer import RegexLexer, include, bygroups, default, combined, \ words from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation, Error, Whitespace + Number, Punctuation, Error, Whitespace, Other from pygments.util import get_bool_opt, get_list_opt, iteritems __all__ = ['LuaLexer', 'MoonScriptLexer', 'ChaiscriptLexer', 'LSLLexer', - 'AppleScriptLexer', 'RexxLexer', 'MOOCodeLexer', 'HybrisLexer'] + 'AppleScriptLexer', 'RexxLexer', 'MOOCodeLexer', 'HybrisLexer', + 'EasytrieveLexer', 'JclLexer'] class LuaLexer(RegexLexer): @@ -921,3 +922,275 @@ class HybrisLexer(RegexLexer): (r'[\w.]+\*?', Name.Namespace, '#pop') ], } + + +class EasytrieveLexer(RegexLexer): + """ + Easytrieve Plus is a programming language for extracting, filtering and + converting sequential data. Furthermore it can layout data for reports. + It is mainly used on mainframe platforms and can access several of the + mainframe's native file formats. It is somewhat comparable to awk. + + .. versionadded:: 2.1 + """ + name = 'Easytrieve' + aliases = ['easytrieve'] + filenames = ['*.ezt', '*.mac'] + mimetypes = ['text/x-easytrieve'] + flags = 0 + + # Note: We cannot use r'\b' at the start and end of keywords because + # Easytrieve Plus delimiter characters are: + # + # * space ( ) + # * apostrophe (') + # * period (.) + # * comma (,) + # * paranthesis ( and ) + # * colon (:) + # + # Additionally words end once a '*' appears, indicatins a comment. 
+ _DELIMITERS = r' \'.,():\n' + _DELIMITERS_OR_COMENT = _DELIMITERS + '*' + _DELIMITER_PATTERN = '[' + _DELIMITERS + ']' + _DELIMITER_PATTERN_CAPTURE = '(' + _DELIMITER_PATTERN + ')' + _NON_DELIMITER_OR_COMMENT_PATTERN = '[^' + _DELIMITERS_OR_COMENT + ']' + _OPERATORS_PATTERN = u'[.+\\-/=\\[\\](){}<>;,&%¬]' + _KEYWORDS = [ + 'AFTER-BREAK', 'AFTER-LINE', 'AFTER-SCREEN', 'AIM', 'AND', 'ATTR', + 'BEFORE', 'BEFORE-BREAK', 'BEFORE-LINE', 'BEFORE-SCREEN', 'BUSHU', + 'BY', 'CALL', 'CASE', 'CHECKPOINT', 'CHKP', 'CHKP-STATUS', 'CLEAR', + 'CLOSE', 'COL', 'COLOR', 'COMMIT', 'CONTROL', 'COPY', 'CURSOR', 'D', + 'DECLARE', 'DEFAULT', 'DEFINE', 'DELETE', 'DENWA', 'DISPLAY', 'DLI', + 'DO', 'DUPLICATE', 'E', 'ELSE', 'ELSE-IF', 'END', 'END-CASE', + 'END-DO', 'END-IF', 'END-PROC', 'ENDPAGE', 'ENDTABLE', 'ENTER', 'EOF', + 'EQ', 'ERROR', 'EXIT', 'EXTERNAL', 'EZLIB', 'F1', 'F10', 'F11', 'F12', + 'F13', 'F14', 'F15', 'F16', 'F17', 'F18', 'F19', 'F2', 'F20', 'F21', + 'F22', 'F23', 'F24', 'F25', 'F26', 'F27', 'F28', 'F29', 'F3', 'F30', + 'F31', 'F32', 'F33', 'F34', 'F35', 'F36', 'F4', 'F5', 'F6', 'F7', + 'F8', 'F9', 'FETCH', 'FILE-STATUS', 'FILL', 'FINAL', 'FIRST', + 'FIRST-DUP', 'FOR', 'GE', 'GET', 'GO', 'GOTO', 'GQ', 'GR', 'GT', + 'HEADING', 'HEX', 'HIGH-VALUES', 'IDD', 'IDMS', 'IF', 'IN', 'INSERT', + 'JUSTIFY', 'KANJI-DATE', 'KANJI-DATE-LONG', 'KANJI-TIME', 'KEY', + 'KEY-PRESSED', 'KOKUGO', 'KUN', 'LAST-DUP', 'LE', 'LEVEL', 'LIKE', + 'LINE', 'LINE-COUNT', 'LINE-NUMBER', 'LINK', 'LIST', 'LOW-VALUES', + 'LQ', 'LS', 'LT', 'MACRO', 'MASK', 'MATCHED', 'MEND', 'MESSAGE', + 'MOVE', 'MSTART', 'NE', 'NEWPAGE', 'NOMASK', 'NOPRINT', 'NOT', + 'NOTE', 'NOVERIFY', 'NQ', 'NULL', 'OF', 'OR', 'OTHERWISE', 'PA1', + 'PA2', 'PA3', 'PAGE-COUNT', 'PAGE-NUMBER', 'PARM-REGISTER', + 'PATH-ID', 'PATTERN', 'PERFORM', 'POINT', 'POS', 'PRIMARY', 'PRINT', + 'PROCEDURE', 'PROGRAM', 'PUT', 'READ', 'RECORD', 'RECORD-COUNT', + 'RECORD-LENGTH', 'REFRESH', 'RELEASE', 'RENUM', 'REPEAT', 'REPORT', + 'REPORT-INPUT', 
'RESHOW', 'RESTART', 'RETRIEVE', 'RETURN-CODE', + 'ROLLBACK', 'ROW', 'S', 'SCREEN', 'SEARCH', 'SECONDARY', 'SELECT', + 'SEQUENCE', 'SIZE', 'SKIP', 'SOKAKU', 'SORT', 'SQL', 'STOP', 'SUM', + 'SYSDATE', 'SYSDATE-LONG', 'SYSIN', 'SYSIPT', 'SYSLST', 'SYSPRINT', + 'SYSSNAP', 'SYSTIME', 'TALLY', 'TERM-COLUMNS', 'TERM-NAME', + 'TERM-ROWS', 'TERMINATION', 'TITLE', 'TO', 'TRANSFER', 'TRC', + 'UNIQUE', 'UNTIL', 'UPDATE', 'UPPERCASE', 'USER', 'USERID', 'VALUE', + 'VERIFY', 'W', 'WHEN', 'WHILE', 'WORK', 'WRITE', 'X', 'XDM', 'XRST' + ] + + tokens = { + 'root': [ + (r'\*.*\n', Comment.Single), + (r'\n+', Whitespace), + # Macro argument + (r'&' + _NON_DELIMITER_OR_COMMENT_PATTERN + r'+\.', Name.Variable, 'after_macro_argument'), + # Macro call + (r'%' + _NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Variable), + (r'(FILE|MACRO|REPORT)(\s+)', + bygroups(Keyword.Declaration, Whitespace), 'after_declaration'), + (r'(JOB|PARM)' + r'(' + _DELIMITER_PATTERN + r')', + bygroups(Keyword.Declaration, Operator)), + (words(_KEYWORDS, suffix=_DELIMITER_PATTERN_CAPTURE), + bygroups(Keyword.Reserved, Operator)), + (_OPERATORS_PATTERN, Operator), + # Procedure declaration + (r'(' + _NON_DELIMITER_OR_COMMENT_PATTERN + r'+)(\s*)(\.?)(\s*)(PROC)(\s*\n)', + bygroups(Name.Function, Whitespace, Operator, Whitespace, Keyword.Declaration, Whitespace)), + (r'[0-9]+\.[0-9]*', Number.Float), + (r'[0-9]+', Number.Integer), + (r"'(''|[^'])*'", String), + (r'\s+', Whitespace), + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name) # Everything else just belongs to a name + ], + 'after_declaration': [ + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Function), + ('', Whitespace, '#pop') + ], + 'after_macro_argument': [ + (r'\*.*\n', Comment.Single, '#pop'), + (r'\s+', Whitespace, '#pop'), + (_OPERATORS_PATTERN, Operator, '#pop'), + (r"'(''|[^'])*'", String, '#pop'), + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name) # Everything else just belongs to a name + ], + } + _COMMENT_LINE_REGEX = re.compile(r'^\s*\*') + 
_MACRO_HEADER_REGEX = re.compile(r'^\s*MACRO') + + def analyse_text(text): + """ + Perform a structural analysis for basic Easytrieve constructs. + """ + result = 0.0 + lines = text.split('\n') + hasEndProc = False + hasHeaderComment = False + hasFile = False + hasJob = False + hasProc = False + hasParm = False + hasReport = False + + def isCommentLine(line): + return EasytrieveLexer._COMMENT_LINE_REGEX.match(lines[0]) is not None + + def isEmptyLine(line): + return not bool(line.strip()) + + # Remove possible empty lines and header comments. + while lines and (isEmptyLine(lines[0]) or isCommentLine(lines[0])): + if not isEmptyLine(lines[0]): + hasHeaderComment = True + del lines[0] + + if EasytrieveLexer._MACRO_HEADER_REGEX.match(lines[0]): + # Looks like an Easytrieve macro. + result = 0.4 + if hasHeaderComment: + result += 0.4 + else: + # Scan the source for lines starting with indicators. + for line in lines: + words = line.split() + if (len(words) >= 2): + firstWord = words[0] + if not hasReport: + if not hasJob: + if not hasFile: + if not hasParm: + if firstWord == 'PARM': + hasParm = True + if firstWord == 'FILE': + hasFile = True + if firstWord == 'JOB': + hasJob = True + elif firstWord == 'PROC': + hasProc = True + elif firstWord == 'END-PROC': + hasEndProc = True + elif firstWord == 'REPORT': + hasReport = True + + # Weight the findings. + if hasJob and (hasProc == hasEndProc): + if hasHeaderComment: + result += 0.1 + if hasParm: + if hasProc: + # Found PARM, JOB and PROC/END-PROC: + # pretty sure this is Easytrieve. + result += 0.8 + else: + # Found PARAM and JOB: probably this is Easytrieve + result += 0.5 + else: + # Found JOB and possibly other keywords: might be Easytrieve + result += 0.11 + if hasParm: + # Note: PARAM is not a proper English word, so this is + # regarded a much better indicator for Easytrieve than + # the other words. 
+ result += 0.2 + if hasFile: + result += 0.01 + if hasReport: + result += 0.01 + assert 0.0 <= result <= 1.0 + return result + + +class JclLexer(RegexLexer): + """ + `Job Control Language (JCL) <http://publibz.boulder.ibm.com/cgi-bin/bookmgr_OS390/BOOKS/IEA2B570/CCONTENTS>`_ + is a scripting language used on mainframe platforms to instruct the system + on how to run a batch job or start a subsystem. It is somewhat + comparable to MS DOS batch and Unix shell scripts. + + .. versionadded:: 2.1 + """ + name = 'JCL' + aliases = ['jcl'] + filenames = ['*.jcl'] + mimetypes = ['text/x-jcl'] + flags = re.IGNORECASE + + tokens = { + 'root': [ + (r'//\*.*\n', Comment.Single), + (r'//', Keyword.Pseudo, 'statement'), + (r'/\*', Keyword.Pseudo, 'jes2_statement'), + # TODO: JES3 statement + (r'.*\n', Other) # Input text or inline code in any language. + ], + 'statement': [ + (r'\s*\n', Whitespace, '#pop'), + (r'([a-z][a-z_0-9]*)(\s+)(exec|job)(\s*)', + bygroups(Name.Label, Whitespace, Keyword.Reserved, Whitespace), + 'option'), + (r'[a-z][a-z_0-9]*', Name.Variable, 'statement_command'), + (r'\s+', Whitespace, 'statement_command'), + ], + 'statement_command': [ + (r'\s+(command|cntl|dd|endctl|endif|else|include|jcllib|' + r'output|pend|proc|set|then|xmit)\s+', Keyword.Reserved, 'option'), + include('option') + ], + 'jes2_statement': [ + (r'\s*\n', Whitespace, '#pop'), + (r'\$', Keyword, 'option'), + (r'\b(jobparam|message|netacct|notify|output|priority|route|' + r'setup|signoff|xeq|xmit)\b', Keyword, 'option'), + ], + 'option': [ + #(r'\n', Text, 'root'), + (r'\*', Name.Builtin), + (r'[\[\](){}<>;,]', Punctuation), + (r'[-+*/=&%]', Operator), + (r'[a-z_][a-z_0-9]*', Name), + (r'[0-9]+\.[0-9]*', Number.Float), + (r'\.[0-9]+', Number.Float), + (r'[0-9]+', Number.Integer), + (r"'", String, 'option_string'), + (r'[ \t]+', Whitespace, 'option_comment'), + (r'\.', Punctuation), + ], + 'option_string': [ + (r"(\n)(//)", bygroups(Text, Keyword.Pseudo)), + (r"''", String), + (r"[^']", 
String), + (r"'", String, '#pop'), + ], + 'option_comment': [ + #(r'\n', Text, 'root'), + (r'.+', Comment.Single), + ] + } + + _JOB_HEADER_PATTERN = re.compile(r'^//[a-z#$@][a-z0-9#$@]{0,7}\s+job(\s+.*)?$', re.IGNORECASE) + + def analyse_text(text): + """ + Recognize JCL job by header. + """ + result = 0.0 + lines = text.split('\n') + if len(lines) > 0: + if JclLexer._JOB_HEADER_PATTERN.match(lines[0]): + result = 1.0 + assert 0.0 <= result <= 1.0 + return result + + diff --git a/pygments/lexers/shell.py b/pygments/lexers/shell.py index 810ee7da..cd9cad15 100644 --- a/pygments/lexers/shell.py +++ b/pygments/lexers/shell.py @@ -47,7 +47,9 @@ class BashLexer(RegexLexer): (r'\$\(\(', Keyword, 'math'), (r'\$\(', Keyword, 'paren'), (r'\$\{#?', String.Interpol, 'curly'), - (r'\$#?(\w+|.)', Name.Variable), + (r'\$[a-fA-F_][a-fA-F0-9_]*', Name.Variable), # user variable + (r'\$(?:\d+|[#$?!_*@-])', Name.Variable), # builtin + (r'\$', Text), ], 'basic': [ (r'\b(if|fi|else|while|do|done|for|then|return|function|case|' @@ -60,7 +62,8 @@ class BashLexer(RegexLexer): r'shopt|source|suspend|test|time|times|trap|true|type|typeset|' r'ulimit|umask|unalias|unset|wait)\s*\b(?!\.)', Name.Builtin), - (r'#.*\n', Comment), + (r'\A#!.+\n', Comment.Hashbang), + (r'#.*\n', Comment.Single), (r'\\[\w\W]', String.Escape), (r'(\b\w+)(\s*)(=)', bygroups(Name.Variable, Text, Operator)), (r'[\[\]{}()=]', Operator), diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index f575ed38..646a9f31 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -489,8 +489,8 @@ class MySqlLexer(RegexLexer): r'day_hour|day_microsecond|day_minute|day_second|dec|decimal|' r'declare|default|delayed|delete|desc|describe|deterministic|' r'distinct|distinctrow|div|double|drop|dual|each|else|elseif|' - r'enclosed|escaped|exists|exit|explain|fetch|float|float4|float8' - r'|for|force|foreign|from|fulltext|grant|group|having|' + r'enclosed|escaped|exists|exit|explain|fetch|flush|float|float4|' + 
r'float8|for|force|foreign|from|fulltext|grant|group|having|' r'high_priority|hour_microsecond|hour_minute|hour_second|if|' r'ignore|in|index|infile|inner|inout|insensitive|insert|int|' r'int1|int2|int3|int4|int8|integer|interval|into|is|iterate|' diff --git a/pygments/lexers/supercollider.py b/pygments/lexers/supercollider.py new file mode 100644 index 00000000..2fc302df --- /dev/null +++ b/pygments/lexers/supercollider.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.supercollider + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexer for SuperCollider + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, include, words +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Other + +__all__ = ['SuperColliderLexer'] + +class SuperColliderLexer(RegexLexer): + """ + For SuperCollider source code. + + .. versionadded:: 2.1 + """ + + name = 'SuperCollider' + aliases = ['sc', 'supercollider'] + filenames = ['*.sc', '*.scd'] + mimetypes = ['application/supercollider', 'text/supercollider', ] + + flags = re.DOTALL | re.MULTILINE + tokens = { + 'commentsandwhitespace': [ + (r'\s+', Text), + (r'<!--', Comment), + (r'//.*?\n', Comment.Single), + (r'/\*.*?\*/', Comment.Multiline) + ], + 'slashstartsregex': [ + include('commentsandwhitespace'), + (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' + r'([gim]+\b|\B)', String.Regex, '#pop'), + (r'(?=/)', Text, ('#pop', 'badregex')), + (r'', Text, '#pop') + ], + 'badregex': [ + (r'\n', Text, '#pop') + ], + 'root': [ + (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), + include('commentsandwhitespace'), + (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' + r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'), + (r'[{(\[;,]', Punctuation, 'slashstartsregex'), + (r'[})\].]', Punctuation), + (words(( + 'for', 'in', 'while', 'do', 'break', 'return', 'continue', + 'switch', 
'case', 'default', 'if', 'else', 'throw', 'try', + 'catch', 'finally', 'new', 'delete', 'typeof', 'instanceof', + 'void'), suffix=r'\b'), + Keyword, 'slashstartsregex'), + (words(('var', 'let', 'with', 'function', 'arg'), suffix=r'\b'), + Keyword.Declaration, 'slashstartsregex'), + (words(( + '(abstract', 'boolean', 'byte', 'char', 'class', 'const', + 'debugger', 'double', 'enum', 'export', 'extends', 'final', + 'float', 'goto', 'implements', 'import', 'int', 'interface', + 'long', 'native', 'package', 'private', 'protected', 'public', + 'short', 'static', 'super', 'synchronized', 'throws', + 'transient', 'volatile'), suffix=r'\b'), + Keyword.Reserved), + (words(('true', 'false', 'nil', 'inf'), suffix=r'\b'), Keyword.Constant), + (words(( + 'Array', 'Boolean', 'Date', 'Error', 'Function', 'Number', + 'Object', 'Packages', 'RegExp', 'String', 'Error', + 'isFinite', 'isNaN', 'parseFloat', 'parseInt', 'super', + 'thisFunctionDef', 'thisFunction', 'thisMethod', 'thisProcess', + 'thisThread', 'this'), suffix=r'\b'), + Name.Builtin), + (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), + (r'\\?[$a-zA-Z_][a-zA-Z0-9_]*', String.Symbol), + (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'[0-9]+', Number.Integer), + (r'"(\\\\|\\"|[^"])*"', String.Double), + (r"'(\\\\|\\'|[^'])*'", String.Single), + ] + } diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py index 3cb73059..71055a9f 100644 --- a/pygments/lexers/templates.py +++ b/pygments/lexers/templates.py @@ -369,7 +369,7 @@ class DjangoLexer(RegexLexer): r'with(?:(?:out)?\s*context)?|scoped|ignore\s+missing)\b', Keyword), (r'(loop|block|super|forloop)\b', Name.Builtin), - (r'[a-zA-Z][\w-]*', Name.Variable), + (r'[a-zA-Z_][\w-]*', Name.Variable), (r'\.\w+', Name.Variable), (r':?"(\\\\|\\"|[^"])*"', String.Double), (r":?'(\\\\|\\'|[^'])*'", String.Single), @@ -568,10 +568,12 @@ class MasonLexer(RegexLexer): } def analyse_text(text): - rv = 0.0 - if re.search('<&', 
text) is not None: - rv = 1.0 - return rv + result = 0.0 + if re.search(r'</%(class|doc|init)%>', text) is not None: + result = 1.0 + elif re.search(r'<&.+&>', text, re.DOTALL) is not None: + result = 0.11 + return result class MakoLexer(RegexLexer): diff --git a/pygments/lexers/testing.py b/pygments/lexers/testing.py index 55f4c054..4a91c5b1 100644 --- a/pygments/lexers/testing.py +++ b/pygments/lexers/testing.py @@ -27,9 +27,9 @@ class GherkinLexer(RegexLexer): mimetypes = ['text/x-gherkin'] feature_keywords = u'^(기능|機能|功能|フィーチャ|خاصية|תכונה|Функціонал|Функционалност|Функционал|Фича|Особина|Могућност|Özellik|Właściwość|Tính năng|Trajto|Savybė|Požiadavka|Požadavek|Osobina|Ominaisuus|Omadus|OH HAI|Mogućnost|Mogucnost|Jellemző|Fīča|Funzionalità|Funktionalität|Funkcionalnost|Funkcionalitāte|Funcționalitate|Functionaliteit|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Feature|Egenskap|Egenskab|Crikey|Característica|Arwedd)(:)(.*)$' - feature_element_keywords = u'^(\\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo 
de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$' + feature_element_keywords = u'^(\\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|剧本大纲|剧本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All 
y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$' examples_keywords = u'^(\\s*)(예|例子|例|サンプル|امثلة|דוגמאות|Сценарији|Примери|Приклади|Мисоллар|Значения|Örnekler|Voorbeelden|Variantai|Tapaukset|Scenarios|Scenariji|Scenarijai|Příklady|Példák|Príklady|Przykłady|Primjeri|Primeri|Piemēri|Pavyzdžiai|Paraugs|Juhtumid|Exemplos|Exemples|Exemplele|Exempel|Examples|Esempi|Enghreifftiau|Ekzemploj|Eksempler|Ejemplos|EXAMPLZ|Dữ liệu|Contoh|Cobber|Beispiele)(:)(.*)$' - step_keywords = u'^(\\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假如|但是|但し|並且|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y\'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y\'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )' + step_keywords = 
u'^(\\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假设|假如|假定|但是|但し|並且|并且|同時|同时|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y\'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y\'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )' tokens = { 'comments': [ diff --git a/pygments/lexers/theorem.py b/pygments/lexers/theorem.py index 9898b05d..47fdc8b6 100644 --- a/pygments/lexers/theorem.py +++ b/pygments/lexers/theorem.py @@ -414,7 +414,8 @@ class LeanLexer(RegexLexer): '-.', '->', '.', '..', '...', '::', ':>', ';', ';;', '<', '<-', '=', '==', '>', '_', '`', '|', '||', '~', '=>', '<=', '>=', '/\\', '\\/', u'∀', u'Π', u'λ', u'↔', u'∧', u'∨', u'≠', u'≤', u'≥', - u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', u'⌞', u'⌟', u'≡' + u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', 
u'⌞', u'⌟', u'≡', + u'⟨', u'⟩' ) punctuation = ('(', ')', ':', '{', '}', '[', ']', u'⦃', u'⦄', ':=', ',') diff --git a/pygments/lexers/webmisc.py b/pygments/lexers/webmisc.py index 08b6c969..c37af144 100644 --- a/pygments/lexers/webmisc.py +++ b/pygments/lexers/webmisc.py @@ -731,9 +731,9 @@ class QmlLexer(RegexLexer): # JavascriptLexer above. name = 'QML' - aliases = ['qml'] - filenames = ['*.qml'] - mimetypes = ['application/x-qml'] + aliases = ['qml', 'qbs'] + filenames = ['*.qml', '*.qbs'] + mimetypes = ['application/x-qml', 'application/x-qt.qbs+qml'] # pasted from JavascriptLexer, with some additions flags = re.DOTALL | re.MULTILINE diff --git a/pygments/modeline.py b/pygments/modeline.py index 54df90c4..2200f1cf 100644 --- a/pygments/modeline.py +++ b/pygments/modeline.py @@ -13,16 +13,19 @@ import re __all__ = ['get_filetype_from_buffer'] + modeline_re = re.compile(r''' (?: vi | vim | ex ) (?: [<=>]? \d* )? : .* (?: ft | filetype | syn | syntax ) = ( [^:\s]+ ) ''', re.VERBOSE) + def get_filetype_from_line(l): m = modeline_re.search(l) if m: return m.group(1) + def get_filetype_from_buffer(buf, max_lines=5): """ Scan the buffer for modelines and return filetype if one is found. 
@@ -32,7 +35,7 @@ def get_filetype_from_buffer(buf, max_lines=5): ret = get_filetype_from_line(l) if ret: return ret - for l in lines[max_lines:0:-1]: + for l in lines[max_lines:-1:-1]: ret = get_filetype_from_line(l) if ret: return ret diff --git a/pygments/style.py b/pygments/style.py index a49e9b7e..b2b990ea 100644 --- a/pygments/style.py +++ b/pygments/style.py @@ -40,7 +40,7 @@ class StyleMeta(type): continue ndef = _styles.get(token.parent, None) styledefs = obj.styles.get(token, '').split() - if not ndef or token is None: + if not ndef or token is None: ndef = ['', 0, 0, 0, '', '', 0, 0, 0] elif 'noinherit' in styledefs and token is not Token: ndef = _styles[Token][:] diff --git a/pygments/styles/__init__.py b/pygments/styles/__init__.py index ca657609..d7a0564a 100644 --- a/pygments/styles/__init__.py +++ b/pygments/styles/__init__.py @@ -38,6 +38,9 @@ STYLE_MAP = { 'igor': 'igor::IgorStyle', 'paraiso-light': 'paraiso_light::ParaisoLightStyle', 'paraiso-dark': 'paraiso_dark::ParaisoDarkStyle', + 'lovelace': 'lovelace::LovelaceStyle', + 'algol': 'algol::AlgolStyle', + 'algol_nu': 'algol_nu::Algol_NuStyle', } diff --git a/pygments/styles/algol.py b/pygments/styles/algol.py new file mode 100644 index 00000000..a8726009 --- /dev/null +++ b/pygments/styles/algol.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +""" + pygments.styles.algol + ~~~~~~~~~~~~~~~~~~~~~ + + Algol publication style. + + This style renders source code for publication of algorithms in + scientific papers and academic texts, where its format is frequently used. + + It is based on the style of the revised Algol-60 language report[1]. + + o No colours, only black, white and shades of grey are used. + o Keywords are rendered in lowercase underline boldface. + o Builtins are rendered in lowercase boldface italic. + o Docstrings and pragmas are rendered in dark grey boldface. + o Library identifiers are rendered in dark grey boldface italic. + o Comments are rendered in grey italic. 
+ + To render keywords without underlining, refer to the `Algol_Nu` style. + + For lowercase conversion of keywords and builtins in languages where + these are not or might not be lowercase, a supporting lexer is required. + The Algol and Modula-2 lexers automatically convert to lowercase whenever + this style is selected. + + [1] `Revised Report on the Algorithmic Language Algol-60 <http://www.masswerk.at/algol60/report.htm>` + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.style import Style +from pygments.token import Keyword, Name, Comment, String, Error, Operator + + +class AlgolStyle(Style): + + background_color = "#ffffff" + default_style = "" + + styles = { + Comment: "italic #888", + Comment.Preproc: "bold noitalic #888", + Comment.Special: "bold noitalic #888", + + Keyword: "underline bold", + Keyword.Declaration: "italic", + + Name.Builtin: "bold italic", + Name.Builtin.Pseudo: "bold italic", + Name.Namespace: "bold italic #666", + Name.Class: "bold italic #666", + Name.Function: "bold italic #666", + Name.Variable: "bold italic #666", + Name.Constant: "bold italic #666", + + Operator.Word: "bold", + + String: "italic #666", + + Error: "border:#FF0000" + } diff --git a/pygments/styles/algol_nu.py b/pygments/styles/algol_nu.py new file mode 100644 index 00000000..392838f2 --- /dev/null +++ b/pygments/styles/algol_nu.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +""" + pygments.styles.algol_nu + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Algol publication style without underlining of keywords. + + This style renders source code for publication of algorithms in + scientific papers and academic texts, where its format is frequently used. + + It is based on the style of the revised Algol-60 language report[1]. + + o No colours, only black, white and shades of grey are used. + o Keywords are rendered in lowercase boldface. + o Builtins are rendered in lowercase boldface italic. 
+ o Docstrings and pragmas are rendered in dark grey boldface. + o Library identifiers are rendered in dark grey boldface italic. + o Comments are rendered in grey italic. + + To render keywords with underlining, refer to the `Algol` style. + + For lowercase conversion of keywords and builtins in languages where + these are not or might not be lowercase, a supporting lexer is required. + The Algol and Modula-2 lexers automatically convert to lowercase whenever + this style is selected. + + [1] `Revised Report on the Algorithmic Language Algol-60 <http://www.masswerk.at/algol60/report.htm>` + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.style import Style +from pygments.token import Keyword, Name, Comment, String, Error, Operator + + +class Algol_NuStyle(Style): + + background_color = "#ffffff" + default_style = "" + + styles = { + Comment: "italic #888", + Comment.Preproc: "bold noitalic #888", + Comment.Special: "bold noitalic #888", + + Keyword: "bold", + Keyword.Declaration: "italic", + + Name.Builtin: "bold italic", + Name.Builtin.Pseudo: "bold italic", + Name.Namespace: "bold italic #666", + Name.Class: "bold italic #666", + Name.Function: "bold italic #666", + Name.Variable: "bold italic #666", + Name.Constant: "bold italic #666", + + Operator.Word: "bold", + + String: "italic #666", + + Error: "border:#FF0000" + } diff --git a/pygments/styles/lovelace.py b/pygments/styles/lovelace.py new file mode 100644 index 00000000..31bd5505 --- /dev/null +++ b/pygments/styles/lovelace.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +""" + pygments.styles.lovelace + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Lovelace by Miikka Salminen + + Pygments style by Miikka Salminen (https://github.com/miikkas) + A desaturated, somewhat subdued style created for the Lovelace interactive + learning environment. 
+""" + +from pygments.style import Style +from pygments.token import Keyword, Name, Comment, String, Error, \ + Number, Operator, Punctuation, Generic, Whitespace + + +class LovelaceStyle(Style): + """ + The style used in Lovelace interactive learning environment. Tries to avoid + the "angry fruit salad" effect with desaturated and dim colours. + """ + _KW_BLUE = '#2838b0' + _NAME_GREEN = '#388038' + _DOC_ORANGE = '#b85820' + _OW_PURPLE = '#a848a8' + _FUN_BROWN = '#785840' + _STR_RED = '#b83838' + _CLS_CYAN = '#287088' + _ESCAPE_LIME = '#709030' + _LABEL_CYAN = '#289870' + _EXCEPT_YELLOW = '#908828' + + default_style = '#222222' + + styles = { + Whitespace: '#a89028', + Comment: 'italic #888888', + Comment.Hashbang: _CLS_CYAN, + Comment.Multiline: '#888888', + Comment.Preproc: 'noitalic '+_LABEL_CYAN, + + Keyword: _KW_BLUE, + Keyword.Constant: 'italic #444444', + Keyword.Declaration: 'italic', + Keyword.Type: 'italic', + + Operator: '#666666', + Operator.Word: _OW_PURPLE, + + Punctuation: '#888888', + + Name.Attribute: _NAME_GREEN, + Name.Builtin: _NAME_GREEN, + Name.Builtin.Pseudo: 'italic', + Name.Class: _CLS_CYAN, + Name.Constant: _DOC_ORANGE, + Name.Decorator: _CLS_CYAN, + Name.Entity: _ESCAPE_LIME, + Name.Exception: _EXCEPT_YELLOW, + Name.Function: _FUN_BROWN, + Name.Label: _LABEL_CYAN, + Name.Namespace: _LABEL_CYAN, + Name.Tag: _KW_BLUE, + Name.Variable: '#b04040', + Name.Variable.Global:_EXCEPT_YELLOW, + + String: _STR_RED, + String.Char: _OW_PURPLE, + String.Doc: 'italic '+_DOC_ORANGE, + String.Escape: _ESCAPE_LIME, + String.Interpol: 'underline', + String.Other: _OW_PURPLE, + String.Regex: _OW_PURPLE, + + Number: '#444444', + + Generic.Deleted: '#c02828', + Generic.Emph: 'italic', + Generic.Error: '#c02828', + Generic.Heading: '#666666', + Generic.Subheading: '#444444', + Generic.Inserted: _NAME_GREEN, + Generic.Output: '#666666', + Generic.Prompt: '#444444', + Generic.Strong: 'bold', + Generic.Traceback: _KW_BLUE, + + Error: 'bg:'+_OW_PURPLE, + } diff 
--git a/pygments/token.py b/pygments/token.py index e5eadf0d..bfdfc114 100644 --- a/pygments/token.py +++ b/pygments/token.py @@ -179,6 +179,7 @@ STANDARD_TYPES = { Punctuation: 'p', Comment: 'c', + Comment.Hashbang: 'ch', Comment.Multiline: 'cm', Comment.Preproc: 'cp', Comment.Single: 'c1', diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..4754a9d2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +coverage +nose +pyflakes +pylint +tox diff --git a/scripts/debug_lexer.py b/scripts/debug_lexer.py index 4dac42ca..cedd0988 100755 --- a/scripts/debug_lexer.py +++ b/scripts/debug_lexer.py @@ -110,24 +110,24 @@ def main(fn, lexer=None, options={}): if lxcls is None: raise AssertionError('no lexer found for file %r' % fn) debug_lexer = False - if profile: - # does not work for e.g. ExtendedRegexLexers - if lxcls.__bases__ == (RegexLexer,): - # yes we can! (change the metaclass) - lxcls.__class__ = ProfilingRegexLexerMeta - lxcls.__bases__ = (ProfilingRegexLexer,) - lxcls._prof_sort_index = profsort - else: - if lxcls.__bases__ == (RegexLexer,): - lxcls.__bases__ = (DebuggingRegexLexer,) - debug_lexer = True - elif lxcls.__bases__ == (DebuggingRegexLexer,): - # already debugged before - debug_lexer = True - else: - # HACK: ExtendedRegexLexer subclasses will only partially work here. - lxcls.__bases__ = (DebuggingRegexLexer,) - debug_lexer = True + # if profile: + # # does not work for e.g. ExtendedRegexLexers + # if lxcls.__bases__ == (RegexLexer,): + # # yes we can! (change the metaclass) + # lxcls.__class__ = ProfilingRegexLexerMeta + # lxcls.__bases__ = (ProfilingRegexLexer,) + # lxcls._prof_sort_index = profsort + # else: + # if lxcls.__bases__ == (RegexLexer,): + # lxcls.__bases__ = (DebuggingRegexLexer,) + # debug_lexer = True + # elif lxcls.__bases__ == (DebuggingRegexLexer,): + # # already debugged before + # debug_lexer = True + # else: + # # HACK: ExtendedRegexLexer subclasses will only partially work here. 
+ # lxcls.__bases__ = (DebuggingRegexLexer,) + # debug_lexer = True lx = lxcls(**options) lno = 1 @@ -149,13 +149,10 @@ def main(fn, lexer=None, options={}): for type, val in lx.get_tokens(text): lno += val.count('\n') - if type == Error: + if type == Error and not ignerror: print('Error parsing', fn, 'on line', lno) - print('Previous tokens' + (debug_lexer and ' and states' or '') + ':') - if showall: - for tok, state in map(None, tokens, states): - show_token(tok, state) - else: + if not showall: + print('Previous tokens' + (debug_lexer and ' and states' or '') + ':') for i in range(max(len(tokens) - num, 0), len(tokens)): if debug_lexer: show_token(tokens[i], states[i]) @@ -175,9 +172,8 @@ def main(fn, lexer=None, options={}): states.append(lx.ctx.stack[:]) else: states.append(None) - if showall: - for tok, state in zip(tokens, states): - show_token(tok, state) + if showall: + show_token((type, val), states[-1] if debug_lexer else None) return 0 @@ -201,6 +197,7 @@ Debugging lexing errors: -n N show the last N tokens on error -a always show all lexed tokens (default is only to show them when an error occurs) + -e do not stop on error tokens Profiling: @@ -212,6 +209,7 @@ Profiling: num = 10 showall = False +ignerror = False lexer = None options = {} profile = False @@ -219,12 +217,14 @@ profsort = 4 if __name__ == '__main__': import getopt - opts, args = getopt.getopt(sys.argv[1:], 'n:l:apO:s:h') + opts, args = getopt.getopt(sys.argv[1:], 'n:l:aepO:s:h') for opt, val in opts: if opt == '-n': num = int(val) elif opt == '-a': showall = True + elif opt == '-e': + ignerror = True elif opt == '-l': lexer = val elif opt == '-p': diff --git a/tests/examplefiles/99_bottles_of_beer.chpl b/tests/examplefiles/99_bottles_of_beer.chpl index 47fcaaf6..3629028d 100644 --- a/tests/examplefiles/99_bottles_of_beer.chpl +++ b/tests/examplefiles/99_bottles_of_beer.chpl @@ -159,3 +159,16 @@ var wideOpen = 0o777, clique_y = 0O660, zeroOct = 0o0, minPosOct = 0O1; + +private module M3 
{ + private proc foo() { + + } + + private iter bar() { + + } + + private var x: int; + +}
\ No newline at end of file diff --git a/tests/examplefiles/ahcon.f b/tests/examplefiles/ahcon.f new file mode 100644 index 00000000..48ae920b --- /dev/null +++ b/tests/examplefiles/ahcon.f @@ -0,0 +1,340 @@ + SUBROUTINE AHCON (SIZE,N,M,A,B,OLEVR,OLEVI,CLEVR,CLEVI, TRUNCATED + & SCR1,SCR2,IPVT,JPVT,CON,WORK,ISEED,IERR) !Test inline comment +C +C FUNCTION: +CF +CF Determines whether the pair (A,B) is controllable and flags +CF the eigenvalues corresponding to uncontrollable modes. +CF this ad-hoc controllability calculation uses a random matrix F +CF and computes whether eigenvalues move from A to the controlled +CF system A+B*F. +CF +C USAGE: +CU +CU CALL AHCON (SIZE,N,M,A,B,OLEVR,OLEVI,CLEVR,CLEVI,SCR1,SCR2,IPVT, +CU JPVT,CON,WORK,ISEED,IERR) +CU +CU since AHCON generates different random F matrices for each +CU call, as long as iseed is not re-initialized by the main +CU program, and since this code has the potential to be fooled +CU by extremely ill-conditioned problems, the cautious user +CU may wish to call it multiple times and rely, perhaps, on +CU a 2-of-3 vote. We believe, but have not proved, that any +CU errors this routine may produce are conservative--i.e., that +CU it may flag a controllable mode as uncontrollable, but +CU not vice-versa. +CU +C INPUTS: +CI +CI SIZE integer - first dimension of all 2-d arrays. +CI +CI N integer - number of states. +CI +CI M integer - number of inputs. +CI +CI A double precision - SIZE by N array containing the +CI N by N system dynamics matrix A. +CI +CI B double precision - SIZE by M array containing the +CI N by M system input matrix B. +CI +CI ISEED initial seed for random number generator; if ISEED=0, +CI then AHCON will set ISEED to a legal value. +CI +C OUTPUTS: +CO +CO OLEVR double precision - N dimensional vector containing the +CO real parts of the eigenvalues of A. +CO +CO OLEVI double precision - N dimensional vector containing the +CO imaginary parts of the eigenvalues of A. 
+CO +CO CLEVR double precision - N dimensional vector work space +CO containing the real parts of the eigenvalues of A+B*F, +CO where F is the random matrix. +CO +CO CLEVI double precision - N dimensional vector work space +CO containing the imaginary parts of the eigenvalues of +CO A+B*F, where F is the random matrix. +CO +CO SCR1 double precision - N dimensional vector containing the +CO magnitudes of the corresponding eigenvalues of A. +CO +CO SCR2 double precision - N dimensional vector containing the +CO damping factors of the corresponding eigenvalues of A. +CO +CO IPVT integer - N dimensional vector; contains the row pivots +CO used in finding the nearest neighbor eigenvalues between +CO those of A and of A+B*F. The IPVT(1)th eigenvalue of +CO A and the JPVT(1)th eigenvalue of A+B*F are the closest +CO pair. +CO +CO JPVT integer - N dimensional vector; contains the column +CO pivots used in finding the nearest neighbor eigenvalues; +CO see IPVT. +CO +CO CON logical - N dimensional vector; flagging the uncontrollable +CO modes of the system. CON(I)=.TRUE. implies the +CO eigenvalue of A given by DCMPLX(OLEVR(IPVT(I)),OLEVI(IPVT(i))) +CO corresponds to a controllable mode; CON(I)=.FALSE. +CO implies an uncontrollable mode for that eigenvalue. +CO +CO WORK double precision - SIZE by N dimensional array containing +CO an N by N matrix. WORK(I,J) is the distance between +CO the open loop eigenvalue given by DCMPLX(OLEVR(I),OLEVI(I)) +CO and the closed loop eigenvalue of A+B*F given by +CO DCMPLX(CLEVR(J),CLEVI(J)). +CO +CO IERR integer - IERR=0 indicates normal return; a non-zero +CO value indicates trouble in the eigenvalue calculation. +CO see the EISPACK and EIGEN documentation for details. +CO +C ALGORITHM: +CA +CA Calculate eigenvalues of A and of A+B*F for a randomly +CA generated F, and see which ones change. 
Use a full pivot +CA search through a matrix of euclidean distance measures +CA between each pair of eigenvalues from (A,A+BF) to +CA determine the closest pairs. +CA +C MACHINE DEPENDENCIES: +CM +CM NONE +CM +C HISTORY: +CH +CH written by: Birdwell & Laub +CH date: May 18, 1985 +CH current version: 1.0 +CH modifications: made machine independent and modified for +CH f77:bb:8-86. +CH changed cmplx -> dcmplx: 7/27/88 jdb +CH +C ROUTINES CALLED: +CC +CC EIGEN,RAND +CC +C COMMON MEMORY USED: +CM +CM none +CM +C---------------------------------------------------------------------- +C written for: The CASCADE Project +C Oak Ridge National Laboratory +C U.S. Department of Energy +C contract number DE-AC05-840R21400 +C subcontract number 37B-7685 S13 +C organization: The University of Tennessee +C---------------------------------------------------------------------- +C THIS SOFTWARE IS IN THE PUBLIC DOMAIN +C NO RESTRICTIONS ON ITS USE ARE IMPLIED +C---------------------------------------------------------------------- +C +C--global variables: +C + INTEGER SIZE + INTEGER N + INTEGER M + INTEGER IPVT(1) + INTEGER JPVT(1) + INTEGER IERR +C + DOUBLE PRECISION A(SIZE,N) + DOUBLE PRECISION B(SIZE,M) + DOUBLE PRECISION WORK(SIZE,N) + DOUBLE PRECISION CLEVR(N) + DOUBLE PRECISION CLEVI(N) + DOUBLE PRECISION OLEVR(N) + DOUBLE PRECISION OLEVI(N) + DOUBLE PRECISION SCR1(N) + DOUBLE PRECISION SCR2(N) +C + LOGICAL CON(N) +C +C--local variables: +C + INTEGER ISEED + INTEGER ITEMP + INTEGER K1 + INTEGER K2 + INTEGER I + INTEGER J + INTEGER K + INTEGER IMAX + INTEGER JMAX +C + DOUBLE PRECISION VALUE + DOUBLE PRECISION EPS + DOUBLE PRECISION EPS1 + DOUBLE PRECISION TEMP + DOUBLE PRECISION CURR + DOUBLE PRECISION ANORM + DOUBLE PRECISION BNORM + DOUBLE PRECISION COLNRM + DOUBLE PRECISION RNDMNO +C + DOUBLE COMPLEX DCMPLX +C +C--compute machine epsilon +C + EPS = 1.D0 +100 CONTINUE + EPS = EPS / 2.D0 + EPS1 = 1.D0 + EPS + IF (EPS1 .NE. 
1.D0) GO TO 100 + EPS = EPS * 2.D0 +C +C--compute the l-1 norm of a +C + ANORM = 0.0D0 + DO 120 J = 1, N + COLNRM = 0.D0 + DO 110 I = 1, N + COLNRM = COLNRM + ABS(A(I,J)) +110 CONTINUE + IF (COLNRM .GT. ANORM) ANORM = COLNRM +120 CONTINUE +C +C--compute the l-1 norm of b +C + BNORM = 0.0D0 + DO 140 J = 1, M + COLNRM = 0.D0 + DO 130 I = 1, N + COLNRM = COLNRM + ABS(B(I,J)) +130 CONTINUE + IF (COLNRM .GT. BNORM) BNORM = COLNRM +140 CONTINUE +C +C--compute a + b * f +C + DO 160 J = 1, N + DO 150 I = 1, N + WORK(I,J) = A(I,J) +150 CONTINUE +160 CONTINUE +C +C--the elements of f are random with uniform distribution +C--from -anorm/bnorm to +anorm/bnorm +C--note that f is not explicitly stored as a matrix +C--pathalogical floating point notes: the if (bnorm .gt. 0.d0) +C--test should actually be if (bnorm .gt. dsmall), where dsmall +C--is the smallest representable number whose reciprocal does +C--not generate an overflow or loss of precision. +C + IF (ISEED .EQ. 0) ISEED = 86345823 + IF (ANORM .EQ. 0.D0) ANORM = 1.D0 + IF (BNORM .GT. 0.D0) THEN + TEMP = 2.D0 * ANORM / BNORM + ELSE + TEMP = 2.D0 + END IF + DO 190 K = 1, M + DO 180 J = 1, N + CALL RAND(ISEED,ISEED,RNDMNO) + VALUE = (RNDMNO - 0.5D0) * TEMP + DO 170 I = 1, N + WORK(I,J) = WORK(I,J) + B(I,K)*VALUE +170 CONTINUE +180 CONTINUE +190 CONTINUE +C +C--compute the eigenvalues of a + b*f, and several other things +C + CALL EIGEN (0,SIZE,N,WORK,CLEVR,CLEVI,WORK,SCR1,SCR2,IERR) + IF (IERR .NE. 0) RETURN +C +C--copy a so it is not destroyed +C + DO 210 J = 1, N + DO 200 I = 1, N + WORK(I,J) = A(I,J) +200 CONTINUE +210 CONTINUE +C +C--compute the eigenvalues of a, and several other things +C + CALL EIGEN (0,SIZE,N,WORK,OLEVR,OLEVI,WORK,SCR1,SCR2,IERR) + IF (IERR .NE. 
0) RETURN +C +C--form the matrix of distances between eigenvalues of a and +C--EIGENVALUES OF A+B*F +C + DO 230 J = 1, N + DO 220 I = 1, N + WORK(I,J) = + & ABS(DCMPLX(OLEVR(I),OLEVI(I))-DCMPLX(CLEVR(J),CLEVI(J))) +220 CONTINUE +230 CONTINUE +C +C--initialize row and column pivots +C + DO 240 I = 1, N + IPVT(I) = I + JPVT(I) = I +240 CONTINUE +C +C--a little bit messy to avoid swapping columns and +C--rows of work +C + DO 270 I = 1, N-1 +C +C--find the minimum element of each lower right square +C--submatrix of work, for submatrices of size n x n +C--through 2 x 2 +C + CURR = WORK(IPVT(I),JPVT(I)) + IMAX = I + JMAX = I + TEMP = CURR +C +C--find the minimum element +C + DO 260 K1 = I, N + DO 250 K2 = I, N + IF (WORK(IPVT(K1),JPVT(K2)) .LT. TEMP) THEN + TEMP = WORK(IPVT(K1),JPVT(K2)) + IMAX = K1 + JMAX = K2 + END IF +250 CONTINUE +260 CONTINUE +C +C--update row and column pivots for indirect addressing of work +C + ITEMP = IPVT(I) + IPVT(I) = IPVT(IMAX) + IPVT(IMAX) = ITEMP +C + ITEMP = JPVT(I) + JPVT(I) = JPVT(JMAX) + JPVT(JMAX) = ITEMP +C +C--do next submatrix +C +270 CONTINUE +C +C--this threshold for determining when an eigenvalue has +C--not moved, and is therefore uncontrollable, is critical, +C--and may require future changes with more experience. +C + EPS1 = SQRT(EPS) +C +C--for each eigenvalue pair, decide if it is controllable +C + DO 280 I = 1, N +C +C--note that we are working with the "pivoted" work matrix +C--and are looking at its diagonal elements +C + IF (WORK(IPVT(I),JPVT(I))/ANORM .LE. EPS1) THEN + CON(I) = .FALSE. + ELSE + CON(I) = .TRUE. + END IF +280 CONTINUE +C +C--finally! +C + RETURN + END diff --git a/tests/examplefiles/autoit_submit.au3 b/tests/examplefiles/autoit_submit.au3 index e5054dea..84fb7150 100644 --- a/tests/examplefiles/autoit_submit.au3 +++ b/tests/examplefiles/autoit_submit.au3 @@ -16,8 +16,10 @@ _IEFormElementOptionSelect ($oSelect, "S2", 1, "byText") ;options raido
_IEFormElementRadioSelect($oForm, "2nd", "type", 1, "byValue")
+#cs
ConsoleWrite(@Error)
Sleep(10000)
+#ce
_IEFormSubmit($oForm, 0)
_IELoadWait($oIE)
Sleep(60000)
diff --git a/tests/examplefiles/eval.rs b/tests/examplefiles/eval.rs new file mode 100644 index 00000000..17e585a0 --- /dev/null +++ b/tests/examplefiles/eval.rs @@ -0,0 +1,606 @@ +// ------------------------------------------------------------------------------------------------- +// Rick, a Rust intercal compiler. Save your souls! +// +// Copyright (c) 2015 Georg Brandl +// +// This program is free software; you can redistribute it and/or modify it under the terms of the +// GNU General Public License as published by the Free Software Foundation; either version 2 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without +// even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along with this program; +// if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +// ------------------------------------------------------------------------------------------------- + +/// Interprets INTERCAL source. +/// +/// The evaluator is used when rick is called with `-i`, or when the compiler generates +/// the output while compiling (in the constant-output case). + +use std::fmt::{ Debug, Display }; +use std::io::Write; +use std::u16; + +use err::{ Res, IE123, IE129, IE252, IE275, IE555, IE633, IE774, IE994 }; +use ast::{ self, Program, Stmt, StmtBody, ComeFrom, Expr, Var, VType }; +use stdops::{ Bind, Array, write_number, read_number, check_chance, check_ovf, pop_jumps, + get_random_seed, mingle, select, and_16, and_32, or_16, or_32, xor_16, xor_32 }; + + +/// Represents a value (either 16-bit or 32-bit) at runtime. +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum Val { + I16(u16), + I32(u32), +} + +impl Val { + /// Cast as a 16-bit value; returns an error if 32-bit and too big. 
+ pub fn as_u16(&self) -> Res<u16> { + match *self { + Val::I16(v) => Ok(v), + Val::I32(v) => { + if v > (u16::MAX as u32) { + return IE275.err(); + } + Ok(v as u16) + } + } + } + + /// Cast as a 32-bit value; always succeeds. + pub fn as_u32(&self) -> u32 { + match *self { + Val::I16(v) => v as u32, + Val::I32(v) => v + } + } + + /// Cast as an usize value; always succeeds. + pub fn as_usize(&self) -> usize { + self.as_u32() as usize + } + + /// Create from a 32-bit value; will select the smallest possible type. + pub fn from_u32(v: u32) -> Val { + if v & 0xFFFF == v { + Val::I16(v as u16) + } else { + Val::I32(v) + } + } +} + +/// The state of the interpreter's evaluator. +pub struct Eval<'a> { + /// Program to execute. + program: &'a Program, + /// Stream to use for printing output. + stdout: &'a mut Write, + /// Whether to print debugging output during execution. + debug: bool, + /// Variable bindings for the four types of variables. + spot: Vec<Bind<u16>>, + twospot: Vec<Bind<u32>>, + tail: Vec<Bind<Array<u16>>>, + hybrid: Vec<Bind<Array<u32>>>, + /// The infamous NEXT stack, capable of holding 80 elements. + jumps: Vec<ast::LogLine>, + /// Abstain counter for each statement. + abstain: Vec<u32>, + /// Binary I/O "tape" state. + last_in: u8, + last_out: u8, + /// Random number generator state. + rand_st: u32, + /// Counts the number of executed statements. + stmt_ctr: usize, +} + +/// Represents the control flow effect of an executed statement. +enum StmtRes { + /// normal execution, next statement + Next, + /// jump around, from DO ... NEXT + Jump(usize), + /// jump back, from RESUME + Back(usize), + /// start from the first statement, from TRY AGAIN + FromTop, + /// end the program, from GIVE UP + End, +} + +impl<'a> Eval<'a> { + /// Construct a new evaluator. 
+ pub fn new(program: &'a Program, stdout: &'a mut Write, debug: bool, + random: bool) -> Eval<'a> { + let abs = program.stmts.iter().map(|stmt| stmt.props.disabled as u32).collect(); + let nvars = (program.var_info.0.len(), + program.var_info.1.len(), + program.var_info.2.len(), + program.var_info.3.len()); + Eval { + program: program, + stdout: stdout, + debug: debug, + spot: vec![Bind::new(0); nvars.0], + twospot: vec![Bind::new(0); nvars.1], + tail: vec![Bind::new(Array::empty()); nvars.2], + hybrid: vec![Bind::new(Array::empty()); nvars.3], + jumps: Vec::with_capacity(80), + rand_st: if random { get_random_seed() } else { 0 }, + abstain: abs, + last_in: 0, + last_out: 0, + stmt_ctr: 0, + } + } + + /// Interpret the program. Returns either the number of executed statements, + /// or an error (RtError). + pub fn eval(&mut self) -> Res<usize> { + let mut pctr = 0; // index of current statement + let program = self.program.clone(); + let nstmts = program.stmts.len(); + loop { + // check for falling off the end + if pctr >= nstmts { + // if the last statement was a TRY AGAIN, falling off the end is fine + if let StmtBody::TryAgain = program.stmts[program.stmts.len() - 1].body { + break; + } + return IE633.err(); + } + self.stmt_ctr += 1; + let stmt = &program.stmts[pctr]; + // execute statement if not abstained + if self.abstain[pctr] == 0 { + // check execution chance + let (passed, rand_st) = check_chance(stmt.props.chance, self.rand_st); + self.rand_st = rand_st; + if passed { + // try to eval this statement + let res = match self.eval_stmt(stmt) { + // on error, set the correct line number and bubble up + Err(mut err) => { + err.set_line(stmt.props.onthewayto); + // special treatment for NEXT + if let StmtBody::DoNext(n) = stmt.body { + if let Some(i) = program.labels.get(&n) { + err.set_line(program.stmts[*i as usize].props.srcline); + } + } + return Err(err); + } + Ok(res) => res + }; + // handle control flow effects + match res { + StmtRes::Next => { } + 
StmtRes::Jump(n) => { + self.jumps.push(pctr as u16); // push the line with the NEXT + pctr = n; + continue; // do not increment or check for COME FROMs + } + StmtRes::Back(n) => { + pctr = n; // will be incremented below after COME FROM check + } + StmtRes::FromTop => { + pctr = 0; // start from the beginning, do not push any stack + continue; + } + StmtRes::End => break, + } + } + } + // if we are on the line with the compiler bug, error out + if pctr == self.program.bugline as usize { + return IE774.err_with(None, stmt.props.onthewayto); + } + // try to determine if we have to go to a COME FROM statement + // (note: in general, program.stmts[pctr] != stmt) + // + // the static COME FROM is always a possibility + let mut maybe_next = program.stmts[pctr].comefrom; + // the complicated case: evaluate all computed-come-from expressions + let my_label = program.stmts[pctr].props.label; + if program.uses_complex_comefrom && my_label > 0 { + for (i, stmt) in program.stmts.iter().enumerate() { + if let StmtBody::ComeFrom(ComeFrom::Expr(ref e)) = stmt.body { + let v = try!(try!(self.eval_expr(e)).as_u16()); + if v == my_label { + // as soon as we have multiple candidates, we can bail out + if maybe_next.is_some() { + return IE555.err(); + } + maybe_next = Some(i as u16); + } + } + } + } + // check for COME FROMs from this line + if let Some(next) = maybe_next { + let next = next as usize; + // check for abstained COME FROM + if self.abstain[next] == 0 { + // the COME FROM can also have a % chance + let (passed, rand_st) = check_chance(program.stmts[next].props.chance, + self.rand_st); + self.rand_st = rand_st; + if passed { + pctr = next; + continue; + } + } + } + // no COME FROM, normal execution + pctr += 1; + } + Ok(self.stmt_ctr) + } + + /// Interpret a single statement. 
+ fn eval_stmt(&mut self, stmt: &Stmt) -> Res<StmtRes> { + if self.debug { + println!("\nExecuting Stmt #{} (state before following)", self.stmt_ctr); + self.dump_state(); + println!("{}", stmt); + } + match stmt.body { + StmtBody::Calc(ref var, ref expr) => { + let val = try!(self.eval_expr(expr)); + try!(self.assign(var, val)); + Ok(StmtRes::Next) + } + StmtBody::Dim(ref var, ref exprs) => { + try!(self.array_dim(var, exprs)); + Ok(StmtRes::Next) + } + StmtBody::DoNext(n) => { + match self.program.labels.get(&n) { + // too many jumps on stack already? + Some(_) if self.jumps.len() >= 80 => IE123.err(), + Some(i) => Ok(StmtRes::Jump(*i as usize)), + None => IE129.err(), + } + } + StmtBody::ComeFrom(_) => { + // nothing to do here at runtime + Ok(StmtRes::Next) + } + StmtBody::Resume(ref expr) => { + let n = try!(self.eval_expr(expr)).as_u32(); + // this expect() is safe: if the third arg is true, there will + // be no Ok(None) returns + let next = try!(pop_jumps(&mut self.jumps, n, true, 0)) + .expect("https://xkcd.com/378/ ?!"); + Ok(StmtRes::Back(next as usize)) + } + StmtBody::Forget(ref expr) => { + let n = try!(self.eval_expr(expr)).as_u32(); + try!(pop_jumps(&mut self.jumps, n, false, 0)); + Ok(StmtRes::Next) + } + StmtBody::Ignore(ref vars) => { + for var in vars { + self.set_rw(var, false); + } + Ok(StmtRes::Next) + } + StmtBody::Remember(ref vars) => { + for var in vars { + self.set_rw(var, true); + } + Ok(StmtRes::Next) + } + StmtBody::Stash(ref vars) => { + for var in vars { + self.stash(var); + } + Ok(StmtRes::Next) + } + StmtBody::Retrieve(ref vars) => { + for var in vars { + try!(self.retrieve(var)); + } + Ok(StmtRes::Next) + } + StmtBody::Abstain(ref expr, ref whats) => { + let f: Box<Fn(u32) -> u32> = if let Some(ref e) = *expr { + let n = try!(self.eval_expr(e)).as_u32(); + box move |v: u32| v.saturating_add(n) + } else { + box |_| 1 + }; + for what in whats { + self.abstain(what, &*f); + } + Ok(StmtRes::Next) + } + StmtBody::Reinstate(ref whats) 
=> { + for what in whats { + self.abstain(what, &|v: u32| v.saturating_sub(1)); + } + Ok(StmtRes::Next) + } + StmtBody::ReadOut(ref vars) => { + for var in vars { + match *var { + // read out whole array + Expr::Var(ref var) if var.is_dim() => { + try!(self.array_readout(var)); + } + // read out single var or array element + Expr::Var(ref var) => { + let varval = try!(self.lookup(var)); + try!(write_number(self.stdout, varval.as_u32(), 0)); + } + // read out constant + Expr::Num(_, v) => try!(write_number(self.stdout, v, 0)), + // others will not be generated + _ => return IE994.err(), + }; + } + Ok(StmtRes::Next) + } + StmtBody::WriteIn(ref vars) => { + for var in vars { + if var.is_dim() { + // write in whole array + try!(self.array_writein(var)); + } else { + // write in single var or array element + let n = try!(read_number(0)); + try!(self.assign(var, Val::from_u32(n))); + } + } + Ok(StmtRes::Next) + } + // this one is only generated by the constant-program optimizer + StmtBody::Print(ref s) => { + if let Err(_) = self.stdout.write(&s) { + return IE252.err(); + } + Ok(StmtRes::Next) + } + StmtBody::TryAgain => Ok(StmtRes::FromTop), + StmtBody::GiveUp => Ok(StmtRes::End), + StmtBody::Error(ref e) => Err((*e).clone()), + } + } + + /// Evaluate an expression to a value. 
+ fn eval_expr(&self, expr: &Expr) -> Res<Val> { + match *expr { + Expr::Num(vtype, v) => match vtype { + VType::I16 => Ok(Val::I16(v as u16)), + VType::I32 => Ok(Val::I32(v)), + }, + Expr::Var(ref var) => self.lookup(var), + Expr::Mingle(ref vx, ref wx) => { + let v = try!(self.eval_expr(vx)).as_u32(); + let w = try!(self.eval_expr(wx)).as_u32(); + let v = try!(check_ovf(v, 0)); + let w = try!(check_ovf(w, 0)); + Ok(Val::I32(mingle(v, w))) + } + Expr::Select(vtype, ref vx, ref wx) => { + let v = try!(self.eval_expr(vx)); + let w = try!(self.eval_expr(wx)); + if vtype == VType::I16 { + Ok(Val::I16(select(v.as_u32(), try!(w.as_u16()) as u32) as u16)) + } else { + Ok(Val::I32(select(v.as_u32(), w.as_u32()))) + } + } + Expr::And(vtype, ref vx) => { + let v = try!(self.eval_expr(vx)); + match vtype { + VType::I16 => Ok(Val::I16(and_16(try!(v.as_u16()) as u32) as u16)), + VType::I32 => Ok(Val::I32(and_32(v.as_u32()))), + } + } + Expr::Or(vtype, ref vx) => { + let v = try!(self.eval_expr(vx)); + match vtype { + VType::I16 => Ok(Val::I16(or_16(try!(v.as_u16()) as u32) as u16)), + VType::I32 => Ok(Val::I32(or_32(v.as_u32()))), + } + } + Expr::Xor(vtype, ref vx) => { + let v = try!(self.eval_expr(vx)); + match vtype { + VType::I16 => Ok(Val::I16(xor_16(try!(v.as_u16()) as u32) as u16)), + VType::I32 => Ok(Val::I32(xor_32(v.as_u32()))), + } + } + Expr::RsNot(ref vx) => { + let v = try!(self.eval_expr(vx)); + Ok(Val::I32(!v.as_u32())) + } + Expr::RsAnd(ref vx, ref wx) => { + let v = try!(self.eval_expr(vx)); + let w = try!(self.eval_expr(wx)); + Ok(Val::I32(v.as_u32() & w.as_u32())) + } + Expr::RsOr(ref vx, ref wx) => { + let v = try!(self.eval_expr(vx)); + let w = try!(self.eval_expr(wx)); + Ok(Val::I32(v.as_u32() | w.as_u32())) + } + Expr::RsXor(ref vx, ref wx) => { + let v = try!(self.eval_expr(vx)); + let w = try!(self.eval_expr(wx)); + Ok(Val::I32(v.as_u32() ^ w.as_u32())) + } + Expr::RsRshift(ref vx, ref wx) => { + let v = try!(self.eval_expr(vx)); + let w = 
try!(self.eval_expr(wx)); + Ok(Val::I32(v.as_u32() >> w.as_u32())) + } + Expr::RsLshift(ref vx, ref wx) => { + let v = try!(self.eval_expr(vx)); + let w = try!(self.eval_expr(wx)); + Ok(Val::I32(v.as_u32() << w.as_u32())) + } + // Expr::RsEqual(ref vx, ref wx) => { + // let v = try!(self.eval_expr(vx)); + // let w = try!(self.eval_expr(wx)); + // Ok(Val::I32((v.as_u32() == w.as_u32()) as u32)) + // } + Expr::RsNotEqual(ref vx, ref wx) => { + let v = try!(self.eval_expr(vx)); + let w = try!(self.eval_expr(wx)); + Ok(Val::I32((v.as_u32() != w.as_u32()) as u32)) + } + Expr::RsPlus(ref vx, ref wx) => { + let v = try!(self.eval_expr(vx)); + let w = try!(self.eval_expr(wx)); + Ok(Val::I32(v.as_u32() + w.as_u32())) + } + Expr::RsMinus(ref vx, ref wx) => { + let v = try!(self.eval_expr(vx)); + let w = try!(self.eval_expr(wx)); + Ok(Val::I32(v.as_u32() - w.as_u32())) + } + } + } + + #[inline] + fn eval_subs(&self, subs: &Vec<Expr>) -> Res<Vec<usize>> { + subs.iter().map(|v| self.eval_expr(v).map(|w| w.as_usize())).collect() + } + + /// Dimension an array. + fn array_dim(&mut self, var: &Var, dims: &Vec<Expr>) -> Res<()> { + let dims = try!(self.eval_subs(dims)); + match *var { + Var::A16(n, _) => self.tail[n].dimension(dims, 0), + Var::A32(n, _) => self.hybrid[n].dimension(dims, 0), + _ => return IE994.err(), + } + } + + /// Assign to a variable. + fn assign(&mut self, var: &Var, val: Val) -> Res<()> { + match *var { + Var::I16(n) => Ok(self.spot[n].assign(try!(val.as_u16()))), + Var::I32(n) => Ok(self.twospot[n].assign(val.as_u32())), + Var::A16(n, ref subs) => { + let subs = try!(self.eval_subs(subs)); + self.tail[n].set_md(subs, try!(val.as_u16()), 0) + } + Var::A32(n, ref subs) => { + let subs = try!(self.eval_subs(subs)); + self.hybrid[n].set_md(subs, val.as_u32(), 0) + } + } + } + + /// Look up the value of a variable. 
+ fn lookup(&self, var: &Var) -> Res<Val> { + match *var { + Var::I16(n) => Ok(Val::I16(self.spot[n].val)), + Var::I32(n) => Ok(Val::I32(self.twospot[n].val)), + Var::A16(n, ref subs) => { + let subs = try!(self.eval_subs(subs)); + self.tail[n].get_md(subs, 0).map(Val::I16) + } + Var::A32(n, ref subs) => { + let subs = try!(self.eval_subs(subs)); + self.hybrid[n].get_md(subs, 0).map(Val::I32) + } + } + } + + /// Process a STASH statement. + fn stash(&mut self, var: &Var) { + match *var { + Var::I16(n) => self.spot[n].stash(), + Var::I32(n) => self.twospot[n].stash(), + Var::A16(n, _) => self.tail[n].stash(), + Var::A32(n, _) => self.hybrid[n].stash(), + } + } + + /// Process a RETRIEVE statement. + fn retrieve(&mut self, var: &Var) -> Res<()> { + match *var { + Var::I16(n) => self.spot[n].retrieve(0), + Var::I32(n) => self.twospot[n].retrieve(0), + Var::A16(n, _) => self.tail[n].retrieve(0), + Var::A32(n, _) => self.hybrid[n].retrieve(0), + } + } + + /// Process an IGNORE or REMEMBER statement. Cannot fail. + fn set_rw(&mut self, var: &Var, rw: bool) { + match *var { + Var::I16(n) => self.spot[n].rw = rw, + Var::I32(n) => self.twospot[n].rw = rw, + Var::A16(n, _) => self.tail[n].rw = rw, + Var::A32(n, _) => self.hybrid[n].rw = rw, + } + } + + /// P()rocess an ABSTAIN or REINSTATE statement. Cannot fail. + fn abstain(&mut self, what: &ast::Abstain, f: &Fn(u32) -> u32) { + if let &ast::Abstain::Label(lbl) = what { + let idx = self.program.labels[&lbl] as usize; + if self.program.stmts[idx].body != StmtBody::GiveUp { + self.abstain[idx] = f(self.abstain[idx]); + } + } else { + for (i, stype) in self.program.stmt_types.iter().enumerate() { + if stype == what { + self.abstain[i] = f(self.abstain[i]); + } + } + } + } + + /// Array readout helper. 
+ fn array_readout(&mut self, var: &Var) -> Res<()> { + let state = &mut self.last_out; + match *var { + Var::A16(n, _) => self.tail[n].readout(self.stdout, state, 0), + Var::A32(n, _) => self.hybrid[n].readout(self.stdout, state, 0), + _ => return IE994.err(), + } + } + + /// Array writein helper. + fn array_writein(&mut self, var: &Var) -> Res<()> { + let state = &mut self.last_in; + match *var { + Var::A16(n, _) => self.tail[n].writein(state, 0), + Var::A32(n, _) => self.hybrid[n].writein(state, 0), + _ => return IE994.err(), + } + } + + /// Debug helpers. + fn dump_state(&self) { + self.dump_state_one(&self.spot, "."); + self.dump_state_one(&self.twospot, ":"); + self.dump_state_one(&self.tail, ","); + self.dump_state_one(&self.hybrid, ";"); + if self.jumps.len() > 0 { + println!("Next stack: {:?}", self.jumps); + } + //println!("Abstained: {:?}", self.abstain); + } + + fn dump_state_one<T: Debug + Display>(&self, vec: &Vec<Bind<T>>, sigil: &str) { + if vec.len() > 0 { + for (i, v) in vec.iter().enumerate() { + print!("{}{} = {}, ", sigil, i, v); + } + println!(""); + } + } +} diff --git a/tests/examplefiles/example.ezt b/tests/examplefiles/example.ezt new file mode 100644 index 00000000..fec2aa4c --- /dev/null +++ b/tests/examplefiles/example.ezt @@ -0,0 +1,32 @@ +* Easytrieve Plus example programm. + +* Environtment section. +PARM DEBUG(FLOW FLDCHK) + +* Library Section. +FILE PERSNL FB(150 1800) + NAME 17 8 A + EMP# 9 5 N * Note: '#' is a valid character for names. + DEPT 98 3 N. GROSS 94 4 P 2 + * ^ 2 field definitions in 1 line. + +* Call macro in example.mac. +FILE EXAMPLE FB(80 200) +%EXAMPLE SOMEFILE SOME + +* Activity Section. +JOB INPUT PERSNL NAME FIRST-PROGRAM START AT-START FINISH AT_FINISH + PRINT PAY-RPT +REPORT PAY-RPT LINESIZE 80 + TITLE 01 'PERSONNEL REPORT EXAMPLE-1' + LINE 01 DEPT NAME EMP# GROSS + +* Procedure declarations. +AT-START. PROC + DISPLAY 'PROCESSING...' +END-PROC + +AT-FINISH +PROC + DISPLAY 'DONE.' 
+END-PROC diff --git a/tests/examplefiles/example.jcl b/tests/examplefiles/example.jcl new file mode 100644 index 00000000..18d4ae37 --- /dev/null +++ b/tests/examplefiles/example.jcl @@ -0,0 +1,31 @@ +//IS198CPY JOB (PYGM-TEST-001),'PYGMENTS TEST JOB', +// CLASS=L,MSGCLASS=X,TIME=(00,10) +//* Copy 'OLDFILE' to 'NEWFILE'. +//COPY01 EXEC PGM=IEBGENER +//SYSPRINT DD SYSOUT=* +//SYSUT1 DD DSN=OLDFILE,DISP=SHR +//SYSUT2 DD DSN=NEWFILE, +// DISP=(NEW,CATLG,DELETE), +// SPACE=(CYL,(40,5),RLSE), Some comment +// DCB=(LRECL=115,BLKSIZE=1150) +//SYSIN DD DUMMY +/* +//* Test line continuation in strings. +//CONT01 EXEC PGM=IEFBR14,PARM='THIS IS A LONG PARAMETER WITHIN APOST +// ROPHES, CONTINUED IN COLUMN 15 OF THE NEXT RECORD' +//* Sort a couple of lines and show the result in the job log. +//SORT01 EXEC PGM=IEFBR14 +//SORTIN DD * +spam +eggs +ham +/* +//SORTOUT DD SYSOUT=* +/* +//* Test line continuation with comment at end of line continued by a +//* character at column 72 (in this case 'X'). +//STP4 EXEC PROC=BILLING,COND.PAID=((20,LT),EVEN), +// COND.LATE=(60,GT,FIND), +// COND.BILL=((20,GE),(30,LT,CHGE)) THIS STATEMENT CALLS THE X +// BILLING PROCEDURE AND SPECIFIES RETURN CODE TESTS FOR THREEX +// PROCEDURE STEPS. diff --git a/tests/examplefiles/example.mac b/tests/examplefiles/example.mac new file mode 100644 index 00000000..1c3831d1 --- /dev/null +++ b/tests/examplefiles/example.mac @@ -0,0 +1,6 @@ +* Example Easytrieve macro declaration. For an example on calling this +* macro, see example.ezt. +MACRO FILENAME PREFIX +&FILENAME. +&PREFIX.-LINE 1 80 A +&PREFIX.-KEY 1 8 A diff --git a/tests/examplefiles/example.scd b/tests/examplefiles/example.scd new file mode 100644 index 00000000..a27247e9 --- /dev/null +++ b/tests/examplefiles/example.scd @@ -0,0 +1,76 @@ +Instr("cs.fm.BasicFM", { + arg freq = 440, + amp = 0.9, + gate = 0, + carrierFreqRatio = 1.0, + modulatorFreqRatio = 1.0, + // not sure if having these defaults here actually does anything. 
+ modEnvShape = Env.adsr( + attackTime: 0.05, + decayTime: 0.1, + sustainLevel: 0.5 * amp, + releaseTime: 0.1, + peakLevel: amp, + curve: [4, -4, -2] + ), + carrierEnvShape = Env.adsr( + attackTime: 0.05, + decayTime: 0.1, + sustainLevel: 0.5 * amp, + releaseTime: 0.1, + peakLevel: amp, + curve: [4, -4, -2] + ); + + var carrier, + modulator, + carrierEnv, + modEnv, + out; + + modEnv = EnvGen.kr( + envelope: modEnvShape, + gate: gate + ); + + modulator = modEnv * SinOsc.ar(freq * modulatorFreqRatio); + + // carrier sustains until noteoff + carrierEnvShape.releaseNode = 2; + + carrierEnv = EnvGen.kr( + envelope: carrierEnvShape, + gate: gate + ); + + carrier = carrierEnv * SinOsc.ar( + (freq * carrierFreqRatio) + (modulator * freq) + ); + + // free synth when both carrier and modulator envelopes are done + FreeSelf.kr(Done.kr(carrierEnv) + Done.kr(modEnv) - 1); + + out = amp * carrier; +}, [ + \freq.asSpec(), + \amp.asSpec(), + \nil, + ControlSpec(0.1, 10), + ControlSpec(0.1, 10), + EnvSpec(Env.adsr( + attackTime: 0.05, + decayTime: 0.1, + sustainLevel: 0.8, + releaseTime: 0.1, + peakLevel: 1.0, + curve: [4, -4, -2] + )), + EnvSpec(Env.adsr( + attackTime: 0.05, + decayTime: 0.1, + sustainLevel: 0.8, + releaseTime: 0.1, + peakLevel: 1.0, + curve: [4, -4, -2] + )) +]); diff --git a/tests/examplefiles/example.tf b/tests/examplefiles/example.tf new file mode 100644 index 00000000..d3f02779 --- /dev/null +++ b/tests/examplefiles/example.tf @@ -0,0 +1,162 @@ +variable "key_name" { + description = "Name of the SSH keypair to use in AWS." +} + +variable "key_path" { + description = "Path to the private portion of the SSH key specified." +} + +variable "aws_region" { + description = "AWS region to launch servers." 
+ default = "us-west-2" + somevar = true +} + +# Ubuntu Precise 12.04 LTS (x64) +variable "aws_amis" { + default = { + eu-west-1 = "ami-b1cf19c6" + us-east-1 = "ami-de7ab6b6" + us-west-1 = "ami-3f75767a" + us-west-2 = "ami-21f78e11" + } +} + + + + + + +provider "aws" { + access_key = "${myvar}" + secret_key = "your aws secret key" + region = "us-east-1" +} +/* multiline + + comment + +*/ + + +# Single line comment +resource "aws_instance" "example" { + ami = "ami-408c7f28" + instance_type = "t1.micro" + key_name = "your-aws-key-name" +} + +# Create our Heroku application. Heroku will +# automatically assign a name. +resource "heroku_app" "web" {} + +# Create our DNSimple record to point to the +# heroku application. +resource "dnsimple_record" "web" { + domain = "${var.dnsimple_domain}" + + + # heroku_hostname is a computed attribute on the heroku + # application we can use to determine the hostname + value = "${heroku_app.web.heroku_hostname}" + + type = "CNAME" + ttl = 3600 +} + +# The Heroku domain, which will be created and added +# to the heroku application after we have assigned the domain +# in DNSimple +resource "heroku_domain" "foobar" { + app = "${heroku_app.web.name}" + hostname = "${dnsimple_record.web.hostname}" +} + + +# Specify the provider and access details +provider "aws" { + region = "${var.aws_region}" + value = ${file("path.txt")} +} + +# Our default security group to access +# the instances over SSH and HTTP +resource "aws_security_group" "default" { + name = "terraform_example" + description = "Used in the terraform" + + # SSH access from anywhere + ingress { + from_port = 22 + to_port = 22 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } + + # HTTP access from anywhere + ingress { + from_port = 80 + to_port = 80 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + } +} + + +resource "aws_elb" "web" { + name = "terraform-example-elb" + + # The same availability zone as our instance + availability_zones = 
["${aws_instance.web.availability_zone}"] + + listener { + instance_port = 80 + instance_protocol = "http" + lb_port = 80 + lb_protocol = "http" + } + + # The instance is registered automatically + instances = ["${aws_instance.web.id}"] +} + + +resource "aws_instance" "web" { + # The connection block tells our provisioner how to + # communicate with the resource (instance) + connection { + # The default username for our AMI + user = "ubuntu" + + # The path to your keyfile + key_file = "${var.key_path}" + } + + instance_type = "m1.small" + + # Lookup the correct AMI based on the region + # we specified + ami = "${lookup(var.aws_amis, var.aws_region)}" + + # The name of our SSH keypair you've created and downloaded + # from the AWS console. + # + # https://console.aws.amazon.com/ec2/v2/home?region=us-west-2#KeyPairs: + # + key_name = "${var.key_name}" + + # Our Security group to allow HTTP and SSH access + security_groups = ["${aws_security_group.default.name}"] + + # We run a remote provisioner on the instance after creating it. + # In this case, we just install nginx and start it. By default, + # this should be on port 80 + provisioner "remote-exec" { + inline = [ + "sudo apt-get -y update", + "sudo apt-get -y install nginx", + "sudo service nginx start" + ] + } +} + diff --git a/tests/examplefiles/example.ttl b/tests/examplefiles/example.ttl new file mode 100644 index 00000000..e524d86c --- /dev/null +++ b/tests/examplefiles/example.ttl @@ -0,0 +1,43 @@ +@base <http://example.com> . +@prefix dcterms: <http://purl.org/dc/terms/>. @prefix xs: <http://www.w3.org/2001/XMLSchema> . +@prefix mads: <http://www.loc.gov/mads/rdf/v1#> . +@prefix skos: <http://www.w3.org/2004/02/skos/core#> . +@PREFIX dc: <http://purl.org/dc/elements/1.1/> # SPARQL-like syntax is OK +@prefix : <http://xmlns.com/foaf/0.1/> . # empty prefix is OK + +<http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green-goblin> . 
+ +<#doc1> a <#document> + dc:creator "Smith", "Jones"; + :knows <http://getopenid.com/jsmith> + dcterms:hasPart [ # A comment + dc:title "Some title", "Some other title"; + dc:creator "برشت، برتولد"@ar; + dc:date "2009"^^xs:date + ]; + dc:title "A sample title", 23.0; + dcterms:isPartOf [ + dc:title "another", "title" + ] ; + :exists true . + +<http://data.ub.uio.no/realfagstermer/006839> a mads:Topic, + skos:Concept ; + dcterms:created "2014-08-25"^^xsd:date ; + dcterms:modified "2014-11-12"^^xsd:date ; + dcterms:identifier "REAL006839" ; + skos:prefLabel "Flerbørstemarker"@nb, + "Polychaeta"@la ; + skos:altLabel "Flerbørsteormer"@nb, + "Mangebørstemark"@nb, + "Mangebørsteormer"@nb, + "Havbørsteormer"@nb, + "Havbørstemarker"@nb, + "Polycheter"@nb. + skos:inScheme <http://data.ub.uio.no/realfagstermer/> ; + skos:narrower <http://data.ub.uio.no/realfagstermer/018529>, + <http://data.ub.uio.no/realfagstermer/024538>, + <http://data.ub.uio.no/realfagstermer/026723> ; + skos:exactMatch <http://ntnu.no/ub/data/tekord#NTUB17114>, + <http://dewey.info/class/592.62/e23/>, + <http://aims.fao.org/aos/agrovoc/c_29110> . diff --git a/tests/examplefiles/example1.cadl b/tests/examplefiles/example1.cadl new file mode 100644 index 00000000..3350fa3b --- /dev/null +++ b/tests/examplefiles/example1.cadl @@ -0,0 +1,149 @@ + -- + -- Example fragment of an openEHR Archetype, written in cADL, a subsyntax of the Archetype Definition Language (ADL) + -- definition available here: http://www.openehr.org/releases/trunk/architecture/am/adl2.pdf + -- Author: Thomas Beale + -- + + EVALUATION[id1] matches { -- Adverse Reaction + data matches { + ITEM_TREE[id2] matches { + items cardinality matches {1..*; unordered} matches { + ELEMENT[id3] matches { -- Substance/Agent + value matches { + DV_TEXT[id51] + } + } + ELEMENT[id5] occurrences matches {0..1} matches { -- Absolute Contraindication? 
+ value matches { + DV_BOOLEAN[id52] matches { + value matches {True} + } + } + } + ELEMENT[id50] occurrences matches {0..1} matches { -- Future Use + value matches { + DV_TEXT[id53] + } + } + ELEMENT[id7] occurrences matches {0..1} matches { -- Overall Comment + value matches { + DV_TEXT[id54] + } + } + CLUSTER[id10] matches { -- Reaction Event + items matches { + ELEMENT[id11] occurrences matches {0..1} matches { -- Specific Substance/Agent + value matches { + DV_TEXT[id55] + } + } + ELEMENT[id12] matches { -- Manifestation + value matches { + DV_TEXT[id56] + } + } + ELEMENT[id17] occurrences matches {0..1} matches { -- Reaction Type + value matches { + DV_TEXT[id57] + } + } + ELEMENT[id22] occurrences matches {0..1} matches { -- Certainty + value matches { + DV_CODED_TEXT[id58] matches { + defining_code matches {[ac1]} -- Certainty (synthesised) + } + } + } + ELEMENT[id13] occurrences matches {0..1} matches { -- Reaction Description + value matches { + DV_TEXT[id59] + } + } + ELEMENT[id28] occurrences matches {0..1} matches { -- Onset of Reaction + value matches { + DV_DATE_TIME[id60] + } + } + ELEMENT[id29] occurrences matches {0..1} matches { -- Duration of Reaction + value matches { + DV_DURATION[id61] + } + } + allow_archetype CLUSTER[id30] matches { -- Additional Reaction Detail + include + archetype_id/value matches {/openEHR-EHR-CLUSTER\.anatomical_location(-a-zA-Z0-9_]+)*\.v1/} + } + ELEMENT[id19] occurrences matches {0..1} matches { -- Exposure Description + value matches { + DV_TEXT[id62] + } + } + ELEMENT[id21] occurrences matches {0..1} matches { -- Earliest Exposure + value matches { + DV_DATE_TIME[id63] + } + } + ELEMENT[id26] occurrences matches {0..1} matches { -- Duration of Exposure + value matches { + DV_DURATION[id64] + } + } + allow_archetype CLUSTER[id20] matches { -- Additional Exposure Detail + include + archetype_id/value matches 
{/openEHR-EHR-CLUSTER\.amount(-a-zA-Z0-9_]+)*\.v1|openEHR-EHR-CLUSTER\.medication_admin(-a-zA-Z0-9_]+)*\.v1|openEHR-EHR-CLUSTER\.timing(-a-zA-Z0-9_]+)*\.v1/} + } + ELEMENT[id41] occurrences matches {0..1} matches { -- Clinical Management Description + value matches { + DV_TEXT[id65] + } + } + ELEMENT[id32] matches { -- Multimedia + value matches { + DV_MULTIMEDIA[id66] matches { + media_type + } + } + } + allow_archetype CLUSTER[id42] matches { -- Reporting Details + include + archetype_id/value matches {/.*/} + } + ELEMENT[id33] occurrences matches {0..1} matches { -- Reaction Comment + value matches { + DV_TEXT[id67] + } + } + } + } + } + } + } + protocol matches { + ITEM_TREE[id43] matches { + items matches { + ELEMENT[id45] occurrences matches {0..1} matches { -- Reaction Reported? + value matches { + DV_BOOLEAN[id68] matches { + value matches {True, False} + } + } + } + ELEMENT[id49] occurrences matches {0..1} matches { -- Report Comment + value matches { + DV_TEXT[id69] + } + } + ELEMENT[id46] matches { -- Adverse Reaction Report + value matches { + DV_URI[id70] + } + } + ELEMENT[id48] occurrences matches {0..1} matches { -- Supporting Clinical Record Information + value matches { + DV_EHR_URI[id71] + } + } + } + } + } + } diff --git a/tests/examplefiles/modula2_test_cases.def b/tests/examplefiles/modula2_test_cases.def new file mode 100644 index 00000000..ce86a55b --- /dev/null +++ b/tests/examplefiles/modula2_test_cases.def @@ -0,0 +1,354 @@ +(* Test Cases for Modula-2 Lexer *) + +(* Notes: + (1) Without dialect option nor embedded dialect tag, the lexer operates in + fallback mode, recognising the *combined* literals, punctuation symbols + and operators of all supported dialects, and the *combined* reserved + words and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10. + (1) If multiple embedded dialect tags are present, the lexer will use the + first valid tag and ignore any subsequent dialect tags in the file. 
+ (2) An embedded dialect tag overrides any command line dialect option. *) + + +(* Testing command line dialect option *) + +(* for PIM Modula-2 : pygmentize -O full,dialect=m2pim ... + for ISO Modula-2 : pygmentize -O full,dialect=m2iso ... + for Modula-2 R10 : pygmentize -O full,dialect=m2r10 ... + for Objective Modula-2 : pygmentize -O full,dialect=objm2 ... *) + +(* for Aglet extensions : pygmentize -O full,dialect=m2iso+aglet ... + for GNU extensions : pygmentize -O full,dialect=m2pim+gm2 ... + for p1 extensions : pygmentize -O full,dialect=m2iso+p1 ... + for XDS extensions : pygmentize -O full,dialect=m2iso+xds ... + + +(* Testing embedded dialect tags *) + +(* !m2pim*) (* <-- remove whitespace before ! for PIM Modula-2 *) +(* !m2iso*) (* <-- remove whitespace before ! for ISO Modula-2 *) +(* !m2r10*) (* <-- remove whitespace before ! for Modula-2 R10 *) +(* !objm2*) (* <-- remove whitespace before ! for Objective Modula-2 *) + +(* !m2iso+aglet*) (* <-- remove whitespace before ! for Aglet extensions *) +(* !m2pim+gm2*) (* <-- remove whitespace before ! for GNU extensions *) +(* !m2iso+p1*) (* <-- remove whitespace before ! for p1 extensions *) +(* !m2iso+xds*) (* <-- remove whitespace before ! 
for XDS extensions *) + + +(* Dialect Indicating Names *) + +(* recognised names should be highlighted *) + +QUALIFIED (* PIM and ISO *) + +PACKEDSET (* ISO only *) + +ARGLIST (* M2 R10 and ObjM2 *) + +BYCOPY (* ObjM2 only *) + +BITSET8 (* Aglet, GNU and M2 R10 *) + +__FILE__ (* GNU only *) + +BCD (* p1 and M2 R10 *) + +SEQ (* XDS only *) + + +(* Literal Tests *) + +(* recognised literals should be rendered as one unit + unrecognised literals should be rendered as error *) + +ch := 'a'; ch := "a"; (* all dialects *) +ch := 0u20; unich := 0u2038 (* M2 R10 *) + +s := 'The cat said "meow!".'; +s := "It is eight O'clock."; + + +n := 123; n = 1000000; (* all dialects *) +n := 123; n = 1'000'000; (* M2 R10 *) + +n := 0b0110; n:= 0b0110'1100'0111; (* M2 R10 *) +n := 0xFF00; n:= 0xDEAD'BEEF'0F00; (* M2 R10 *) + +r := 1.23; r := 1000000.000001; (* all dialects *) +r := 1.23; r := 1'000'000.000'001; (* M2 R10 *) + +r := 1.234E6; r:= 1.234E-6; r := 1.234567E1000; (* PIM + ISO *) +r := 1.234e6; r:= 1.234e-6; r := 1.234'567e1'000; (* M2 R10 *) + +ch := 0377C; n := 0377B; n := 07FF0H; (* ISO + PIM *) + + +(* Non-Alphabetic Operator Tests *) + +(* supported operators should be rendered as one unit + unsupported operators should be rendered as errors *) + +a := b + c - d * e / f; (* all dialects *) + +SetDiff := A \ B; (* M2 R10 *) + +dotProduct := v1 *. v2; catArray := array1 +> array2; (* M2 R10 *) + +bool := a = b; bool := a > b; bool := a < b; +bool := a # b; bool := a >= b; bool := a <= b; + +bool := a <> b; (* PIM + ISO *) + +bool := a == b; (* M2 R10 *) + +(*&*) IF a & b THEN ... END; (* PIM + ISO *) + +(*~*) IF ~ b THEN ... END; (* PIM + ISO *) + +(*::*) int := real :: INTEGER; (* M2 R10 *) + +(*++*) FOR i++ IN range DO ... END; (* M2 R10 *) +(*--*) FOR i-- IN range DO ... 
END; (* M2 R10 *) + +(*^*) next := this^.next; (* all dialects *) +(*@*) next := this@.next; (* ISO *) + +(*`*) str := `NSString alloc init; (* ObjM2 *) + + +(* Punctuation Tests *) + +(* supported punctuation should be rendered as one unit + unsupported punctuation should be rendered as an error *) + +(*.*) Foo.Bar.Baz; (*..*) TYPE Sign = [-1..1] OF INTEGER; + +(*|:*) CASE foo OF | 1 : bar | 2 : bam | 3 : boo END; +(*!:*) CASE foo OF 1 : bar ! 2 : bam ! 3 : boo END; (* ISO *) + +(*[]()*) array[n] := foo(); + +(*{}*) CONST Bar = { 1, 2, 3 }; + +(*?*) TPROPERTIES = isCollection, isIndexed | isRigid?; (* M2 R10 *) + +(*~*) CONST ~ isFoobar = Foo AND Bar; (* M2 R10 *) +(*->*) isFoobar -> PROCEDURE [ABS]; (* M2 R10 *) + +(*<<>>*) GENLIB Foo FROM Template FOR Bar = <<ARRAY OF CHAR>> END; (* M2 R10 *) + + +(* Single Line Comment Test *) + +(* should be rendered as comment if supported, as error if unsupported *) + +// This is a single line comment (M2 R10 + ObjM2) + + +(* Pragma Delimiter Tests *) + +(* PIM style pragma should be rendered as pragma in PIM dialects, + as multiline comment in all other dialects. *) + +(*$INLINE*) (* PIM *) + +(* ISO style pragma should be rendered as error in PIM dialects, + as pragma in all other dialects. *) + +<*INLINE*> (* all other dialects *) + + +(* Operator Substitution Test When in Algol mode *) + +IF foo # bar THEN ... END; (* # should be rendered as not equal symbol *) + +IF foo >= bar THEN ... END; (* >= should be rendered as not less symbol *) + +IF foo <= bar THEN ... END; (* <= should be rendered as not greater symbol *) + +IF foo == bar THEN ... END; (* == should be rendered as identity symbol *) + +dotProduct := v1 *. v2; (* *. 
should be rendered as dot product symbol *) + + +(* Reserved Words and Builtins Test *) + +(* supported reserved words and builtins should be highlighted *) + +(* reserved words common to all dialects *) + +AND ARRAY BEGIN BY CASE CONST DEFINITION DIV DO ELSE ELSIF END EXIT FOR FROM +IF IMPLEMENTATION IMPORT IN LOOP MOD MODULE NOT OF OR POINTER PROCEDURE +RECORD REPEAT RETURN SET THEN TO TYPE UNTIL VAR WHILE + +(* builtins common to all dialects *) + +ABS BOOLEAN CARDINAL CHAR CHR FALSE INTEGER LONGINT LONGREAL +MAX MIN NIL ODD ORD REAL TRUE + +(* pseudo builtins common to all dialects *) + +ADDRESS BYTE WORD ADR + + +(* additional reserved words for PIM *) + +EXPORT QUALIFIED WITH + +(* additional builtins for PIM *) + +BITSET CAP DEC DISPOSE EXCL FLOAT HALT HIGH INC INCL NEW NIL PROC SIZE TRUNC VAL + +(* additional pseudo-builtins for PIM *) + +SYSTEM PROCESS TSIZE NEWPROCESS TRANSFER + + +(* additional reserved words for ISO 10514-1 *) + +EXCEPT EXPORT FINALLY FORWARD PACKEDSET QUALIFIED REM RETRY WITH + +(* additional reserved words for ISO 10514-2 & ISO 10514-3 *) + +ABSTRACT AS CLASS GUARD INHERIT OVERRIDE READONLY REVEAL TRACED UNSAFEGUARDED + +(* additional builtins for ISO 10514-1 *) + +BITSET CAP CMPLX COMPLEX DEC DISPOSE EXCL FLOAT HALT HIGH IM INC INCL INT +INTERRUPTIBLE LENGTH LFLOAT LONGCOMPLEX NEW PROC PROTECTION RE SIZE TRUNC +UNINTERRUBTIBLE VAL + +(* additional builtins for ISO 10514-2 & ISO 10514-3 *) + +CREATE DESTROY EMPTY ISMEMBER SELF + + +(* additional pseudo-builtins for ISO *) + +(* SYSTEM *) +SYSTEM BITSPERLOC LOCSPERBYTE LOCSPERWORD LOC ADDADR SUBADR DIFADR MAKEADR +ADR ROTATE SHIFT CAST TSIZE + +(* COROUTINES *) +COROUTINES ATTACH COROUTINE CURRENT DETACH HANDLER INTERRUPTSOURCE IOTRANSFER +IsATTACHED LISTEN NEWCOROUTINE PROT TRANSFER + +(* EXCEPTIONS *) +EXCEPTIONS AllocateSource CurrentNumber ExceptionNumber ExceptionSource +GetMessage IsCurrentSource IsExceptionalExecution RAISE + +(* TERMINATION *) +TERMINATION IsTerminating 
HasHalted + +(* M2EXCEPTION *) +M2EXCEPTION M2Exceptions M2Exception IsM2Exception indexException rangeException +caseSelectException invalidLocation functionException wholeValueException +wholeDivException realValueException realDivException complexValueException +complexDivException protException sysException coException exException + + +(* additional reserved words for M2 R10 *) + +ALIAS ARGLIST BLUEPRINT COPY GENLIB INDETERMINATE NEW NONE OPAQUE REFERENTIAL +RELEASE RETAIN + +(* with symbolic assembler language extension *) +ASM REG + +(* additional builtins for M2 R10 *) + +CARDINAL COUNT EMPTY EXISTS INSERT LENGTH LONGCARD OCTET PTR PRED READ READNEW +REMOVE RETRIEVE SORT STORE SUBSET SUCC TLIMIT TMAX TMIN TRUE TSIZE UNICHAR +WRITE WRITEF + +(* additional pseudo-builtins for M2 R10 *) + +(* TPROPERTIES *) +TPROPERTIES PROPERTY LITERAL TPROPERTY TLITERAL TBUILTIN TDYN TREFC TNIL +TBASE TPRECISION TMAXEXP TMINEXP + +(* CONVERSION *) +CONVERSION TSXFSIZE SXF VAL + +(* UNSAFE *) +UNSAFE CAST INTRINSIC AVAIL ADD SUB ADDC SUBC FETCHADD FETCHSUB SHL SHR ASHR +ROTL ROTR ROTLC ROTRC BWNOT BWAND BWOR BWXOR BWNAND BWNOR SETBIT TESTBIT +LSBIT MSBIT CSBITS BAIL HALT TODO FFI ADDR VARGLIST VARGC + +(* ATOMIC *) +ATOMIC INTRINSIC AVAIL SWAP CAS INC DEC BWAND BWNAND BWOR BWXOR + +(* COMPILER *) +COMPILER DEBUG MODNAME PROCNAME LINENUM DEFAULT HASH + +(* ASSEMBLER *) +ASSEMBLER REGISTER SETREG GETREG CODE + + +(* standard library ADT identifiers for M2 R10 *) + +(* rendered as builtins when dialect is set to Modula-2 R10, + this can be turned off by option treat_stdlib_adts_as_builtins=off *) +BCD LONGBCD BITSET SHORTBITSET LONGBITSET LONGLONGBITSET COMPLEX LONGCOMPLEX +SHORTCARD LONGLONGCARD SHORTINT LONGLONGINT POSINT SHORTPOSINT LONGPOSINT +LONGLONGPOSINT BITSET8 BITSET16 BITSET32 BITSET64 BITSET128 BS8 BS16 BS32 +BS64 BS128 CARDINAL8 CARDINAL16 CARDINAL32 CARDINAL64 CARDINAL128 CARD8 +CARD16 CARD32 CARD64 CARD128 INTEGER8 INTEGER16 INTEGER32 INTEGER64 +INTEGER128 INT8 
INT16 INT32 INT64 INT128 STRING UNISTRING + + +(* additional reserved words for ObjM2 *) + +(* Note: ObjM2 is a superset of M2 R10 *) + +BYCOPY BYREF CLASS CONTINUE CRITICAL INOUT METHOD ON OPTIONAL OUT PRIVATE +PROTECTED PROTOCOL PUBLIC SUPER TRY + +(* additional builtins for ObjM2 *) + +OBJECT NO YES + + +(* additional builtins for Aglet Extensions to ISO *) + +BITSET8 BITSET16 BITSET32 CARDINAL8 CARDINAL16 CARDINAL32 INTEGER8 INTEGER16 +INTEGER32 + + +(* additional reserved words for GNU Extensions to PIM *) + +ASM __ATTRIBUTE__ __BUILTIN__ __COLUMN__ __DATE__ __FILE__ __FUNCTION__ +__LINE__ __MODULE__ VOLATILE + +(* additional builtins for GNU Extensions to PIM *) + +BITSET8 BITSET16 BITSET32 CARDINAL8 CARDINAL16 CARDINAL32 CARDINAL64 COMPLEX32 +COMPLEX64 COMPLEX96 COMPLEX128 INTEGER8 INTEGER16 INTEGER32 INTEGER64 REAL8 +REAL16 REAL32 REAL96 REAL128 THROW + + +(* additional pseudo-builtins for p1 Extensions to ISO *) + +BCD + + +(* additional reserved words for XDS Extensions to ISO *) + +SEQ + +(* additional builtins for XDS Extensions to ISO *) + +ASH ASSERT DIFFADR_TYPE ENTIER INDEX LEN LONGCARD SHORTCARD SHORTINT + +(* additional pseudo-builtins for XDS Extensions to ISO *) + +(* SYSTEM *) +PROCESS NEWPROCESS BOOL8 BOOL16 BOOL32 CARD8 CARD16 CARD32 INT8 INT16 INT32 +REF MOVE FILL GET PUT CC int unsigned size_t void + +(* COMPILER *) +COMPILER OPTION EQUATION + + +(* end of file *)
\ No newline at end of file diff --git a/tests/examplefiles/robotframework_test.txt b/tests/examplefiles/robotframework_test.txt index 63ba63e6..0d8179c0 100644 --- a/tests/examplefiles/robotframework_test.txt +++ b/tests/examplefiles/robotframework_test.txt @@ -6,6 +6,7 @@ Test Setup Keyword argument argument with ${VARIABLE} *** Variables *** ${VARIABLE} Variable value @{LIST} List variable here +&{DICT} Key1=Value1 Key2=Value2 *** Test Cases *** Keyword-driven example diff --git a/tests/examplefiles/rust_example.rs b/tests/examplefiles/rust_example.rs deleted file mode 100644 index 8c44af1d..00000000 --- a/tests/examplefiles/rust_example.rs +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright 2012 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -// based on: -// http://shootout.alioth.debian.org/u32/benchmark.php?test=nbody&lang=java - -/* nest some /* comments */ */ - -extern mod std; - -use core::os; - -// Using sqrt from the standard library is way slower than using libc -// directly even though std just calls libc, I guess it must be -// because the the indirection through another dynamic linker -// stub. Kind of shocking. Might be able to make it faster still with -// an llvm intrinsic. 
-#[nolink] -extern mod libc { - #![legacy_exports]; - fn sqrt(n: float) -> float; -} - -fn main() { - let args = os::args(); - let args = if os::getenv(~"RUST_BENCH").is_some() { - ~[~"", ~"4000000"] - } else if args.len() <= 1u { - ~[~"", ~"100000"] - } else { - args - }; - let n = int::from_str(args[1]).get(); - let mut bodies: ~[Body::props] = NBodySystem::make(); - io::println(fmt!("%f", NBodySystem::energy(bodies))); - let mut i = 0; - while i < n { - NBodySystem::advance(bodies, 0.01); - i += 1; - } - io::println(fmt!("%f", NBodySystem::energy(bodies))); -} - -mod NBodySystem { - use Body; - - pub fn make() -> ~[Body::props] { - let mut bodies: ~[Body::props] = - ~[Body::sun(), - Body::jupiter(), - Body::saturn(), - Body::uranus(), - Body::neptune()]; - - let mut px = 0.0; - let mut py = 0.0; - let mut pz = 0.0; - - let mut i = 0; - while i < 5 { - px += bodies[i].vx * bodies[i].mass; - py += bodies[i].vy * bodies[i].mass; - pz += bodies[i].vz * bodies[i].mass; - - i += 1; - } - - // side-effecting - Body::offset_momentum(&mut bodies[0], px, py, pz); - - return bodies; - } - - pub fn advance(bodies: &mut [Body::props], dt: float) { - let mut i = 0; - while i < 5 { - let mut j = i + 1; - while j < 5 { - advance_one(&mut bodies[i], - &mut bodies[j], dt); - j += 1; - } - - i += 1; - } - - i = 0; - while i < 5 { - move_(&mut bodies[i], dt); - i += 1; - } - } - - pub fn advance_one(bi: &mut Body::props, - bj: &mut Body::props, - dt: float) unsafe { - let dx = bi.x - bj.x; - let dy = bi.y - bj.y; - let dz = bi.z - bj.z; - - let dSquared = dx * dx + dy * dy + dz * dz; - - let distance = ::libc::sqrt(dSquared); - let mag = dt / (dSquared * distance); - - bi.vx -= dx * bj.mass * mag; - bi.vy -= dy * bj.mass * mag; - bi.vz -= dz * bj.mass * mag; - - bj.vx += dx * bi.mass * mag; - bj.vy += dy * bi.mass * mag; - bj.vz += dz * bi.mass * mag; - } - - pub fn move_(b: &mut Body::props, dt: float) { - b.x += dt * b.vx; - b.y += dt * b.vy; - b.z += dt * b.vz; - } - - pub fn 
energy(bodies: &[Body::props]) -> float unsafe { - let mut dx; - let mut dy; - let mut dz; - let mut distance; - let mut e = 0.0; - - let mut i = 0; - while i < 5 { - e += - 0.5 * bodies[i].mass * - (bodies[i].vx * bodies[i].vx + bodies[i].vy * bodies[i].vy - + bodies[i].vz * bodies[i].vz); - - let mut j = i + 1; - while j < 5 { - dx = bodies[i].x - bodies[j].x; - dy = bodies[i].y - bodies[j].y; - dz = bodies[i].z - bodies[j].z; - - distance = ::libc::sqrt(dx * dx + dy * dy + dz * dz); - e -= bodies[i].mass * bodies[j].mass / distance; - - j += 1; - } - - i += 1; - } - return e; - - } -} - -mod Body { - use Body; - - pub const PI: float = 3.141592653589793; - pub const SOLAR_MASS: float = 39.478417604357432; - // was 4 * PI * PI originally - pub const DAYS_PER_YEAR: float = 365.24; - - pub type props = - {mut x: float, - mut y: float, - mut z: float, - mut vx: float, - mut vy: float, - mut vz: float, - mass: float}; - - pub fn jupiter() -> Body::props { - return {mut x: 4.84143144246472090e+00, - mut y: -1.16032004402742839e+00, - mut z: -1.03622044471123109e-01, - mut vx: 1.66007664274403694e-03 * DAYS_PER_YEAR, - mut vy: 7.69901118419740425e-03 * DAYS_PER_YEAR, - mut vz: -6.90460016972063023e-05 * DAYS_PER_YEAR, - mass: 9.54791938424326609e-04 * SOLAR_MASS}; - } - - pub fn saturn() -> Body::props { - return {mut x: 8.34336671824457987e+00, - mut y: 4.12479856412430479e+00, - mut z: -4.03523417114321381e-01, - mut vx: -2.76742510726862411e-03 * DAYS_PER_YEAR, - mut vy: 4.99852801234917238e-03 * DAYS_PER_YEAR, - mut vz: 2.30417297573763929e-05 * DAYS_PER_YEAR, - mass: 2.85885980666130812e-04 * SOLAR_MASS}; - } - - pub fn uranus() -> Body::props { - return {mut x: 1.28943695621391310e+01, - mut y: -1.51111514016986312e+01, - mut z: -2.23307578892655734e-01, - mut vx: 2.96460137564761618e-03 * DAYS_PER_YEAR, - mut vy: 2.37847173959480950e-03 * DAYS_PER_YEAR, - mut vz: -2.96589568540237556e-05 * DAYS_PER_YEAR, - mass: 4.36624404335156298e-05 * SOLAR_MASS}; - } - - pub 
fn neptune() -> Body::props { - return {mut x: 1.53796971148509165e+01, - mut y: -2.59193146099879641e+01, - mut z: 1.79258772950371181e-01, - mut vx: 2.68067772490389322e-03 * DAYS_PER_YEAR, - mut vy: 1.62824170038242295e-03 * DAYS_PER_YEAR, - mut vz: -9.51592254519715870e-05 * DAYS_PER_YEAR, - mass: 5.15138902046611451e-05 * SOLAR_MASS}; - } - - pub fn sun() -> Body::props { - return {mut x: 0.0, - mut y: 0.0, - mut z: 0.0, - mut vx: 0.0, - mut vy: 0.0, - mut vz: 0.0, - mass: SOLAR_MASS}; - } - - pub fn offset_momentum(props: &mut Body::props, - px: float, py: float, pz: float) { - props.vx = -px / SOLAR_MASS; - props.vy = -py / SOLAR_MASS; - props.vz = -pz / SOLAR_MASS; - } - -} diff --git a/tests/examplefiles/test.adls b/tests/examplefiles/test.adls new file mode 100644 index 00000000..1cdb2daf --- /dev/null +++ b/tests/examplefiles/test.adls @@ -0,0 +1,313 @@ +-- +-- Example of an openEHR Archetype, written in the Archetype Definition Language (ADL) +-- Definition available here: http://www.openehr.org/releases/trunk/architecture/am/adl2.pdf +-- Author: derived from the openEHR-EHR-EVALUATION.adverse_reaction.v1 archetype at http://www.openEHR.org/ckm +-- + +archetype (adl_version=2.0.5; rm_release=1.0.2; generated) + openEHR-EHR-EVALUATION.adverse_reaction.v1.0.0 + +language + original_language = <[ISO_639-1::en]> + +description + lifecycle_state = <"unmanaged"> + original_author = < + ["name"] = <"Heather Leslie"> + ["organisation"] = <"Ocean Informatics"> + ["email"] = <"heather.leslie@oceaninformatics.com"> + ["date"] = <"2010-11-08"> + > + copyright = <"© openEHR Foundation"> + details = < + ["en"] = < + language = <[ISO_639-1::en]> + purpose = <"To record information about any harmful..."> + use = <"Use to record all information about the presence ..."> + keywords = <"reaction", "allergy", "allergic", "adverse"> + misuse = <"Not to be used for recording the absence (or ..."> + > + > + other_contributors = <"Jane Doe, Australia"> + other_details = < + 
["references"] = <"Adverse Reaction, draft archetype, ..."> + ["MD5-CAM-1.0.1"] = <"260699D2EFDE4F7C7BC3C6C501A51A61"> + > + +definition + EVALUATION[id1] matches { -- Adverse Reaction + data matches { + ITEM_TREE[id2] matches { + items cardinality matches {1..*; unordered} matches { + ELEMENT[id3] matches { -- Substance/Agent + value matches { + DV_TEXT[id51] + } + } + ELEMENT[id5] occurrences matches {0..1} matches { -- Absolute Contraindication? + value matches { + DV_BOOLEAN[id52] matches { + value matches {True} + } + } + } + ELEMENT[id50] occurrences matches {0..1} matches { -- Future Use + value matches { + DV_TEXT[id53] + } + } + ELEMENT[id7] occurrences matches {0..1} matches { -- Overall Comment + value matches { + DV_TEXT[id54] + } + } + CLUSTER[id10] matches { -- Reaction Event + items matches { + ELEMENT[id11] occurrences matches {0..1} matches { -- Specific Substance/Agent + value matches { + DV_TEXT[id55] + } + } + ELEMENT[id12] matches { -- Manifestation + value matches { + DV_TEXT[id56] + } + } + ELEMENT[id17] occurrences matches {0..1} matches { -- Reaction Type + value matches { + DV_TEXT[id57] + } + } + ELEMENT[id22] occurrences matches {0..1} matches { -- Certainty + value matches { + DV_CODED_TEXT[id58] matches { + defining_code matches {[ac1]} -- Certainty (synthesised) + } + } + } + ELEMENT[id13] occurrences matches {0..1} matches { -- Reaction Description + value matches { + DV_TEXT[id59] + } + } + ELEMENT[id28] occurrences matches {0..1} matches { -- Onset of Reaction + value matches { + DV_DATE_TIME[id60] + } + } + ELEMENT[id29] occurrences matches {0..1} matches { -- Duration of Reaction + value matches { + DV_DURATION[id61] + } + } + allow_archetype CLUSTER[id30] matches { -- Additional Reaction Detail + include + archetype_id/value matches {/openEHR-EHR-CLUSTER\.anatomical_location(-a-zA-Z0-9_]+)*\.v1/} + } + ELEMENT[id19] occurrences matches {0..1} matches { -- Exposure Description + value matches { + DV_TEXT[id62] + } + } + 
ELEMENT[id21] occurrences matches {0..1} matches { -- Earliest Exposure + value matches { + DV_DATE_TIME[id63] + } + } + ELEMENT[id26] occurrences matches {0..1} matches { -- Duration of Exposure + value matches { + DV_DURATION[id64] + } + } + allow_archetype CLUSTER[id20] matches { -- Additional Exposure Detail + include + archetype_id/value matches {/openEHR-EHR-CLUSTER\.amount(-a-zA-Z0-9_]+)*\.v1|openEHR-EHR-CLUSTER\.medication_admin(-a-zA-Z0-9_]+)*\.v1|openEHR-EHR-CLUSTER\.timing(-a-zA-Z0-9_]+)*\.v1/} + } + ELEMENT[id41] occurrences matches {0..1} matches { -- Clinical Management Description + value matches { + DV_TEXT[id65] + } + } + ELEMENT[id32] matches { -- Multimedia + value matches { + DV_MULTIMEDIA[id66] matches { + media_type + } + } + } + allow_archetype CLUSTER[id42] matches { -- Reporting Details + include + archetype_id/value matches {/.*/} + } + ELEMENT[id33] occurrences matches {0..1} matches { -- Reaction Comment + value matches { + DV_TEXT[id67] + } + } + } + } + } + } + } + protocol matches { + ITEM_TREE[id43] matches { + items matches { + ELEMENT[id45] occurrences matches {0..1} matches { -- Reaction Reported? 
+ value matches { + DV_BOOLEAN[id68] matches { + value matches {True, False} + } + } + } + ELEMENT[id49] occurrences matches {0..1} matches { -- Report Comment + value matches { + DV_TEXT[id69] + } + } + ELEMENT[id46] matches { -- Adverse Reaction Report + value matches { + DV_URI[id70] + } + } + ELEMENT[id48] occurrences matches {0..1} matches { -- Supporting Clinical Record Information + value matches { + DV_EHR_URI[id71] + } + } + } + } + } + } + +terminology + term_definitions = < + ["en"] = < + ["id1"] = < + text = <"Adverse Reaction"> + description = <"A harmful or undesirable, unexpected effect associated with exposure to any substance or agent, including food, plants, animals, venom from animal stings, or a medication at therapeutic or sub-therapeutic doses."> + > + ["id3"] = < + text = <"Substance/Agent"> + description = <"Identification of a substance, agent, or a class of substance, that is considered to be responsible for the Adverse Reaction."> + > + ["id5"] = < + text = <"Absolute Contraindication?"> + description = <"Is administration of this Substance/Agent absolutely contraindicated in this individual?"> + > + ["id7"] = < + text = <"Overall Comment"> + description = <"Additional narrative about the Adverse Reaction as a whole, not captured in other fields."> + > + ["id10"] = < + text = <"Reaction Event"> + description = <"Details about each Adverse Reaction Event."> + > + ["id11"] = < + text = <"Specific Substance/Agent"> + description = <"Specific identification of the actual Substance/Agent considered to be responsible for the Adverse Reaction event."> + > + ["id12"] = < + text = <"Manifestation"> + description = <"Clinical manifestation of the Adverse Reaction expressed as a single word, phrase or brief description, e.g. 
nausea or rash."> + > + ["id13"] = < + text = <"Reaction Description"> + description = <"Narrative description of the Adverse Reaction."> + > + ["id17"] = < + text = <"Reaction Type"> + description = <"The type of Adverse Reaction as determined by the clinician."> + > + ["id19"] = < + text = <"Exposure Description"> + description = <"Description about exposure to the Substance/Agent."> + > + ["id20"] = < + text = <"Additional Exposure Detail"> + description = <"Additional detail about exposure/s for this Adverse Reaction event, including structured medication amount/frequency/route information."> + > + ["id21"] = < + text = <"Earliest Exposure"> + description = <"Record of the date and/or time of the earliest or initial exposure to the Substance/Agent."> + > + ["id22"] = < + text = <"Certainty"> + description = <"Degree of certainty, as assessed by a clinician, that the specific Substance/Agent was the cause of the Adverse Reaction."> + > + ["at23"] = < + text = <"Suspected"> + description = <"Possibly the causative agent."> + > + ["at24"] = < + text = <"Probable"> + description = <"Likely to be the causative agent, but not confirmed by testing or rechallenge."> + > + ["at25"] = < + text = <"Confirmed"> + description = <"Confirmed as the causative agent, by testing or rechallenge."> + > + ["id26"] = < + text = <"Duration of Exposure"> + description = <"The amount of time of exposure to the Substance/Agent."> + > + ["id28"] = < + text = <"Onset of Reaction"> + description = <"Record of the date and/or time of the onset of the Adverse Reaction."> + > + ["id29"] = < + text = <"Duration of Reaction"> + description = <"The amount of time that the Adverse Reaction was present."> + > + ["id30"] = < + text = <"Additional Reaction Detail"> + description = <"Additional detail about the Adverse Reaction, including anatomical location."> + > + ["id32"] = < + text = <"Multimedia"> + description = <"Inclusion of any multimedia file to support the recording of the Adverse 
Reaction event."> + > + ["id33"] = < + text = <"Reaction Comment"> + description = <"Additional narrative about the Adverse Reaction event not captured in other fields."> + > + ["id41"] = < + text = <"Clinical Management Description"> + description = <"Narrative description of the clinical management provided."> + > + ["id42"] = < + text = <"Reporting Details"> + description = <"Further details required for reporting to regulatory bodies."> + > + ["id45"] = < + text = <"Reaction Reported?"> + description = <"Was the Adverse Reaction reported to a regulatory body?"> + > + ["id46"] = < + text = <"Adverse Reaction Report"> + description = <"Link to an Adverse Reaction Report sent to a regulatory body."> + > + ["id48"] = < + text = <"Supporting Clinical Record Information"> + description = <"Link to further information about the presentation and findings that exist elsewhere in the health record, including allergy test reports."> + > + ["id49"] = < + text = <"Report Comment"> + description = <"Additional narrative about the Adverse Reaction Report, including the reason for non-reporting, if required."> + > + ["id50"] = < + text = <"Future Use"> + description = <"Narrative description of clinician instructions or advice related to future exposure to, or administration of, the Substance/Agent."> + > + ["ac1"] = < + text = <"Certainty (synthesised)"> + description = <"Degree of certainty, as assessed by a clinician, that the specific Substance/Agent was the cause of the Adverse Reaction. 
(synthesised)"> + > + > + > + value_sets = < + ["ac1"] = < + id = <"ac1"> + members = <"at23", "at24", "at25"> + > + > + diff --git a/tests/examplefiles/test.bpl b/tests/examplefiles/test.bpl new file mode 100644 index 00000000..add25e1a --- /dev/null +++ b/tests/examplefiles/test.bpl @@ -0,0 +1,140 @@ +/* + * Test Boogie rendering +*/ + +const N: int; +axiom 0 <= N; + +procedure foo() { + break; +} +// array to sort as global array, because partition & quicksort have to +var a: [int] int; +var original: [int] int; +var perm: [int] int; + +// Is array a of length N sorted? +function is_sorted(a: [int] int, l: int, r: int): bool +{ + (forall j, k: int :: l <= j && j < k && k <= r ==> a[j] <= a[k]) +} + +// is range a[l:r] unchanged? +function is_unchanged(a: [int] int, b: [int] int, l: int, r: int): bool { + (forall i: int :: l <= i && i <= r ==> a[i] == b[i]) +} + +function is_permutation(a: [int] int, original: [int] int, perm: [int] int, N: int): bool +{ + (forall k: int :: 0 <= k && k < N ==> 0 <= perm[k] && perm[k] < N) && + (forall k, j: int :: 0 <= k && k < j && j < N ==> perm[k] != perm[j]) && + (forall k: int :: 0 <= k && k < N ==> a[k] == original[perm[k]]) +} + +function count(a: [int] int, x: int, N: int) returns (int) +{ if N == 0 then 0 else if a[N-1] == x then count(a, x, N - 1) + 1 else count(a, x, N-1) } + + +/* +function count(a: [int] int, x: int, N: int) returns (int) +{ if N == 0 then 0 else if a[N-1] == x then count(a, x, N - 1) + 1 else count(a, x, N-1) } + +function is_permutation(a: [int] int, b: [int] int, l: int, r: int): bool { + (forall i: int :: l <= i && i <= r ==> count(a, a[i], r+1) == count(b, a[i], r+1)) +} +*/ + +procedure partition(l: int, r: int, N: int) returns (p: int) + modifies a, perm; + requires N > 0; + requires l >= 0 && l < r && r < N; + requires ((r+1) < N) ==> (forall k: int :: (k >= l && k <= r) ==> a[k] <= a[r+1]); + requires ((l-1) >= 0) ==> (forall k: int :: (k >= l && k <= r) ==> a[k] > a[l-1]); + + /* a is a 
permutation of the original array original */ + requires is_permutation(a, original, perm, N); + + ensures (forall k: int :: (k >= l && k <= p ) ==> a[k] <= a[p]); + ensures (forall k: int :: (k > p && k <= r ) ==> a[k] > a[p]); + ensures p >= l && p <= r; + ensures is_unchanged(a, old(a), 0, l-1); + ensures is_unchanged(a, old(a), r+1, N); + ensures ((r+1) < N) ==> (forall k: int :: (k >= l && k <= r) ==> a[k] <= a[r+1]); + ensures ((l-1) >= 0) ==> (forall k: int :: (k >= l && k <= r) ==> a[k] > a[l-1]); + + /* a is a permutation of the original array original */ + ensures is_permutation(a, original, perm, N); +{ + var i: int; + var sv: int; + var pivot: int; + var tmp: int; + + i := l; + sv := l; + pivot := a[r]; + + while (i < r) + invariant i <= r && i >= l; + invariant sv <= i && sv >= l; + invariant pivot == a[r]; + invariant (forall k: int :: (k >= l && k < sv) ==> a[k] <= old(a[r])); + invariant (forall k: int :: (k >= sv && k < i) ==> a[k] > old(a[r])); + + /* a is a permutation of the original array original */ + invariant is_permutation(a, original, perm, N); + + invariant is_unchanged(a, old(a), 0, l-1); + invariant is_unchanged(a, old(a), r+1, N); + invariant ((r+1) < N) ==> (forall k: int :: (k >= l && k <= r) ==> a[k] <= a[r+1]); + invariant ((l-1) >= 0) ==> (forall k: int :: (k >= l && k <= r) ==> a[k] > a[l-1]); + { + if ( a[i] <= pivot) { + tmp := a[i]; a[i] := a[sv]; a[sv] := tmp; + tmp := perm[i]; perm[i] := perm[sv]; perm[sv] := tmp; + sv := sv +1; + } + i := i + 1; + } + + //swap + tmp := a[i]; a[i] := a[sv]; a[sv] := tmp; + tmp := perm[i]; perm[i] := perm[sv]; perm[sv] := tmp; + + p := sv; +} + + +procedure quicksort(l: int, r: int, N: int) + modifies a, perm; + + requires N > 0; + requires l >= 0 && l < r && r < N; + requires ((r+1) < N) ==> (forall k: int :: (k >= l && k <= r) ==> a[k] <= a[r+1]); + requires ((l-1) >= 0) ==> (forall k: int :: (k >= l && k <= r) ==> a[k] > a[l-1]); + + /* a is a permutation of the original array original */ 
+ requires is_permutation(a, original, perm, N); + + ensures ((r+1) < N) ==> (forall k: int :: (k >= l && k <= r) ==> a[k] <= a[r+1]); + ensures ((l-1) >= 0) ==> (forall k: int :: (k >= l && k <= r) ==> a[k] > a[l-1]); + + ensures is_unchanged(a, old(a), 0, l-1); + ensures is_unchanged(a, old(a), r+1, N); + ensures is_sorted(a, l, r); + + /* a is a permutation of the original array original */ + ensures is_permutation(a, original, perm, N); +{ + var p: int; + + call p := partition(l, r, N); + + if ((p-1) > l) { + call quicksort(l, p-1, N); + } + + if ((p+1) < r) { + call quicksort(p+1, r, N); + } +} diff --git a/tests/examplefiles/test.cadl b/tests/examplefiles/test.cadl new file mode 100644 index 00000000..5c3f4881 --- /dev/null +++ b/tests/examplefiles/test.cadl @@ -0,0 +1,32 @@ + -- + -- Example fragment of an openEHR Archetype, written in cADL, a subsyntax of the Archetype Definition Language (ADL) + -- definition available here: http://www.openehr.org/releases/trunk/architecture/am/adl2.pdf + -- Author: Thomas Beale + -- + + EVALUATION[id1] matches { -- Adverse Reaction + data matches { + ITEM_TREE[id2] matches { + items cardinality matches {1..*; unordered} matches { + ELEMENT[id3] matches { -- Substance/Agent + value matches { + DV_TEXT[id51] + } + } + ELEMENT[id50] occurrences matches {0..1} matches { -- Future Use + value matches { + DV_TEXT[id53] + } + } + CLUSTER[id10] matches { -- Reaction Event + items matches { + allow_archetype CLUSTER[id30] matches { -- Additional Reaction Detail + include + archetype_id/value matches {/openEHR-EHR-CLUSTER\.anatomical_location(-a-zA-Z0-9_]+)*\.v1/} + } + } + } + } + } + } + } diff --git a/tests/examplefiles/test.gradle b/tests/examplefiles/test.gradle new file mode 100644 index 00000000..0bc834c1 --- /dev/null +++ b/tests/examplefiles/test.gradle @@ -0,0 +1,20 @@ +apply plugin: 'java' + +repositories { + mavenCentral() +} + +dependencies { + testCompile 'junit:junit:4.12' +} + +task sayHello << { + def x = 
SomeClass.worldString + println "Hello ${x}" +} + +private class SomeClass { + public static String getWorldString() { + return "world" + } +} diff --git a/tests/examplefiles/test.odin b/tests/examplefiles/test.odin new file mode 100644 index 00000000..05b01d22 --- /dev/null +++ b/tests/examplefiles/test.odin @@ -0,0 +1,43 @@ +-- +-- Example of a fragment of an openEHR Archetype, written in the Object Data Instance Notation (ODIN) +-- Definition available here: https://github.com/openEHR/odin +-- Author: Thomas Beale +-- + + original_author = < + ["name"] = <"Dr J Joyce"> + ["organisation"] = <"NT Health Service"> + ["date"] = <2003-08-03> + > + term_bindings = < + ["umls"] = < + ["id1"] = <http://umls.nlm.edu/id/C124305> -- apgar result + ["id2"] = <http://umls.nlm.edu/id/0000000> -- 1-minute event + > + > + lifecycle_state = <"initial"> + resource_package_uri = <"http://www.aihw.org.au/data_sets/diabetic_archetypes.html"> + + details = < + ["en"] = < + language = <[iso_639-1::en]> + purpose = <"archetype for diabetic patient review"> + use = <"used for all hospital or clinic-based diabetic reviews, + including first time. Optional sections are removed according to the particular review" + > + misuse = <"not appropriate for pre-diagnosis use"> + original_resource_uri = <"http://www.healthdata.org.au/data_sets/diabetic_review_data_set_1.html"> + > + ["de"] = < + language = <[iso_639-1::de]> + purpose = <"Archetyp für die Untersuchung von Patienten mit Diabetes"> + use = <"wird benutzt für alle Diabetes-Untersuchungen im + Krankenhaus, inklusive der ersten Vorstellung. Optionale + Abschnitte werden in Abhängigkeit von der speziellen + Vorstellung entfernt." 
+ > + misuse = <"nicht geeignet für Benutzung vor Diagnosestellung"> + original_resource_uri = <"http://www.healthdata.org.au/data_sets/diabetic_review_data_set_1.html"> + > + > + diff --git a/tests/examplefiles/test.psl b/tests/examplefiles/test.psl new file mode 100644 index 00000000..3ac99498 --- /dev/null +++ b/tests/examplefiles/test.psl @@ -0,0 +1,182 @@ +// This is a comment + +// 1. Basics + +// Functions +func Add(X : Univ_Integer; Y : Univ_Integer) -> Univ_Integer is + return X + Y; +end func Add; +// End of line semi-colons are optional +// +, +=, -, -=, *, *=, /, /= +// all do what you'd expect (/ is integer division) + +// If you find Univ_Integer to be too verbose you can import Short_Names +// which defines aliases like Int for Univ_Integer and String for Univ_String +import PSL::Short_Names::*, * + +func Greetings() is + const S : String := "Hello, World!" + Println(S) +end func Greetings +// All declarations are 'const', 'var', or 'ref' +// Assignment is :=, equality checks are ==, and != is not equals + +func Boolean_Examples(B : Bool) is + const And := B and #true // Parallel execution of operands + const And_Then := B and then #true // Short-Circuit + const Or := B or #false // Parallel execution of operands + const Or_Else := B or else #false // Short-Cirtuit + const Xor := B xor #true + var Result : Bool := #true; + Result and= #false; + Result or= #true; + Result xor= #false; +end func Boolean_Examples +// Booleans are a special type of enumeration +// All enumerations are preceded by a sharp '#' + +func Fib(N : Int) {N >= 0} -> Int is + if N <= 1 then + return N + else + // Left and right side of '+' are computed in Parallel here + return Fib(N - 1) + Fib(N - 2) + end if +end func Fib +// '{N >= 0}' is a precondition to this function +// Preconditions are built in to the language and checked by the compiler + +// ParaSail does not have mutable global variables +// Instead, use 'var' parameters +func Increment_All(var Nums : Vector<Int>) is + 
for each Elem of Nums concurrent loop + Elem += 1 + end loop +end func Increment_All +// The 'concurrent' keyword in the loop header tells the compiler that +// iterations of the loop can happen in any order. +// It will choose the most optimal number of threads to use. +// Other options are 'forward' and 'reverse'. + +func Sum_Of_Squares(N : Int) -> Int is + // The type of Sum is inferred + var Sum := 0 + for I in 1 .. N forward loop + Sum += I ** 2 // ** is exponentiation + end loop +end func Sum_Of_Squares + +func Sum_Of(N : Int; Map : func (Int) -> Int) -> Int is + return (for I in 1 .. N => <0> + Map(I)) +end func Sum_Of +// It has functional aspects as well +// Here, we're taking an (Int) -> Int function as a parameter +// and using the inherently parallel map-reduce. +// Initial value is enclosed with angle brackets + +func main(Args : Basic_Array<String>) is + Greetings() // Hello World + Println(Fib(5)) // 5 + // Container Comprehension + var Vec : Vector<Int> := [for I in 0 .. 10 {I mod 2 == 0} => I ** 2] + // Vec = [0, 4, 16, 36, 64, 100] + Increment_All(Vec) + // Vec = [1, 5, 17, 37, 65, 101] + // '|' is an overloaded operator. + // It's usually used for concatenation or adding to a container + Println("First: " | Vec[1] | ", Last: " | Vec[Length(Vec)]); + // Vectors are 1 indexed, 0 indexed ZVectors are also available + + Println(Sum_Of_Squares(3)) + + // Sum of fibs! + Println(Sum_Of(10, Fib)) +end func main + +// Preceding a type with 'optional' allows it to take the value 'null' +func Divide(A, B, C : Real) -> optional Real is + // Real is the floating point type + const Epsilon := 1.0e-6; + if B in -Epsilon .. Epsilon then + return null + elsif C in -Epsilon .. Epsilon then + return null + else + return A / B + A / C + end if +end func Divide + +// 2. 
Modules +// Modules are composed of an interface and a class +// ParaSail has object orientation features + +// modules can be defined as 'concurrent' +// which allows 'locked' and 'queued' parameters +concurrent interface Locked_Box<Content_Type is Assignable<>> is + // Create a box with the given content + func Create(C : optional Content_Type) -> Locked_Box; + + // Put something into the box + func Put(locked var B : Locked_Box; C : Content_Type); + + // Get a copy of current content + func Content(locked B : Locked_Box) -> optional Content_Type; + + // Remove current content, leaving it null + func Remove(locked var B : Locked_Box) -> optional Content_Type; + + // Wait until content is non-null, then return it, leaving it null. + func Get(queued var B : Locked_Box) -> Content_Type; +end interface Locked_Box; + +concurrent class Locked_Box is + var Content : optional Content_Type; +exports + func Create(C : optional Content_Type) -> Locked_Box is + return (Content => C); + end func Create; + + func Put(locked var B : Locked_Box; C : Content_Type) is + B.Content := C; + end func Put; + + func Content(locked B : Locked_Box) -> optional Content_Type is + return B.Content; + end func Content; + + func Remove(locked var B : Locked_Box) -> Result : optional Content_Type is + // '<==' is the move operator + // It moves the right operand into the left operand, + // leaving the right null. 
+ Result <== B.Content; + end func Remove; + + func Get(queued var B : Locked_Box) -> Result : Content_Type is + queued until B.Content not null then + Result <== B.Content; + end func Get; +end class Locked_Box; + +func Use_Box(Seed : Univ_Integer) is + var U_Box : Locked_Box<Univ_Integer> := Create(null); + // The type of 'Ran' can be left out because + // it is inferred from the return type of Random::Start + var Ran := Random::Start(Seed); + + Println("Starting 100 pico-threads trying to put something in the box"); + Println(" or take something out."); + for I in 1..100 concurrent loop + if I < 30 then + Println("Getting out " | Get(U_Box)); + else + Println("Putting in " | I); + U_Box.Put(I); + + // The first parameter can be moved to the front with a dot + // X.Foo(Y) is equivalent to Foo(X, Y) + end if; + end loop; + + Println("And the winner is: " | Remove(U_Box)); + Println("And the box is now " | Content(U_Box)); +end func Use_Box; diff --git a/tests/examplefiles/test2.odin b/tests/examplefiles/test2.odin new file mode 100644 index 00000000..2a6b4517 --- /dev/null +++ b/tests/examplefiles/test2.odin @@ -0,0 +1,30 @@ +school_schedule = < + lesson_times = <08:30:00, 09:30:00, 10:30:00, ...> + + locations = < + [1] = <"under the big plane tree"> + [2] = <"under the north arch"> + [3] = <"in a garden"> + > + + subjects = < + ["philosophy:plato"] = < -- note construction of key + name = <"philosophy"> + teacher = <"plato"> + topics = <"meta-physics", "natural science"> + weighting = <76%> + > + ["philosophy:kant"] = < + name = <"philosophy"> + teacher = <"kant"> + topics = <"meaning and reason", "meta-physics", "ethics"> + weighting = <80%> + > + ["art"] = < + name = <"art"> + teacher = <"goya"> + topics = <"technique", "portraiture", "satire"> + weighting = <78%> + > + > +> diff --git a/tests/examplefiles/test_basic.adls b/tests/examplefiles/test_basic.adls new file mode 100644 index 00000000..df5aa743 --- /dev/null +++ b/tests/examplefiles/test_basic.adls @@ 
-0,0 +1,28 @@ +-- +-- Example of an openEHR Archetype, written in the Archetype Definition Language (ADL) +-- Definition available here: http://www.openehr.org/releases/trunk/architecture/am/adl2.pdf +-- Author: derived from the openEHR-EHR-EVALUATION.adverse_reaction.v1 archetype at http://www.openEHR.org/ckm +-- + +archetype (adl_version=2.0.5; rm_release=1.0.2; generated) + openEHR-EHR-EVALUATION.adverse_reaction.v1.0.0 + +language + original_language = <[ISO_639-1::en]> + +description + lifecycle_state = <"unmanaged"> + +definition + EVALUATION[id1] + +terminology + term_definitions = < + ["en"] = < + ["id1"] = < + text = <"Adverse Reaction"> + description = <"xxx"> + > + > + > + diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py index c1e83077..5883fb5c 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -151,7 +151,7 @@ class CmdLineTest(unittest.TestCase): o = self.check_success('-Fhighlight:tokentype=Name.Blubb,' 'names=TESTFILE filename', '-fhtml', filename) - self.assertTrue('<span class="n-Blubb' in o) + self.assertTrue('<span class="n n-Blubb' in o) def test_H_opt(self): o = self.check_success('-H', 'formatter', 'html') diff --git a/tests/test_lexers_other.py b/tests/test_lexers_other.py index 7457d045..bb667c05 100644 --- a/tests/test_lexers_other.py +++ b/tests/test_lexers_other.py @@ -6,14 +6,12 @@ :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. 
""" - import glob import os import unittest from pygments.lexers import guess_lexer -from pygments.lexers.scripting import RexxLexer - +from pygments.lexers.scripting import EasytrieveLexer, JclLexer, RexxLexer def _exampleFilePath(filename): return os.path.join(os.path.dirname(__file__), 'examplefiles', filename) @@ -36,7 +34,24 @@ class AnalyseTextTest(unittest.TestCase): self.assertEqual(guessedLexer.name, lexer.name) def testCanRecognizeAndGuessExampleFiles(self): - self._testCanRecognizeAndGuessExampleFiles(RexxLexer) + LEXERS_TO_TEST = [ + EasytrieveLexer, + JclLexer, + RexxLexer, + ] + for lexerToTest in LEXERS_TO_TEST: + self._testCanRecognizeAndGuessExampleFiles(lexerToTest) + + +class EasyTrieveLexerTest(unittest.TestCase): + def testCanGuessFromText(self): + self.assertLess(0, EasytrieveLexer.analyse_text('MACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text('\nMACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text(' \nMACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text(' \n MACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text('*\nMACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text( + '*\n *\n\n \n*\n MACRO')) class RexxLexerTest(unittest.TestCase): diff --git a/tests/test_shell.py b/tests/test_shell.py index fd5009b0..4eb5a15a 100644 --- a/tests/test_shell.py +++ b/tests/test_shell.py @@ -61,3 +61,29 @@ class BashTest(unittest.TestCase): ] self.assertEqual(tokens, list(self.lexer.get_tokens(fragment))) + def testShortVariableNames(self): + fragment = u'x="$"\ny="$_"\nz="$abc"\n' + tokens = [ + # single lone $ + (Token.Name.Variable, u'x'), + (Token.Operator, u'='), + (Token.Literal.String.Double, u'"'), + (Token.Text, u'$'), + (Token.Literal.String.Double, u'"'), + (Token.Text, u'\n'), + # single letter shell var + (Token.Name.Variable, u'y'), + (Token.Operator, u'='), + (Token.Literal.String.Double, u'"'), + (Token.Name.Variable, u'$_'), + (Token.Literal.String.Double, u'"'), + (Token.Text, u'\n'), + # multi-letter user 
var + (Token.Name.Variable, u'z'), + (Token.Operator, u'='), + (Token.Literal.String.Double, u'"'), + (Token.Name.Variable, u'$abc'), + (Token.Literal.String.Double, u'"'), + (Token.Text, u'\n'), + ] + self.assertEqual(tokens, list(self.lexer.get_tokens(fragment))) diff --git a/tests/test_terminal_formatter.py b/tests/test_terminal_formatter.py new file mode 100644 index 00000000..07337cd5 --- /dev/null +++ b/tests/test_terminal_formatter.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +""" + Pygments terminal formatter tests + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from __future__ import print_function + +import unittest +import re + +from pygments.util import StringIO +from pygments.lexers.sql import PlPgsqlLexer +from pygments.formatters import TerminalFormatter + +DEMO_TEXT = '''\ +-- comment +select +* from bar; +''' +DEMO_LEXER = PlPgsqlLexer +DEMO_TOKENS = list(DEMO_LEXER().get_tokens(DEMO_TEXT)) + +ANSI_RE = re.compile(r'\x1b[\w\W]*?m') + +def strip_ansi(x): + return ANSI_RE.sub('', x) + +class TerminalFormatterTest(unittest.TestCase): + def test_reasonable_output(self): + out = StringIO() + TerminalFormatter().format(DEMO_TOKENS, out) + plain = strip_ansi(out.getvalue()) + self.assertEqual(DEMO_TEXT.count('\n'), plain.count('\n')) + print(repr(plain)) + + for a, b in zip(DEMO_TEXT.splitlines(), plain.splitlines()): + self.assertEqual(a, b) + + def test_reasonable_output_lineno(self): + out = StringIO() + TerminalFormatter(linenos=True).format(DEMO_TOKENS, out) + plain = strip_ansi(out.getvalue()) + self.assertEqual(DEMO_TEXT.count('\n') + 1, plain.count('\n')) + print(repr(plain)) + + for a, b in zip(DEMO_TEXT.splitlines(), plain.splitlines()): + self.assertTrue(a in b) diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..8a33f99c --- /dev/null +++ b/tox.ini @@ -0,0 +1,7 @@ +[tox] +envlist = py26, py27, py33, py34 +[testenv] +deps 
= + nose + coverage +commands = python -d tests/run.py {posargs} |
