diff options
| author | Tim Hatch <tim@timhatch.com> | 2015-10-13 11:34:10 -0700 |
|---|---|---|
| committer | Tim Hatch <tim@timhatch.com> | 2015-10-13 11:34:10 -0700 |
| commit | fe5643e7d13809ca6eae7ec7e95c03bea6012226 (patch) | |
| tree | eb9cdcfe0a2dadff5a76f4bcc96acb2a72a1f160 /pygments | |
| parent | 483deba6920adf5c56a318acffb88c890656bc70 (diff) | |
| parent | 27e304015dc3882ddb59122e168765775fb0e014 (diff) | |
| download | pygments-fe5643e7d13809ca6eae7ec7e95c03bea6012226.tar.gz | |
Merged in hanachin/pygments-main/add-ruby-filename (pull request #450)
Add Gemfile to RubyLexer filenames
Diffstat (limited to 'pygments')
47 files changed, 3109 insertions, 392 deletions
diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py index 55548d30..b22be54f 100644 --- a/pygments/formatters/html.py +++ b/pygments/formatters/html.py @@ -140,7 +140,7 @@ class HtmlFormatter(Formatter): When `tagsfile` is set to the path of a ctags index file, it is used to generate hyperlinks from names to their definition. You must enable - `anchorlines` and run ctags with the `-n` option for this to work. The + `lineanchors` and run ctags with the `-n` option for this to work. The `python-ctags` module from PyPI must be installed to use this feature; otherwise a `RuntimeError` will be raised. @@ -428,6 +428,15 @@ class HtmlFormatter(Formatter): return self.classprefix + ttypeclass return '' + def _get_css_classes(self, ttype): + """Return the css classes of this token type prefixed with + the classprefix option.""" + cls = self._get_css_class(ttype) + while ttype not in STANDARD_TYPES: + ttype = ttype.parent + cls = self._get_css_class(ttype) + ' ' + cls + return cls + def _create_stylesheet(self): t2c = self.ttype2class = {Token: ''} c2s = self.class2style = {} @@ -711,7 +720,7 @@ class HtmlFormatter(Formatter): cclass = getcls(ttype) cspan = cclass and '<span style="%s">' % c2s[cclass][0] or '' else: - cls = self._get_css_class(ttype) + cls = self._get_css_classes(ttype) cspan = cls and '<span class="%s">' % cls or '' parts = value.translate(escape_table).split('\n') diff --git a/pygments/formatters/terminal.py b/pygments/formatters/terminal.py index 3c4b025f..a6eb48a4 100644 --- a/pygments/formatters/terminal.py +++ b/pygments/formatters/terminal.py @@ -101,51 +101,35 @@ class TerminalFormatter(Formatter): def _write_lineno(self, outfile): self._lineno += 1 - outfile.write("\n%04d: " % self._lineno) - - def _format_unencoded_with_lineno(self, tokensource, outfile): - self._write_lineno(outfile) - - for ttype, value in tokensource: - if value.endswith("\n"): - self._write_lineno(outfile) - value = value[:-1] - color = 
self.colorscheme.get(ttype) - while color is None: - ttype = ttype[:-1] - color = self.colorscheme.get(ttype) - if color: - color = color[self.darkbg] - spl = value.split('\n') - for line in spl[:-1]: - self._write_lineno(outfile) - if line: - outfile.write(ansiformat(color, line[:-1])) - if spl[-1]: - outfile.write(ansiformat(color, spl[-1])) - else: - outfile.write(value) - - outfile.write("\n") + outfile.write("%s%04d: " % (self._lineno != 1 and '\n' or '', self._lineno)) + + def _get_color(self, ttype): + # self.colorscheme is a dict containing usually generic types, so we + # have to walk the tree of dots. The base Token type must be a key, + # even if it's empty string, as in the default above. + colors = self.colorscheme.get(ttype) + while colors is None: + ttype = ttype.parent + colors = self.colorscheme.get(ttype) + return colors[self.darkbg] def format_unencoded(self, tokensource, outfile): if self.linenos: - self._format_unencoded_with_lineno(tokensource, outfile) - return + self._write_lineno(outfile) for ttype, value in tokensource: - color = self.colorscheme.get(ttype) - while color is None: - ttype = ttype[:-1] - color = self.colorscheme.get(ttype) - if color: - color = color[self.darkbg] - spl = value.split('\n') - for line in spl[:-1]: - if line: - outfile.write(ansiformat(color, line)) - outfile.write('\n') - if spl[-1]: - outfile.write(ansiformat(color, spl[-1])) - else: - outfile.write(value) + color = self._get_color(ttype) + + for line in value.splitlines(True): + if color: + outfile.write(ansiformat(color, line.rstrip('\n'))) + else: + outfile.write(line.rstrip('\n')) + if line.endswith('\n'): + if self.linenos: + self._write_lineno(outfile) + else: + outfile.write('\n') + + if self.linenos: + outfile.write("\n") diff --git a/pygments/lexer.py b/pygments/lexer.py index 07e81033..581508b0 100644 --- a/pygments/lexer.py +++ b/pygments/lexer.py @@ -14,7 +14,6 @@ from __future__ import print_function import re import sys import time -import 
itertools from pygments.filter import apply_filters, Filter from pygments.filters import get_filter_by_name diff --git a/pygments/lexers/__init__.py b/pygments/lexers/__init__.py index 333ff630..7d0b89d4 100644 --- a/pygments/lexers/__init__.py +++ b/pygments/lexers/__init__.py @@ -88,7 +88,7 @@ def get_lexer_by_name(_alias, **options): return _lexer_cache[name](**options) # continue with lexers from setuptools entrypoints for cls in find_plugin_lexers(): - if _alias in cls.aliases: + if _alias.lower() in cls.aliases: return cls(**options) raise ClassNotFound('no lexer for alias %r found' % _alias) diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 99461c55..2b4124a6 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -21,6 +21,7 @@ LEXERS = { 'ActionScript3Lexer': ('pygments.lexers.actionscript', 'ActionScript 3', ('as3', 'actionscript3'), ('*.as',), ('application/x-actionscript3', 'text/x-actionscript3', 'text/actionscript3')), 'ActionScriptLexer': ('pygments.lexers.actionscript', 'ActionScript', ('as', 'actionscript'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')), 'AdaLexer': ('pygments.lexers.pascal', 'Ada', ('ada', 'ada95', 'ada2005'), ('*.adb', '*.ads', '*.ada'), ('text/x-ada',)), + 'AdlLexer': ('pygments.lexers.archetype', 'ADL', ('adl',), ('*.adl', '*.adls', '*.adlf', '*.adlx'), ()), 'AgdaLexer': ('pygments.lexers.haskell', 'Agda', ('agda',), ('*.agda',), ('text/x-agda',)), 'AlloyLexer': ('pygments.lexers.dsls', 'Alloy', ('alloy',), ('*.als',), ('text/x-alloy',)), 'AmbientTalkLexer': ('pygments.lexers.ambient', 'AmbientTalk', ('at', 'ambienttalk', 'ambienttalk/2'), ('*.at',), ('text/x-ambienttalk',)), @@ -50,6 +51,7 @@ LEXERS = { 'BlitzBasicLexer': ('pygments.lexers.basic', 'BlitzBasic', ('blitzbasic', 'b3d', 'bplus'), ('*.bb', '*.decls'), ('text/x-bb',)), 'BlitzMaxLexer': ('pygments.lexers.basic', 'BlitzMax', ('blitzmax', 'bmax'), ('*.bmx',), ('text/x-bmx',)), 
'BooLexer': ('pygments.lexers.dotnet', 'Boo', ('boo',), ('*.boo',), ('text/x-boo',)), + 'BoogieLexer': ('pygments.lexers.esoteric', 'Boogie', ('boogie',), ('*.bpl',), ()), 'BrainfuckLexer': ('pygments.lexers.esoteric', 'Brainfuck', ('brainfuck', 'bf'), ('*.bf', '*.b'), ('application/x-brainfuck',)), 'BroLexer': ('pygments.lexers.dsls', 'Bro', ('bro',), ('*.bro',), ()), 'BugsLexer': ('pygments.lexers.modeling', 'BUGS', ('bugs', 'winbugs', 'openbugs'), ('*.bug',), ()), @@ -59,6 +61,7 @@ LEXERS = { 'CSharpAspxLexer': ('pygments.lexers.dotnet', 'aspx-cs', ('aspx-cs',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()), 'CSharpLexer': ('pygments.lexers.dotnet', 'C#', ('csharp', 'c#'), ('*.cs',), ('text/x-csharp',)), 'Ca65Lexer': ('pygments.lexers.asm', 'ca65 assembler', ('ca65',), ('*.s',), ()), + 'CadlLexer': ('pygments.lexers.archetype', 'cADL', ('cadl',), ('*.cadl',), ()), 'CbmBasicV2Lexer': ('pygments.lexers.basic', 'CBM BASIC V2', ('cbmbas',), ('*.bas',), ()), 'CeylonLexer': ('pygments.lexers.jvm', 'Ceylon', ('ceylon',), ('*.ceylon',), ('text/x-ceylon',)), 'Cfengine3Lexer': ('pygments.lexers.configs', 'CFEngine3', ('cfengine3', 'cf3'), ('*.cf',), ()), @@ -110,6 +113,7 @@ LEXERS = { 'DylanLidLexer': ('pygments.lexers.dylan', 'DylanLID', ('dylan-lid', 'lid'), ('*.lid', '*.hdp'), ('text/x-dylan-lid',)), 'ECLLexer': ('pygments.lexers.ecl', 'ECL', ('ecl',), ('*.ecl',), ('application/x-ecl',)), 'ECLexer': ('pygments.lexers.c_like', 'eC', ('ec',), ('*.ec', '*.eh'), ('text/x-echdr', 'text/x-ecsrc')), + 'EasytrieveLexer': ('pygments.lexers.scripting', 'Easytrieve', ('easytrieve',), ('*.ezt', '*.mac'), ('text/x-easytrieve',)), 'EbnfLexer': ('pygments.lexers.parsers', 'EBNF', ('ebnf',), ('*.ebnf',), ('text/x-ebnf',)), 'EiffelLexer': ('pygments.lexers.eiffel', 'Eiffel', ('eiffel',), ('*.e',), ('text/x-eiffel',)), 'ElixirConsoleLexer': ('pygments.lexers.erlang', 'Elixir iex session', ('iex',), (), ('text/x-elixir-shellsession',)), @@ -126,7 +130,8 @@ LEXERS = { 
'FancyLexer': ('pygments.lexers.ruby', 'Fancy', ('fancy', 'fy'), ('*.fy', '*.fancypack'), ('text/x-fancysrc',)), 'FantomLexer': ('pygments.lexers.fantom', 'Fantom', ('fan',), ('*.fan',), ('application/x-fantom',)), 'FelixLexer': ('pygments.lexers.felix', 'Felix', ('felix', 'flx'), ('*.flx', '*.flxh'), ('text/x-felix',)), - 'FortranLexer': ('pygments.lexers.fortran', 'Fortran', ('fortran',), ('*.f', '*.f90', '*.F', '*.F90'), ('text/x-fortran',)), + 'FortranFixedLexer': ('pygments.lexers.fortran', 'FortranFixed', ('fortranfixed',), ('*.f', '*.F'), ()), + 'FortranLexer': ('pygments.lexers.fortran', 'Fortran', ('fortran',), ('*.f03', '*.f90', '*.F03', '*.F90'), ('text/x-fortran',)), 'FoxProLexer': ('pygments.lexers.foxpro', 'FoxPro', ('foxpro', 'vfp', 'clipper', 'xbase'), ('*.PRG', '*.prg'), ()), 'GAPLexer': ('pygments.lexers.algebra', 'GAP', ('gap',), ('*.g', '*.gd', '*.gi', '*.gap'), ()), 'GLShaderLexer': ('pygments.lexers.graphics', 'GLSL', ('glsl',), ('*.vert', '*.frag', '*.geo'), ('text/x-glslsrc',)), @@ -142,7 +147,7 @@ LEXERS = { 'GosuLexer': ('pygments.lexers.jvm', 'Gosu', ('gosu',), ('*.gs', '*.gsx', '*.gsp', '*.vark'), ('text/x-gosu',)), 'GosuTemplateLexer': ('pygments.lexers.jvm', 'Gosu Template', ('gst',), ('*.gst',), ('text/x-gosu-template',)), 'GroffLexer': ('pygments.lexers.markup', 'Groff', ('groff', 'nroff', 'man'), ('*.[1234567]', '*.man'), ('application/x-troff', 'text/troff')), - 'GroovyLexer': ('pygments.lexers.jvm', 'Groovy', ('groovy',), ('*.groovy',), ('text/x-groovy',)), + 'GroovyLexer': ('pygments.lexers.jvm', 'Groovy', ('groovy',), ('*.groovy', '*.gradle'), ('text/x-groovy',)), 'HamlLexer': ('pygments.lexers.html', 'Haml', ('haml',), ('*.haml',), ('text/x-haml',)), 'HandlebarsHtmlLexer': ('pygments.lexers.templates', 'HTML+Handlebars', ('html+handlebars',), ('*.handlebars', '*.hbs'), ('text/html+handlebars', 'text/x-handlebars-template')), 'HandlebarsLexer': ('pygments.lexers.templates', 'Handlebars', ('handlebars',), (), ()), @@ -175,9 
+180,10 @@ LEXERS = { 'JavascriptDjangoLexer': ('pygments.lexers.templates', 'JavaScript+Django/Jinja', ('js+django', 'javascript+django', 'js+jinja', 'javascript+jinja'), (), ('application/x-javascript+django', 'application/x-javascript+jinja', 'text/x-javascript+django', 'text/x-javascript+jinja', 'text/javascript+django', 'text/javascript+jinja')), 'JavascriptErbLexer': ('pygments.lexers.templates', 'JavaScript+Ruby', ('js+erb', 'javascript+erb', 'js+ruby', 'javascript+ruby'), (), ('application/x-javascript+ruby', 'text/x-javascript+ruby', 'text/javascript+ruby')), 'JavascriptGenshiLexer': ('pygments.lexers.templates', 'JavaScript+Genshi Text', ('js+genshitext', 'js+genshi', 'javascript+genshitext', 'javascript+genshi'), (), ('application/x-javascript+genshi', 'text/x-javascript+genshi', 'text/javascript+genshi')), - 'JavascriptLexer': ('pygments.lexers.javascript', 'JavaScript', ('js', 'javascript'), ('*.js',), ('application/javascript', 'application/x-javascript', 'text/x-javascript', 'text/javascript')), + 'JavascriptLexer': ('pygments.lexers.javascript', 'JavaScript', ('js', 'javascript'), ('*.js', '*.jsm'), ('application/javascript', 'application/x-javascript', 'text/x-javascript', 'text/javascript')), 'JavascriptPhpLexer': ('pygments.lexers.templates', 'JavaScript+PHP', ('js+php', 'javascript+php'), (), ('application/x-javascript+php', 'text/x-javascript+php', 'text/javascript+php')), 'JavascriptSmartyLexer': ('pygments.lexers.templates', 'JavaScript+Smarty', ('js+smarty', 'javascript+smarty'), (), ('application/x-javascript+smarty', 'text/x-javascript+smarty', 'text/javascript+smarty')), + 'JclLexer': ('pygments.lexers.scripting', 'JCL', ('jcl',), ('*.jcl',), ('text/x-jcl',)), 'JsonLdLexer': ('pygments.lexers.data', 'JSON-LD', ('jsonld', 'json-ld'), ('*.jsonld',), ('application/ld+json',)), 'JsonLexer': ('pygments.lexers.data', 'JSON', ('json',), ('*.json',), ('application/json',)), 'JspLexer': ('pygments.lexers.templates', 'Java Server Page', ('jsp',), 
('*.jsp',), ('application/x-jsp',)), @@ -194,6 +200,7 @@ LEXERS = { 'LassoLexer': ('pygments.lexers.javascript', 'Lasso', ('lasso', 'lassoscript'), ('*.lasso', '*.lasso[89]'), ('text/x-lasso',)), 'LassoXmlLexer': ('pygments.lexers.templates', 'XML+Lasso', ('xml+lasso',), (), ('application/xml+lasso',)), 'LeanLexer': ('pygments.lexers.theorem', 'Lean', ('lean',), ('*.lean',), ('text/x-lean',)), + 'LessCssLexer': ('pygments.lexers.css', 'LessCss', ('less',), ('*.less',), ('text/x-less-css',)), 'LighttpdConfLexer': ('pygments.lexers.configs', 'Lighttpd configuration file', ('lighty', 'lighttpd'), (), ('text/x-lighttpd-conf',)), 'LimboLexer': ('pygments.lexers.inferno', 'Limbo', ('limbo',), ('*.b',), ('text/limbo',)), 'LiquidLexer': ('pygments.lexers.templates', 'liquid', ('liquid',), ('*.liquid',), ()), @@ -221,7 +228,7 @@ LEXERS = { 'MatlabSessionLexer': ('pygments.lexers.matlab', 'Matlab session', ('matlabsession',), (), ()), 'MiniDLexer': ('pygments.lexers.d', 'MiniD', ('minid',), (), ('text/x-minidsrc',)), 'ModelicaLexer': ('pygments.lexers.modeling', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)), - 'Modula2Lexer': ('pygments.lexers.pascal', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)), + 'Modula2Lexer': ('pygments.lexers.modula2', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)), 'MoinWikiLexer': ('pygments.lexers.markup', 'MoinMoin/Trac Wiki markup', ('trac-wiki', 'moin'), (), ('text/x-trac-wiki',)), 'MonkeyLexer': ('pygments.lexers.basic', 'Monkey', ('monkey',), ('*.monkey',), ('text/x-monkey',)), 'MoonScriptLexer': ('pygments.lexers.scripting', 'MoonScript', ('moon', 'moonscript'), ('*.moon',), ('text/x-moonscript', 'application/x-moonscript')), @@ -258,10 +265,12 @@ LEXERS = { 'ObjectiveJLexer': ('pygments.lexers.javascript', 'Objective-J', ('objective-j', 'objectivej', 'obj-j', 'objj'), ('*.j',), ('text/x-objective-j',)), 'OcamlLexer': ('pygments.lexers.ml', 'OCaml', ('ocaml',), ('*.ml', 
'*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)), 'OctaveLexer': ('pygments.lexers.matlab', 'Octave', ('octave',), ('*.m',), ('text/octave',)), + 'OdinLexer': ('pygments.lexers.archetype', 'ODIN', ('odin',), ('*.odin',), ('text/odin',)), 'OocLexer': ('pygments.lexers.ooc', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)), 'OpaLexer': ('pygments.lexers.ml', 'Opa', ('opa',), ('*.opa',), ('text/x-opa',)), 'OpenEdgeLexer': ('pygments.lexers.business', 'OpenEdge ABL', ('openedge', 'abl', 'progress'), ('*.p', '*.cls'), ('text/x-openedge', 'application/x-openedge')), 'PanLexer': ('pygments.lexers.dsls', 'Pan', ('pan',), ('*.pan',), ()), + 'ParaSailLexer': ('pygments.lexers.parasail', 'ParaSail', ('parasail',), ('*.psi', '*.psl'), ('text/x-parasail',)), 'PawnLexer': ('pygments.lexers.pawn', 'Pawn', ('pawn',), ('*.p', '*.pwn', '*.inc'), ('text/x-pawn',)), 'Perl6Lexer': ('pygments.lexers.perl', 'Perl6', ('perl6', 'pl6'), ('*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm', '*.p6m', '*.pm6', '*.t'), ('text/x-perl6', 'application/x-perl6')), 'PerlLexer': ('pygments.lexers.perl', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm', '*.t'), ('text/x-perl', 'application/x-perl')), @@ -285,7 +294,7 @@ LEXERS = { 'PythonLexer': ('pygments.lexers.python', 'Python', ('python', 'py', 'sage'), ('*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac', '*.sage'), ('text/x-python', 'application/x-python')), 'PythonTracebackLexer': ('pygments.lexers.python', 'Python Traceback', ('pytb',), ('*.pytb',), ('text/x-python-traceback',)), 'QBasicLexer': ('pygments.lexers.basic', 'QBasic', ('qbasic', 'basic'), ('*.BAS', '*.bas'), ('text/basic',)), - 'QmlLexer': ('pygments.lexers.webmisc', 'QML', ('qml',), ('*.qml',), ('application/x-qml',)), + 'QmlLexer': ('pygments.lexers.webmisc', 'QML', ('qml', 'qbs'), ('*.qml', '*.qbs'), ('application/x-qml', 'application/x-qt.qbs+qml')), 'RConsoleLexer': ('pygments.lexers.r', 'RConsole', ('rconsole', 'rout'), ('*.Rout',), ()), 'RPMSpecLexer': 
('pygments.lexers.installers', 'RPMSpec', ('spec',), ('*.spec',), ('text/x-rpm-spec',)), 'RacketLexer': ('pygments.lexers.lisp', 'Racket', ('racket', 'rkt'), ('*.rkt', '*.rktd', '*.rktl'), ('text/x-racket', 'application/x-racket')), @@ -312,7 +321,7 @@ LEXERS = { 'RstLexer': ('pygments.lexers.markup', 'reStructuredText', ('rst', 'rest', 'restructuredtext'), ('*.rst', '*.rest'), ('text/x-rst', 'text/prs.fallenstein.rst')), 'RubyConsoleLexer': ('pygments.lexers.ruby', 'Ruby irb session', ('rbcon', 'irb'), (), ('text/x-ruby-shellsession',)), 'RubyLexer': ('pygments.lexers.ruby', 'Ruby', ('rb', 'ruby', 'duby'), ('*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx', '*.duby'), ('text/x-ruby', 'application/x-ruby')), - 'RustLexer': ('pygments.lexers.rust', 'Rust', ('rust',), ('*.rs',), ('text/x-rustsrc',)), + 'RustLexer': ('pygments.lexers.rust', 'Rust', ('rust',), ('*.rs',), ('text/rust',)), 'SLexer': ('pygments.lexers.r', 'S', ('splus', 's', 'r'), ('*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron'), ('text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r', 'text/x-R', 'text/x-r-history', 'text/x-r-profile')), 'SMLLexer': ('pygments.lexers.ml', 'Standard ML', ('sml',), ('*.sml', '*.sig', '*.fun'), ('text/x-standardml', 'application/x-standardml')), 'SassLexer': ('pygments.lexers.css', 'Sass', ('sass',), ('*.sass',), ('text/x-sass',)), @@ -335,6 +344,7 @@ LEXERS = { 'SquidConfLexer': ('pygments.lexers.configs', 'SquidConf', ('squidconf', 'squid.conf', 'squid'), ('squid.conf',), ('text/x-squidconf',)), 'SspLexer': ('pygments.lexers.templates', 'Scalate Server Page', ('ssp',), ('*.ssp',), ('application/x-ssp',)), 'StanLexer': ('pygments.lexers.modeling', 'Stan', ('stan',), ('*.stan',), ()), + 'SuperColliderLexer': ('pygments.lexers.supercollider', 'SuperCollider', ('sc', 'supercollider'), ('*.sc', '*.scd'), ('application/supercollider', 'text/supercollider')), 'SwiftLexer': ('pygments.lexers.objective', 'Swift', ('swift',), ('*.swift',), ('text/x-swift',)), 
'SwigLexer': ('pygments.lexers.c_like', 'SWIG', ('swig',), ('*.swg', '*.i'), ('text/swig',)), 'SystemVerilogLexer': ('pygments.lexers.hdl', 'systemverilog', ('systemverilog', 'sv'), ('*.sv', '*.svh'), ('text/x-systemverilog',)), @@ -342,10 +352,12 @@ LEXERS = { 'TclLexer': ('pygments.lexers.tcl', 'Tcl', ('tcl',), ('*.tcl', '*.rvt'), ('text/x-tcl', 'text/x-script.tcl', 'application/x-tcl')), 'TcshLexer': ('pygments.lexers.shell', 'Tcsh', ('tcsh', 'csh'), ('*.tcsh', '*.csh'), ('application/x-csh',)), 'TeaTemplateLexer': ('pygments.lexers.templates', 'Tea', ('tea',), ('*.tea',), ('text/x-tea',)), + 'TerraformLexer': ('pygments.lexers.configs', 'Terraform', ('terraform', 'tf'), ('*.tf',), ('application/x-tf', 'application/x-terraform')), 'TexLexer': ('pygments.lexers.markup', 'TeX', ('tex', 'latex'), ('*.tex', '*.aux', '*.toc'), ('text/x-tex', 'text/x-latex')), 'TextLexer': ('pygments.lexers.special', 'Text only', ('text',), ('*.txt',), ('text/plain',)), 'TodotxtLexer': ('pygments.lexers.textfmts', 'Todotxt', ('todotxt',), ('todo.txt', '*.todotxt'), ('text/x-todo',)), 'TreetopLexer': ('pygments.lexers.parsers', 'Treetop', ('treetop',), ('*.treetop', '*.tt'), ()), + 'TurtleLexer': ('pygments.lexers.rdf', 'Turtle', ('turtle',), ('*.ttl',), ('text/turtle', 'application/x-turtle')), 'TwigHtmlLexer': ('pygments.lexers.templates', 'HTML+Twig', ('html+twig',), ('*.twig',), ('text/html+twig',)), 'TwigLexer': ('pygments.lexers.templates', 'Twig', ('twig',), (), ('application/x-twig',)), 'TypeScriptLexer': ('pygments.lexers.javascript', 'TypeScript', ('ts',), ('*.ts',), ('text/x-typescript',)), diff --git a/pygments/lexers/_stan_builtins.py b/pygments/lexers/_stan_builtins.py index 0a225eba..6bf44574 100644 --- a/pygments/lexers/_stan_builtins.py +++ b/pygments/lexers/_stan_builtins.py @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This file contains the names of functions for Stan used by - ``pygments.lexers.math.StanLexer. This is for Stan language version 2.5.0. 
+ ``pygments.lexers.math.StanLexer. This is for Stan language version 2.7.0 :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. @@ -35,6 +35,7 @@ TYPES = ( 'positive_ordered', 'real', 'row_vector', + 'row_vectormatrix', 'simplex', 'unit_vector', 'vector', @@ -200,7 +201,6 @@ FUNCTIONS = ( 'lkj_corr_cholesky_rng', 'lkj_corr_log', 'lkj_corr_rng', - 'lkj_cov_log', 'lmgamma', 'log', 'log10', @@ -214,6 +214,7 @@ FUNCTIONS = ( 'log_diff_exp', 'log_falling_factorial', 'log_inv_logit', + 'log_mix', 'log_rising_factorial', 'log_softmax', 'log_sum_exp', @@ -236,6 +237,7 @@ FUNCTIONS = ( 'min', 'modified_bessel_first_kind', 'modified_bessel_second_kind', + 'multi_gp_cholesky_log', 'multi_gp_log', 'multi_normal_cholesky_log', 'multi_normal_cholesky_rng', @@ -248,6 +250,9 @@ FUNCTIONS = ( 'multinomial_rng', 'multiply_log', 'multiply_lower_tri_self_transpose', + 'neg_binomial_2_ccdf_log', + 'neg_binomial_2_cdf', + 'neg_binomial_2_cdf_log', 'neg_binomial_2_log', 'neg_binomial_2_log_log', 'neg_binomial_2_log_rng', @@ -284,6 +289,7 @@ FUNCTIONS = ( 'poisson_cdf_log', 'poisson_log', 'poisson_log_log', + 'poisson_log_rng', 'poisson_rng', 'positive_infinity', 'pow', @@ -371,6 +377,7 @@ FUNCTIONS = ( 'weibull_cdf_log', 'weibull_log', 'weibull_rng', + 'wiener_log', 'wishart_log', 'wishart_rng' ) @@ -400,10 +407,10 @@ DISTRIBUTIONS = ( 'inv_wishart', 'lkj_corr', 'lkj_corr_cholesky', - 'lkj_cov', 'logistic', 'lognormal', 'multi_gp', + 'multi_gp_cholesky', 'multi_normal', 'multi_normal_cholesky', 'multi_normal_prec', @@ -425,6 +432,7 @@ DISTRIBUTIONS = ( 'uniform', 'von_mises', 'weibull', + 'wiener', 'wishart' ) diff --git a/pygments/lexers/archetype.py b/pygments/lexers/archetype.py new file mode 100644 index 00000000..4f1b2645 --- /dev/null +++ b/pygments/lexers/archetype.py @@ -0,0 +1,316 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.archetype + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexer for Archetype-related syntaxes, including: + + 
- ODIN syntax <https://github.com/openEHR/odin> + - ADL syntax <http://www.openehr.org/releases/trunk/architecture/am/adl2.pdf> + - cADL sub-syntax of ADL + + For uses of this syntax, see the openEHR archetypes <http://www.openEHR.org/ckm> + + Contributed by Thomas Beale <https://github.com/wolandscat>, + <https://bitbucket.org/thomas_beale>. + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import RegexLexer, include, bygroups, using, default +from pygments.token import Text, Comment, Name, Literal, Number, String, \ + Punctuation, Keyword, Operator, Generic + +__all__ = ['OdinLexer', 'CadlLexer', 'AdlLexer'] + + +class AtomsLexer(RegexLexer): + """ + Lexer for Values used in ADL and ODIN. + + .. versionadded:: 2.1 + """ + + tokens = { + # ----- pseudo-states for inclusion ----- + 'whitespace': [ + (r'\n', Text), + (r'\s+', Text), + (r'[ \t]*--.*$', Comment), + ], + 'archetype_id': [ + (r'[ \t]*([a-zA-Z]\w+(\.[a-zA-Z]\w+)*::)?[a-zA-Z]\w+(-[a-zA-Z]\w+){2}' + r'\.\w+[\w-]*\.v\d+(\.\d+){,2}((-[a-z]+)(\.\d+)?)?', Name.Decorator), + ], + 'date_constraints': [ + # ISO 8601-based date/time constraints + (r'[Xx?YyMmDdHhSs\d]{2,4}([:-][Xx?YyMmDdHhSs\d]{2}){2}', Literal.Date), + # ISO 8601-based duration constraints + optional trailing slash + (r'(P[YyMmWwDd]+(T[HhMmSs]+)?|PT[HhMmSs]+)/?', Literal.Date), + ], + 'ordered_values': [ + # ISO 8601 date with optional 'T' ligature + (r'\d{4}-\d{2}-\d{2}T?', Literal.Date), + # ISO 8601 time + (r'\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{4}|Z)?', Literal.Date), + # ISO 8601 duration + (r'P((\d*(\.\d+)?[YyMmWwDd]){1,3}(T(\d*(\.\d+)?[HhMmSs]){,3})?|' + r'T(\d*(\.\d+)?[HhMmSs]){,3})', Literal.Date), + (r'[+-]?(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float), + (r'[+-]?(\d+)*\.\d+%?', Number.Float), + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'[+-]?\d+%?', Number.Integer), + ], + 'values': [ + include('ordered_values'), + (r'([Tt]rue|[Ff]alse)', Literal), + 
(r'"', String, 'string'), + (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), + (r'[a-z][a-z0-9+.-]*:', Literal, 'uri'), + # term code + (r'(\[)(\w[\w-]*(?:\([^)\n]+\))?)(::)(\w[\w-]*)(\])', + bygroups(Punctuation, Name.Decorator, Punctuation, Name.Decorator, Punctuation)), + (r'\|', Punctuation, 'interval'), + # list continuation + (r'\.\.\.', Punctuation), + ], + 'constraint_values': [ + (r'(\[)(\w[\w-]*(?:\([^)\n]+\))?)(::)', + bygroups(Punctuation, Name.Decorator, Punctuation), 'adl14_code_constraint'), + # ADL 1.4 ordinal constraint + (r'(\d*)(\|)(\[\w[\w-]*::\w[\w-]*\])((?:[,;])?)', + bygroups(Number, Punctuation, Name.Decorator, Punctuation)), + include('date_constraints'), + include('values'), + ], + + # ----- real states ----- + 'string': [ + ('"', String, '#pop'), + (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|' + r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape), + # all other characters + (r'[^\\"]+', String), + # stray backslash + (r'\\', String), + ], + 'uri': [ + # effective URI terminators + (r'[,>\s]', Punctuation, '#pop'), + (r'[^>\s,]+', Literal), + ], + 'interval': [ + (r'\|', Punctuation, '#pop'), + include('ordered_values'), + (r'\.\.', Punctuation), + (r'[<>=] *', Punctuation), + # handle +/- + (r'\+/-', Punctuation), + (r'\s+', Text), + ], + 'any_code': [ + include('archetype_id'), + # if it is a code + (r'[a-z_]\w*[0-9.]+(@[^\]]+)?', Name.Decorator), + # if it is tuple with attribute names + (r'[a-z_]\w*', Name.Class), + # if it is an integer, i.e. 
Xpath child index + (r'[0-9]+', Text), + (r'\|', Punctuation, 'code_rubric'), + (r'\]', Punctuation, '#pop'), + # handle use_archetype statement + (r'\s*,\s*', Punctuation), + ], + 'code_rubric': [ + (r'\|', Punctuation, '#pop'), + (r'[^|]+', String), + ], + 'adl14_code_constraint': [ + (r'\]', Punctuation, '#pop'), + (r'\|', Punctuation, 'code_rubric'), + (r'(\w[\w-]*)([;,]?)', bygroups(Name.Decorator, Punctuation)), + include('whitespace'), + ], + } + + +class OdinLexer(AtomsLexer): + """ + Lexer for ODIN syntax. + + .. versionadded:: 2.1 + """ + name = 'ODIN' + aliases = ['odin'] + filenames = ['*.odin'] + mimetypes = ['text/odin'] + + tokens = { + 'path': [ + (r'>', Punctuation, '#pop'), + # attribute name + (r'[a-z_]\w*', Name.Class), + (r'/', Punctuation), + (r'\[', Punctuation, 'key'), + (r'\s*,\s*', Punctuation, '#pop'), + (r'\s+', Text, '#pop'), + ], + 'key': [ + include('values'), + (r'\]', Punctuation, '#pop'), + ], + 'type_cast': [ + (r'\)', Punctuation, '#pop'), + (r'[^)]+', Name.Class), + ], + 'root': [ + include('whitespace'), + (r'([Tt]rue|[Ff]alse)', Literal), + include('values'), + # x-ref path + (r'/', Punctuation, 'path'), + # x-ref path starting with key + (r'\[', Punctuation, 'key'), + # attribute name + (r'[a-z_]\w*', Name.Class), + (r'=', Operator), + (r'\(', Punctuation, 'type_cast'), + (r',', Punctuation), + (r'<', Punctuation), + (r'>', Punctuation), + (r';', Punctuation), + ], + } + + +class CadlLexer(AtomsLexer): + """ + Lexer for cADL syntax. + + .. 
versionadded:: 2.1 + """ + name = 'cADL' + aliases = ['cadl'] + filenames = ['*.cadl'] + + tokens = { + 'path': [ + # attribute name + (r'[a-z_]\w*', Name.Class), + (r'/', Punctuation), + (r'\[', Punctuation, 'any_code'), + (r'\s+', Punctuation, '#pop'), + ], + 'root': [ + include('whitespace'), + (r'(cardinality|existence|occurrences|group|include|exclude|' + r'allow_archetype|use_archetype|use_node)\W', Keyword.Type), + (r'(and|or|not|there_exists|xor|implies|for_all)\W', Keyword.Type), + (r'(after|before|closed)\W', Keyword.Type), + (r'(not)\W', Operator), + (r'(matches|is_in)\W', Operator), + # is_in / not is_in char + (u'(\u2208|\u2209)', Operator), + # there_exists / not there_exists / for_all / and / or + (u'(\u2203|\u2204|\u2200|\u2227|\u2228|\u22BB|\223C)', + Operator), + # regex in slot or as string constraint + (r'(\{)(\s*/[^}]+/\s*)(\})', + bygroups(Punctuation, String.Regex, Punctuation)), + # regex in slot or as string constraint + (r'(\{)(\s*\^[^}]+\^\s*)(\})', + bygroups(Punctuation, String.Regex, Punctuation)), + (r'/', Punctuation, 'path'), + # for cardinality etc + (r'(\{)((?:\d+\.\.)?(?:\d+|\*))((?:\s*;\s*(?:ordered|unordered|unique)){,2})(\})', + bygroups(Punctuation, Number, Number, Punctuation)), + # [{ is start of a tuple value + (r'\[\{', Punctuation), + (r'\}\]', Punctuation), + (r'\{', Punctuation), + (r'\}', Punctuation), + include('constraint_values'), + # type name + (r'[A-Z]\w+(<[A-Z]\w+([A-Za-z_<>]*)>)?', Name.Class), + # attribute name + (r'[a-z_]\w*', Name.Class), + (r'\[', Punctuation, 'any_code'), + (r'(~|//|\\\\|\+|-|/|\*|\^|!=|=|<=|>=|<|>]?)', Operator), + (r'\(', Punctuation), + (r'\)', Punctuation), + # for lists of values + (r',', Punctuation), + (r'"', String, 'string'), + # for assumed value + (r';', Punctuation), + ], + } + + +class AdlLexer(AtomsLexer): + """ + Lexer for ADL syntax. + + .. 
versionadded:: 2.1 + """ + + name = 'ADL' + aliases = ['adl'] + filenames = ['*.adl', '*.adls', '*.adlf', '*.adlx'] + + tokens = { + 'whitespace': [ + # blank line ends + (r'\s*\n', Text), + # comment-only line + (r'^[ \t]*--.*$', Comment), + ], + 'odin_section': [ + # repeating the following two rules from the root state enable multi-line strings + # that start in the first column to be dealt with + (r'^(language|description|ontology|terminology|annotations|' + r'component_terminologies|revision_history)[ \t]*\n', Generic.Heading), + (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'), + (r'^([ \t]*|[ \t]+.*)\n', using(OdinLexer)), + (r'^([^"]*")(>[ \t]*\n)', bygroups(String, Punctuation)), + # template overlay delimiter + (r'^----------*\n', Text, '#pop'), + (r'^.*\n', String), + default('#pop'), + ], + 'cadl_section': [ + (r'^([ \t]*|[ \t]+.*)\n', using(CadlLexer)), + default('#pop'), + ], + 'rules_section': [ + (r'^[ \t]+.*\n', using(CadlLexer)), + default('#pop'), + ], + 'metadata': [ + (r'\)', Punctuation, '#pop'), + (r';', Punctuation), + (r'([Tt]rue|[Ff]alse)', Literal), + # numbers and version ids + (r'\d+(\.\d+)*', Literal), + # Guids + (r'(\d|[a-fA-F])+(-(\d|[a-fA-F])+){3,}', Literal), + (r'\w+', Name.Class), + (r'"', String, 'string'), + (r'=', Operator), + (r'[ \t]+', Text), + default('#pop'), + ], + 'root': [ + (r'^(archetype|template_overlay|operational_template|template|' + r'speciali[sz]e)', Generic.Heading), + (r'^(language|description|ontology|terminology|annotations|' + r'component_terminologies|revision_history)[ \t]*\n', + Generic.Heading, 'odin_section'), + (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'), + (r'^(rules)[ \t]*\n', Generic.Heading, 'rules_section'), + include('archetype_id'), + (r'[ \t]*\(', Punctuation, 'metadata'), + include('whitespace'), + ], + } diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py index c308f7fc..918ed83b 100644 --- a/pygments/lexers/asm.py +++ b/pygments/lexers/asm.py @@ -286,7 
+286,8 @@ class LlvmLexer(RegexLexer): r'|lshr|ashr|and|or|xor|icmp|fcmp' r'|phi|call|trunc|zext|sext|fptrunc|fpext|uitofp|sitofp|fptoui' - r'|fptosi|inttoptr|ptrtoint|bitcast|select|va_arg|ret|br|switch' + r'|fptosi|inttoptr|ptrtoint|bitcast|addrspacecast' + r'|select|va_arg|ret|br|switch' r'|invoke|unwind|unreachable' r'|indirectbr|landingpad|resume' diff --git a/pygments/lexers/automation.py b/pygments/lexers/automation.py index c8e6b0b8..2ebc4d24 100644 --- a/pygments/lexers/automation.py +++ b/pygments/lexers/automation.py @@ -316,7 +316,8 @@ class AutoItLexer(RegexLexer): tokens = { 'root': [ (r';.*\n', Comment.Single), - (r'(#comments-start|#cs).*?(#comments-end|#ce)', Comment.Multiline), + (r'(#comments-start|#cs)(.|\n)*?(#comments-end|#ce)', + Comment.Multiline), (r'[\[\]{}(),;]', Punctuation), (r'(and|or|not)\b', Operator.Word), (r'[$|@][a-zA-Z_]\w*', Name.Variable), diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py index b565b97b..35ea517f 100644 --- a/pygments/lexers/c_cpp.py +++ b/pygments/lexers/c_cpp.py @@ -28,8 +28,10 @@ class CFamilyLexer(RegexLexer): #: optional Comment or Whitespace _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+' + + # The trailing ?, rather than *, avoids a geometric performance drop here. #: only one /* */ style comment - _ws1 = r'\s*(?:/[*].*?[*]/\s*)*' + _ws1 = r'\s*(?:/[*].*?[*]/\s*)?' 
tokens = { 'whitespace': [ @@ -202,7 +204,7 @@ class CppLexer(CFamilyLexer): 'export', 'friend', 'mutable', 'namespace', 'new', 'operator', 'private', 'protected', 'public', 'reinterpret_cast', 'restrict', 'static_cast', 'template', 'this', 'throw', 'throws', - 'typeid', 'typename', 'using', 'virtual', + 'try', 'typeid', 'typename', 'using', 'virtual', 'constexpr', 'nullptr', 'decltype', 'thread_local', 'alignas', 'alignof', 'static_assert', 'noexcept', 'override', 'final'), suffix=r'\b'), Keyword), diff --git a/pygments/lexers/c_like.py b/pygments/lexers/c_like.py index a08d86a3..27736bff 100644 --- a/pygments/lexers/c_like.py +++ b/pygments/lexers/c_like.py @@ -414,7 +414,7 @@ class MqlLexer(CppLexer): class ArduinoLexer(CppLexer): """ - For `Arduino® <https://arduino.cc/>`_ source. + For `Arduino(tm) <https://arduino.cc/>`_ source. This is an extension of the CppLexer, as the Arduino® Language is a superset of C++ diff --git a/pygments/lexers/chapel.py b/pygments/lexers/chapel.py index 520be37b..5b7be4dd 100644 --- a/pygments/lexers/chapel.py +++ b/pygments/lexers/chapel.py @@ -46,9 +46,10 @@ class ChapelLexer(RegexLexer): 'continue', 'delete', 'dmapped', 'do', 'domain', 'else', 'enum', 'export', 'extern', 'for', 'forall', 'if', 'index', 'inline', 'iter', 'label', 'lambda', 'let', 'local', 'new', 'noinit', 'on', - 'otherwise', 'pragma', 'reduce', 'return', 'scan', 'select', - 'serial', 'single', 'sparse', 'subdomain', 'sync', 'then', 'use', - 'when', 'where', 'while', 'with', 'yield', 'zip'), suffix=r'\b'), + 'otherwise', 'pragma', 'private', 'public', 'reduce', + 'require', 'return', 'scan', 'select', 'serial', 'single', + 'sparse', 'subdomain', 'sync', 'then', 'use', 'when', 'where', + 'while', 'with', 'yield', 'zip'), suffix=r'\b'), Keyword), (r'(proc)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'procname'), (r'(class|module|record|union)(\s+)', bygroups(Keyword, Text), diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py index 1bd8f55a..6b00e5f4 
100644 --- a/pygments/lexers/configs.py +++ b/pygments/lexers/configs.py @@ -18,7 +18,8 @@ from pygments.lexers.shell import BashLexer __all__ = ['IniLexer', 'RegeditLexer', 'PropertiesLexer', 'KconfigLexer', 'Cfengine3Lexer', 'ApacheConfLexer', 'SquidConfLexer', - 'NginxConfLexer', 'LighttpdConfLexer', 'DockerLexer'] + 'NginxConfLexer', 'LighttpdConfLexer', 'DockerLexer', + 'TerraformLexer'] class IniLexer(RegexLexer): @@ -544,3 +545,75 @@ class DockerLexer(RegexLexer): (r'(.*\\\n)*.+', using(BashLexer)), ], } + + +class TerraformLexer(RegexLexer): + """ + Lexer for `Terraform .tf files <https://www.terraform.io/>`_ + + .. versionadded:: 2.1 + """ + + name = 'Terraform' + aliases = ['terraform', 'tf'] + filenames = ['*.tf'] + mimetypes = ['application/x-tf', 'application/x-terraform'] + + tokens = { + 'root': [ + include('string'), + include('punctuation'), + include('curly'), + include('basic'), + include('whitespace'), + (r'[0-9]+', Number), + ], + 'basic': [ + (words(('true', 'false'), prefix=r'\b', suffix=r'\b'), Keyword.Type), + (r'\s*/\*', Comment.Multiline, 'comment'), + (r'\s*#.*\n', Comment.Single), + (r'(.*?)(\s*)(=)', bygroups(Name.Attribute, Text, Operator)), + (words(('variable', 'resource', 'provider', 'provisioner', 'module'), + prefix=r'\b', suffix=r'\b'), Keyword.Reserved, 'function'), + (words(('ingress', 'egress', 'listener', 'default', 'connection'), + prefix=r'\b', suffix=r'\b'), Keyword.Declaration), + ('\$\{', String.Interpol, 'var_builtin'), + ], + 'function': [ + (r'(\s+)(".*")(\s+)', bygroups(Text, String, Text)), + include('punctuation'), + include('curly'), + ], + 'var_builtin': [ + (r'\$\{', String.Interpol, '#push'), + (words(('concat', 'file', 'join', 'lookup', 'element'), + prefix=r'\b', suffix=r'\b'), Name.Builtin), + include('string'), + include('punctuation'), + (r'\s+', Text), + (r'\}', String.Interpol, '#pop'), + ], + 'string':[ + (r'(".*")', bygroups(String.Double)), + ], + 'punctuation':[ + (r'[\[\]\(\),.]', Punctuation), + 
], + # Keep this separate from punctuation - we sometimes want to use different + # Tokens for { } + 'curly':[ + (r'\{', Text.Punctuation), + (r'\}', Text.Punctuation), + ], + 'comment': [ + (r'[^*/]', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline) + ], + 'whitespace': [ + (r'\n', Text), + (r'\s+', Text), + (r'\\\n', Text), + ], + } diff --git a/pygments/lexers/css.py b/pygments/lexers/css.py index c11e7ec0..4165bcc1 100644 --- a/pygments/lexers/css.py +++ b/pygments/lexers/css.py @@ -13,12 +13,12 @@ import re import copy from pygments.lexer import ExtendedRegexLexer, RegexLexer, include, bygroups, \ - default, words + default, words, inherit from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation from pygments.util import iteritems -__all__ = ['CssLexer', 'SassLexer', 'ScssLexer'] +__all__ = ['CssLexer', 'SassLexer', 'ScssLexer', 'LessCssLexer'] class CssLexer(RegexLexer): @@ -484,6 +484,7 @@ class ScssLexer(RegexLexer): (r'[^\s:="\[]+', Name.Attribute), (r'#\{', String.Interpol, 'interpolation'), (r'[ \t]*:', Operator, 'value'), + default('#pop'), ], 'inline-comment': [ @@ -496,3 +497,27 @@ class ScssLexer(RegexLexer): tokens[group] = copy.copy(common) tokens['value'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')]) tokens['selector'].extend([(r'\n', Text), (r'[;{}]', Punctuation, '#pop')]) + + +class LessCssLexer(CssLexer): + """ + For `LESS <http://lesscss.org/>`_ stylesheets. + + .. 
versionadded:: 2.1 + """ + + name = 'LessCss' + aliases = ['less'] + filenames = ['*.less'] + mimetypes = ['text/x-less-css'] + + tokens = { + 'root': [ + (r'@\w+', Name.Variable), + inherit, + ], + 'content': [ + (r'{', Punctuation, '#push'), + inherit, + ], + } diff --git a/pygments/lexers/esoteric.py b/pygments/lexers/esoteric.py index f61b292d..1f317260 100644 --- a/pygments/lexers/esoteric.py +++ b/pygments/lexers/esoteric.py @@ -9,11 +9,11 @@ :license: BSD, see LICENSE for details. """ -from pygments.lexer import RegexLexer, include +from pygments.lexer import RegexLexer, include, words from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation, Error + Number, Punctuation, Error, Whitespace -__all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer'] +__all__ = ['BrainfuckLexer', 'BefungeLexer', 'BoogieLexer', 'RedcodeLexer'] class BrainfuckLexer(RegexLexer): @@ -112,3 +112,48 @@ class RedcodeLexer(RegexLexer): (r'[-+]?\d+', Number.Integer), ], } + + +class BoogieLexer(RegexLexer): + """ + For `Boogie <https://boogie.codeplex.com/>`_ source code. + + .. 
versionadded:: 2.0 + """ + name = 'Boogie' + aliases = ['boogie'] + filenames = ['*.bpl'] + + tokens = { + 'root': [ + # Whitespace and Comments + (r'\n', Whitespace), + (r'\s+', Whitespace), + (r'//[/!](.*?)\n', Comment.Doc), + (r'//(.*?)\n', Comment.Single), + (r'/\*', Comment.Multiline, 'comment'), + + (words(( + 'axiom', 'break', 'call', 'ensures', 'else', 'exists', 'function', + 'forall', 'if', 'invariant', 'modifies', 'procedure', 'requires', + 'then', 'var', 'while'), + suffix=r'\b'), Keyword), + (words(('const',), suffix=r'\b'), Keyword.Reserved), + + (words(('bool', 'int', 'ref'), suffix=r'\b'), Keyword.Type), + include('numbers'), + (r"(>=|<=|:=|!=|==>|&&|\|\||[+/\-=>*<\[\]])", Operator), + (r"([{}():;,.])", Punctuation), + # Identifier + (r'[a-zA-Z_]\w*', Name), + ], + 'comment': [ + (r'[^*/]+', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline), + ], + 'numbers': [ + (r'[0-9]+', Number.Integer), + ], + } diff --git a/pygments/lexers/fortran.py b/pygments/lexers/fortran.py index 8ba54aff..d822160f 100644 --- a/pygments/lexers/fortran.py +++ b/pygments/lexers/fortran.py @@ -11,11 +11,11 @@ import re -from pygments.lexer import RegexLexer, include, words +from pygments.lexer import RegexLexer, bygroups, include, words, using from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation + Number, Punctuation, Generic -__all__ = ['FortranLexer'] +__all__ = ['FortranLexer', 'FortranFixedLexer'] class FortranLexer(RegexLexer): @@ -26,7 +26,7 @@ class FortranLexer(RegexLexer): """ name = 'Fortran' aliases = ['fortran'] - filenames = ['*.f', '*.f90', '*.F', '*.F90'] + filenames = ['*.f03', '*.f90', '*.F03', '*.F90'] mimetypes = ['text/x-fortran'] flags = re.IGNORECASE | re.MULTILINE @@ -159,3 +159,47 @@ class FortranLexer(RegexLexer): (r'[+-]?\d+\.\d*(e[-+]?\d+)?(_[a-z]\w+)?', Number.Float), ], } + + +class FortranFixedLexer(RegexLexer): + """ + 
Lexer for fixed format Fortran. + + .. versionadded:: 2.1 + """ + name = 'FortranFixed' + aliases = ['fortranfixed'] + filenames = ['*.f', '*.F'] + + flags = re.IGNORECASE + + def _lex_fortran(self, match, ctx=None): + """Lex a line just as free form fortran without line break.""" + lexer = FortranLexer() + text = match.group(0) + "\n" + for index, token, value in lexer.get_tokens_unprocessed(text): + value = value.replace('\n', '') + if value != '': + yield index, token, value + + tokens = { + 'root': [ + (r'[C*].*\n', Comment), + (r'#.*\n', Comment.Preproc), + (r' {0,4}!.*\n', Comment), + (r'(.{5})', Name.Label, 'cont-char'), + (r'.*\n', using(FortranLexer)), + ], + + 'cont-char': [ + (' ', Text, 'code'), + ('0', Comment, 'code'), + ('.', Generic.Strong, 'code') + ], + + 'code': [ + (r'(.{66})(.*)(\n)', + bygroups(_lex_fortran, Comment, Text), 'root'), + (r'(.*)(\n)', bygroups(_lex_fortran, Text), 'root'), + (r'', Text, 'root')] + } diff --git a/pygments/lexers/html.py b/pygments/lexers/html.py index 1c35325f..7893952f 100644 --- a/pygments/lexers/html.py +++ b/pygments/lexers/html.py @@ -46,12 +46,19 @@ class HtmlLexer(RegexLexer): ('<!--', Comment, 'comment'), (r'<\?.*?\?>', Comment.Preproc), ('<![^>]*>', Comment.Preproc), - (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')), - (r'<\s*style\s*', Name.Tag, ('style-content', 'tag')), + (r'(<)(\s*)(script)(\s*)', + bygroups(Punctuation, Text, Name.Tag, Text), + ('script-content', 'tag')), + (r'(<)(\s*)(style)(\s*)', + bygroups(Punctuation, Text, Name.Tag, Text), + ('style-content', 'tag')), # note: this allows tag names not used in HTML like <x:with-dash>, # this is to support yet-unknown template engines and the like - (r'<\s*[\w:.-]+', Name.Tag, 'tag'), - (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag), + (r'(<)(\s*)([\w:.-]+)', + bygroups(Punctuation, Text, Name.Tag), 'tag'), + (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)', + bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, + Punctuation)), ], 'comment': [ 
('[^-]+', Comment), @@ -60,16 +67,21 @@ class HtmlLexer(RegexLexer): ], 'tag': [ (r'\s+', Text), - (r'([\w:-]+\s*=)(\s*)', bygroups(Name.Attribute, Text), 'attr'), + (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text), + 'attr'), (r'[\w:-]+', Name.Attribute), - (r'/?\s*>', Name.Tag, '#pop'), + (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'), ], 'script-content': [ - (r'<\s*/\s*script\s*>', Name.Tag, '#pop'), + (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)', + bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, + Punctuation), '#pop'), (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)), ], 'style-content': [ - (r'<\s*/\s*style\s*>', Name.Tag, '#pop'), + (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)', + bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text, + Punctuation),'#pop'), (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)), ], 'attr': [ diff --git a/pygments/lexers/igor.py b/pygments/lexers/igor.py index f558b80a..b0eaf6aa 100644 --- a/pygments/lexers/igor.py +++ b/pygments/lexers/igor.py @@ -35,16 +35,17 @@ class IgorLexer(RegexLexer): flowControl = ( 'if', 'else', 'elseif', 'endif', 'for', 'endfor', 'strswitch', 'switch', 'case', 'default', 'endswitch', 'do', 'while', 'try', 'catch', 'endtry', - 'break', 'continue', 'return', + 'break', 'continue', 'return', 'AbortOnRTE', 'AbortOnValue' ) types = ( 'variable', 'string', 'constant', 'strconstant', 'NVAR', 'SVAR', 'WAVE', - 'STRUCT', 'dfref' + 'STRUCT', 'dfref', 'funcref', 'char', 'uchar', 'int16', 'uint16', 'int32', + 'uint32', 'float', 'double' ) keywords = ( - 'override', 'ThreadSafe', 'static', 'FuncFit', 'Proc', 'Picture', - 'Prompt', 'DoPrompt', 'macro', 'window', 'graph', 'function', 'end', - 'Structure', 'EndStructure', 'EndMacro', 'Menu', 'SubMenu', + 'override', 'ThreadSafe', 'MultiThread', 'static', 'Proc', + 'Picture', 'Prompt', 'DoPrompt', 'macro', 'window', 'function', 'end', + 'Structure', 'EndStructure', 'EndMacro', 'Menu', 'SubMenu' ) operations = ( 'Abort', 
'AddFIFOData', 'AddFIFOVectData', 'AddMovieAudio', @@ -161,7 +162,7 @@ class IgorLexer(RegexLexer): 'CreationDate', 'csc', 'DataFolderExists', 'DataFolderRefsEqual', 'DataFolderRefStatus', 'date2secs', 'datetime', 'DateToJulian', 'Dawson', 'DDEExecute', 'DDEInitiate', 'DDEPokeString', 'DDEPokeWave', - 'DDERequestWave', 'DDEStatus', 'DDETerminate', 'deltax', 'digamma', + 'DDERequestWave', 'DDEStatus', 'DDETerminate', 'defined', 'deltax', 'digamma', 'DimDelta', 'DimOffset', 'DimSize', 'ei', 'enoise', 'equalWaves', 'erf', 'erfc', 'exists', 'exp', 'expInt', 'expNoise', 'factorial', 'fakedata', 'faverage', 'faverageXY', 'FindDimLabel', 'FindListItem', 'floor', @@ -223,7 +224,7 @@ class IgorLexer(RegexLexer): 'ThreadGroupWait', 'ThreadProcessorCount', 'ThreadReturnValue', 'ticks', 'trunc', 'Variance', 'vcsr', 'WaveCRC', 'WaveDims', 'WaveExists', 'WaveMax', 'WaveMin', 'WaveRefsEqual', 'WaveType', 'WhichListItem', - 'WinType', 'WNoise', 'x', 'x2pnt', 'xcsr', 'y', 'z', 'zcsr', 'ZernikeR', + 'WinType', 'WNoise', 'x2pnt', 'xcsr', 'zcsr', 'ZernikeR', ) functions += ( 'AddListItem', 'AnnotationInfo', 'AnnotationList', 'AxisInfo', diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py index fa7dca41..7dcfbb4b 100644 --- a/pygments/lexers/javascript.py +++ b/pygments/lexers/javascript.py @@ -36,7 +36,7 @@ class JavascriptLexer(RegexLexer): name = 'JavaScript' aliases = ['js', 'javascript'] - filenames = ['*.js', ] + filenames = ['*.js', '*.jsm', ] mimetypes = ['application/javascript', 'application/x-javascript', 'text/x-javascript', 'text/javascript', ] @@ -60,7 +60,7 @@ class JavascriptLexer(RegexLexer): (r'\n', Text, '#pop') ], 'root': [ - (r'\A#! ?/.*?\n', Comment), # shebang lines are recognized by node.js + (r'\A#! 
?/.*?\n', Comment.Hashbang), # recognized by node.js (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), include('commentsandwhitespace'), (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' diff --git a/pygments/lexers/julia.py b/pygments/lexers/julia.py index 1b7d543a..cf7c7d61 100644 --- a/pygments/lexers/julia.py +++ b/pygments/lexers/julia.py @@ -14,7 +14,7 @@ import re from pygments.lexer import Lexer, RegexLexer, bygroups, combined, do_insertions from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic -from pygments.util import shebang_matches +from pygments.util import shebang_matches, unirange __all__ = ['JuliaLexer', 'JuliaConsoleLexer'] @@ -30,6 +30,8 @@ class JuliaLexer(RegexLexer): filenames = ['*.jl'] mimetypes = ['text/x-julia', 'application/x-julia'] + flags = re.MULTILINE | re.UNICODE + builtins = [ 'exit', 'whos', 'edit', 'load', 'is', 'isa', 'isequal', 'typeof', 'tuple', 'ntuple', 'uid', 'hash', 'finalizer', 'convert', 'promote', 'subtype', @@ -89,7 +91,8 @@ class JuliaLexer(RegexLexer): # names (r'@[\w.]+', Name.Decorator), - (r'[a-zA-Z_]\w*', Name), + (u'(?:[a-zA-Z_\u00A1-\uffff]|%s)(?:[a-zA-Z_0-9\u00A1-\uffff]|%s)*!*' % + ((unirange(0x10000, 0x10ffff),)*2), Name), # numbers (r'(\d+(_\d+)+\.\d*|\d*\.\d+(_\d+)+)([eEf][+-]?[0-9]+)?', Number.Float), diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 6b302c7e..4d3c9159 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -457,7 +457,7 @@ class GroovyLexer(RegexLexer): name = 'Groovy' aliases = ['groovy'] - filenames = ['*.groovy'] + filenames = ['*.groovy','*.gradle'] mimetypes = ['text/x-groovy'] flags = re.MULTILINE | re.DOTALL diff --git a/pygments/lexers/modeling.py b/pygments/lexers/modeling.py index 43194436..ec99543f 100644 --- a/pygments/lexers/modeling.py +++ b/pygments/lexers/modeling.py @@ -284,8 +284,8 @@ class StanLexer(RegexLexer): """Pygments Lexer for Stan models. 
The Stan modeling language is specified in the *Stan Modeling Language - User's Guide and Reference Manual, v2.5.0*, - `pdf <https://github.com/stan-dev/stan/releases/download/v2.5.0/stan-reference-2.5.0.pdf>`__. + User's Guide and Reference Manual, v2.7.0*, + `pdf <https://github.com/stan-dev/stan/releases/download/v2.7.0/stan-reference-2.7.0.pdf>`__. .. versionadded:: 1.6 """ diff --git a/pygments/lexers/modula2.py b/pygments/lexers/modula2.py new file mode 100644 index 00000000..d32bb5bb --- /dev/null +++ b/pygments/lexers/modula2.py @@ -0,0 +1,1566 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.modula2 + ~~~~~~~~~~~~~~~~~~~~~~~ + + Multi-Dialect Lexer for Modula-2. + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, include +from pygments.util import get_bool_opt, get_list_opt +from pygments.token import Text, Comment, Operator, Keyword, Name, \ + String, Number, Punctuation, Error + +__all__ = ['Modula2Lexer'] + + +# Multi-Dialect Modula-2 Lexer +class Modula2Lexer(RegexLexer): + """ + For `Modula-2 <http://www.modula2.org/>`_ source code. + + The Modula-2 lexer supports several dialects. By default, it operates in + fallback mode, recognising the *combined* literals, punctuation symbols + and operators of all supported dialects, and the *combined* reserved words + and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not + differentiating between library defined identifiers. + + To select a specific dialect, a dialect option may be passed + or a dialect tag may be embedded into a source file. + + Dialect Options: + + `m2pim` + Select PIM Modula-2 dialect. + `m2iso` + Select ISO Modula-2 dialect. + `m2r10` + Select Modula-2 R10 dialect. + `objm2` + Select Objective Modula-2 dialect. + + The PIM and ISO dialect options may be qualified with a language extension. 
+ + Language Extensions: + + `+aglet` + Select Aglet Modula-2 extensions, available with m2iso. + `+gm2` + Select GNU Modula-2 extensions, available with m2pim. + `+p1` + Select p1 Modula-2 extensions, available with m2iso. + `+xds` + Select XDS Modula-2 extensions, available with m2iso. + + + Passing a Dialect Option via Unix Commandline Interface + + Dialect options may be passed to the lexer using the `dialect` key. + Only one such option should be passed. If multiple dialect options are + passed, the first valid option is used, any subsequent options are ignored. + + Examples: + + `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input` + Use ISO dialect to render input to HTML output + `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input` + Use ISO dialect with p1 extensions to render input to RTF output + + + Embedding a Dialect Option within a source file + + A dialect option may be embedded in a source file in form of a dialect + tag, a specially formatted comment that specifies a dialect option. + + Dialect Tag EBNF: + + dialectTag : + OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ; + + dialectOption : + 'm2pim' | 'm2iso' | 'm2r10' | 'objm2' | + 'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ; + + Prefix : '!' ; + + OpeningCommentDelim : '(*' ; + + ClosingCommentDelim : '*)' ; + + No whitespace is permitted between the tokens of a dialect tag. + + In the event that a source file contains multiple dialect tags, the first + tag that contains a valid dialect option will be used and any subsequent + dialect tags will be ignored. Ideally, a dialect tag should be placed + at the beginning of a source file. + + An embedded dialect tag overrides a dialect option set via command line. + + Examples: + + `(*!m2r10*) DEFINITION MODULE Foobar; ...` + Use Modula2 R10 dialect to render this source file. 
+ `(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...` + Use PIM dialect with GNU extensions to render this source file. + + + Algol Publication Mode: + + In Algol publication mode, source text is rendered for publication of + algorithms in scientific papers and academic texts, following the format + of the Revised Algol-60 Language Report. It is activated by passing + one of two corresponding styles as an option: + + `algol` + render reserved words lowercase underline boldface + and builtins lowercase boldface italic + `algol_nu` + render reserved words lowercase boldface (no underlining) + and builtins lowercase boldface italic + + The lexer automatically performs the required lowercase conversion when + this mode is activated. + + Example: + + `$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input` + Render input file in Algol publication mode to LaTeX output. + + + Rendering Mode of First Class ADT Identifiers: + + The rendering of standard library first class ADT identifiers is controlled + by option flag "treat_stdlib_adts_as_builtins". + + When this option is turned on, standard library ADT identifiers are rendered + as builtins. When it is turned off, they are rendered as ordinary library + identifiers. + + `treat_stdlib_adts_as_builtins` (default: On) + + The option is useful for dialects that support ADTs as first class objects + and provide ADTs in the standard library that would otherwise be built-in. + + At present, only Modula-2 R10 supports library ADTs as first class objects + and therefore, no ADT identifiers are defined for any other dialects. + + Example: + + `$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...` + Render standard library ADTs as ordinary library types. + + .. versionadded:: 1.3 + + .. versionchanged:: 2.1 + Added multi-dialect support. 
+ """ + name = 'Modula-2' + aliases = ['modula2', 'm2'] + filenames = ['*.def', '*.mod'] + mimetypes = ['text/x-modula2'] + + flags = re.MULTILINE | re.DOTALL + + tokens = { + 'whitespace': [ + (r'\n+', Text), # blank lines + (r'\s+', Text), # whitespace + ], + 'dialecttags': [ + # PIM Dialect Tag + (r'\(\*!m2pim\*\)', Comment.Special), + # ISO Dialect Tag + (r'\(\*!m2iso\*\)', Comment.Special), + # M2R10 Dialect Tag + (r'\(\*!m2r10\*\)', Comment.Special), + # ObjM2 Dialect Tag + (r'\(\*!objm2\*\)', Comment.Special), + # Aglet Extensions Dialect Tag + (r'\(\*!m2iso\+aglet\*\)', Comment.Special), + # GNU Extensions Dialect Tag + (r'\(\*!m2pim\+gm2\*\)', Comment.Special), + # p1 Extensions Dialect Tag + (r'\(\*!m2iso\+p1\*\)', Comment.Special), + # XDS Extensions Dialect Tag + (r'\(\*!m2iso\+xds\*\)', Comment.Special), + ], + 'identifiers': [ + (r'([a-zA-Z_$][\w$]*)', Name), + ], + 'prefixed_number_literals': [ + # + # Base-2, whole number + (r'0b[01]+(\'[01]+)*', Number.Bin), + # + # Base-16, whole number + (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex), + ], + 'plain_number_literals': [ + # + # Base-10, real number with exponent + (r'[0-9]+(\'[0-9]+)*' # integral part \ + r'\.[0-9]+(\'[0-9]+)*' # fractional part \ + r'[eE][+-]?[0-9]+(\'[0-9]+)*', # exponent \ + Number.Float), + # + # Base-10, real number without exponent + (r'[0-9]+(\'[0-9]+)*' # integral part \ + r'\.[0-9]+(\'[0-9]+)*', # fractional part \ + Number.Float), + # + # Base-10, whole number + (r'[0-9]+(\'[0-9]+)*', Number.Integer), + ], + 'suffixed_number_literals': [ + # + # Base-8, whole number + (r'[0-7]+B', Number.Oct), + # + # Base-8, character code + (r'[0-7]+C', Number.Oct), + # + # Base-16, number + (r'[0-9A-F]+H', Number.Hex), + ], + 'string_literals': [ + (r"'(\\\\|\\'|[^'])*'", String), # single quoted string + (r'"(\\\\|\\"|[^"])*"', String), # double quoted string + ], + 'digraph_operators': [ + # Dot Product Operator + (r'\*\.', Operator), + # Array Concatenation Operator + (r'\+>', 
Operator), # M2R10 + ObjM2 + # Inequality Operator + (r'<>', Operator), # ISO + PIM + # Less-Or-Equal, Subset + (r'<=', Operator), + # Greater-Or-Equal, Superset + (r'>=', Operator), + # Identity Operator + (r'==', Operator), # M2R10 + ObjM2 + # Type Conversion Operator + (r'::', Operator), # M2R10 + ObjM2 + # Assignment Symbol + (r':=', Operator), + # Postfix Increment Mutator + (r'\+\+', Operator), # M2R10 + ObjM2 + # Postfix Decrement Mutator + (r'--', Operator), # M2R10 + ObjM2 + ], + 'unigraph_operators': [ + # Arithmetic Operators + (r'[+-]', Operator), + (r'[*/]', Operator), + # ISO 80000-2 compliant Set Difference Operator + (r'\\', Operator), # M2R10 + ObjM2 + # Relational Operators + (r'[=#<>]', Operator), + # Dereferencing Operator + (r'\^', Operator), + # Dereferencing Operator Synonym + (r'@', Operator), # ISO + # Logical AND Operator Synonym + (r'&', Operator), # PIM + ISO + # Logical NOT Operator Synonym + (r'~', Operator), # PIM + ISO + # Smalltalk Message Prefix + (r'`', Operator), # ObjM2 + ], + 'digraph_punctuation': [ + # Range Constructor + (r'\.\.', Punctuation), + # Opening Chevron Bracket + (r'<<', Punctuation), # M2R10 + ISO + # Closing Chevron Bracket + (r'>>', Punctuation), # M2R10 + ISO + # Blueprint Punctuation + (r'->', Punctuation), # M2R10 + ISO + # Distinguish |# and # in M2 R10 + (r'\|#', Punctuation), + # Distinguish ## and # in M2 R10 + (r'##', Punctuation), + # Distinguish |* and * in M2 R10 + (r'\|\*', Punctuation), + ], + 'unigraph_punctuation': [ + # Common Punctuation + (r'[\(\)\[\]{},.:;\|]', Punctuation), + # Case Label Separator Synonym + (r'!', Punctuation), # ISO + # Blueprint Punctuation + (r'\?', Punctuation), # M2R10 + ObjM2 + ], + 'comments': [ + # Single Line Comment + (r'^//.*?\n', Comment.Single), # M2R10 + ObjM2 + # Block Comment + (r'\(\*([^$].*?)\*\)', Comment.Multiline), + # Template Block Comment + (r'/\*(.*?)\*/', Comment.Multiline), # M2R10 + ObjM2 + ], + 'pragmas': [ + # ISO Style Pragmas + (r'<\*.*?\*>', 
Comment.Preproc), # ISO, M2R10 + ObjM2 + # Pascal Style Pragmas + (r'\(\*\$.*?\*\)', Comment.Preproc), # PIM + ], + 'root': [ + include('whitespace'), + include('dialecttags'), + include('pragmas'), + include('comments'), + include('identifiers'), + include('suffixed_number_literals'), # PIM + ISO + include('prefixed_number_literals'), # M2R10 + ObjM2 + include('plain_number_literals'), + include('string_literals'), + include('digraph_punctuation'), + include('digraph_operators'), + include('unigraph_punctuation'), + include('unigraph_operators'), + ] + } + +# C o m m o n D a t a s e t s + + # Common Reserved Words Dataset + common_reserved_words = ( + # 37 common reserved words + 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', + 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF', + 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT', + 'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN', + 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE', + ) + + # Common Builtins Dataset + common_builtins = ( + # 16 common builtins + 'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER', + 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL', + 'TRUE', + ) + + # Common Pseudo-Module Builtins Dataset + common_pseudo_builtins = ( + # 4 common pseudo builtins + 'ADDRESS', 'BYTE', 'WORD', 'ADR' + ) + +# P I M M o d u l a - 2 D a t a s e t s + + # Lexemes to Mark as Error Tokens for PIM Modula-2 + pim_lexemes_to_reject = ( + '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', + '+>', '->', '<<', '>>', '|#', '##', + ) + + # PIM Modula-2 Additional Reserved Words Dataset + pim_additional_reserved_words = ( + # 3 additional reserved words + 'EXPORT', 'QUALIFIED', 'WITH', + ) + + # PIM Modula-2 Additional Builtins Dataset + pim_additional_builtins = ( + # 16 additional builtins + 'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH', + 'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 
'TRUNC', 'VAL', + ) + + # PIM Modula-2 Additional Pseudo-Module Builtins Dataset + pim_additional_pseudo_builtins = ( + # 5 additional pseudo builtins + 'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER', + ) + +# I S O M o d u l a - 2 D a t a s e t s + + # Lexemes to Mark as Error Tokens for ISO Modula-2 + iso_lexemes_to_reject = ( + '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->', + '<<', '>>', '|#', '##', + ) + + # ISO Modula-2 Additional Reserved Words Dataset + iso_additional_reserved_words = ( + # 9 additional reserved words (ISO 10514-1) + 'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED', + 'REM', 'RETRY', 'WITH', + # 10 additional reserved words (ISO 10514-2 & ISO 10514-3) + 'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY', + 'REVEAL', 'TRACED', 'UNSAFEGUARDED', + ) + + # ISO Modula-2 Additional Builtins Dataset + iso_additional_builtins = ( + # 26 additional builtins (ISO 10514-1) + 'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', + 'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH', + 'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE', + 'TRUNC', 'UNINTERRUBTIBLE', 'VAL', + # 5 additional builtins (ISO 10514-2 & ISO 10514-3) + 'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF', + ) + + # ISO Modula-2 Additional Pseudo-Module Builtins Dataset + iso_additional_pseudo_builtins = ( + # 14 additional builtins (SYSTEM) + 'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC', + 'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR', + 'ROTATE', 'SHIFT', 'CAST', 'TSIZE', + # 13 additional builtins (COROUTINES) + 'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER', + 'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN', + 'NEWCOROUTINE', 'PROT', 'TRANSFER', + # 9 additional builtins (EXCEPTIONS) + 'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber', + 'ExceptionSource', 'GetMessage', 'IsCurrentSource', + 
'IsExceptionalExecution', 'RAISE', + # 3 additional builtins (TERMINATION) + 'TERMINATION', 'IsTerminating', 'HasHalted', + # 4 additional builtins (M2EXCEPTION) + 'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception', + 'indexException', 'rangeException', 'caseSelectException', + 'invalidLocation', 'functionException', 'wholeValueException', + 'wholeDivException', 'realValueException', 'realDivException', + 'complexValueException', 'complexDivException', 'protException', + 'sysException', 'coException', 'exException', + ) + +# M o d u l a - 2 R 1 0 D a t a s e t s + + # Lexemes to Mark as Error Tokens for Modula-2 R10 + m2r10_lexemes_to_reject = ( + '!', '`', '@', '$', '%', '&', '<>', + ) + + # Modula-2 R10 reserved words in addition to the common set + m2r10_additional_reserved_words = ( + # 12 additional reserved words + 'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE', + 'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN', + # 2 additional reserved words with symbolic assembly option + 'ASM', 'REG', + ) + + # Modula-2 R10 builtins in addition to the common set + m2r10_additional_builtins = ( + # 26 additional builtins + 'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD', + 'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT', + 'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE', + 'UNICHAR', 'WRITE', 'WRITEF', + ) + + # Modula-2 R10 Additional Pseudo-Module Builtins Dataset + m2r10_additional_pseudo_builtins = ( + # 13 additional builtins (TPROPERTIES) + 'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL', + 'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION', + 'TMAXEXP', 'TMINEXP', + # 4 additional builtins (CONVERSION) + 'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL', + # 35 additional builtins (UNSAFE) + 'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC', + 'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC', + 'ROTRC', 
'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR', + 'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT', + 'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC', + # 11 additional builtins (ATOMIC) + 'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND', + 'BWNAND', 'BWOR', 'BWXOR', + # 7 additional builtins (COMPILER) + 'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT', + 'HASH', + # 5 additional builtins (ASSEMBLER) + 'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE', + ) + +# O b j e c t i v e M o d u l a - 2 D a t a s e t s + + # Lexemes to Mark as Error Tokens for Objective Modula-2 + objm2_lexemes_to_reject = ( + '!', '$', '%', '&', '<>', + ) + + # Objective Modula-2 Extensions + # reserved words in addition to Modula-2 R10 + objm2_additional_reserved_words = ( + # 16 additional reserved words + 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD', + 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC', + 'SUPER', 'TRY', + ) + + # Objective Modula-2 Extensions + # builtins in addition to Modula-2 R10 + objm2_additional_builtins = ( + # 3 additional builtins + 'OBJECT', 'NO', 'YES', + ) + + # Objective Modula-2 Extensions + # pseudo-module builtins in addition to Modula-2 R10 + objm2_additional_pseudo_builtins = ( + # None + ) + +# A g l e t M o d u l a - 2 D a t a s e t s + + # Aglet Extensions + # reserved words in addition to ISO Modula-2 + aglet_additional_reserved_words = ( + # None + ) + + # Aglet Extensions + # builtins in addition to ISO Modula-2 + aglet_additional_builtins = ( + # 9 additional builtins + 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16', + 'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32', + ) + + # Aglet Modula-2 Extensions + # pseudo-module builtins in addition to ISO Modula-2 + aglet_additional_pseudo_builtins = ( + # None + ) + +# G N U M o d u l a - 2 D a t a s e t s + + # GNU Extensions + # reserved words in addition to PIM Modula-2 + 
gm2_additional_reserved_words = (
    # 10 additional reserved words
    'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
    '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
)

# GNU Extensions
# builtins in addition to PIM Modula-2
gm2_additional_builtins = (
    # 21 additional builtins
    'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
    'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
    'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
    'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
)

# GNU Extensions
# pseudo-module builtins in addition to PIM Modula-2
gm2_additional_pseudo_builtins = (
    # None
)

#  p 1   M o d u l a - 2   D a t a s e t s

# p1 Extensions
# reserved words in addition to ISO Modula-2
p1_additional_reserved_words = (
    # None
)

# p1 Extensions
# builtins in addition to ISO Modula-2
p1_additional_builtins = (
    # None
)

# p1 Modula-2 Extensions
# pseudo-module builtins in addition to ISO Modula-2
p1_additional_pseudo_builtins = (
    # 1 additional builtin
    'BCD',
)

#  X D S   M o d u l a - 2   D a t a s e t s

# XDS Extensions
# reserved words in addition to ISO Modula-2
xds_additional_reserved_words = (
    # 1 additional reserved word
    'SEQ',
)

# XDS Extensions
# builtins in addition to ISO Modula-2
xds_additional_builtins = (
    # 9 additional builtins
    'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN',
    'LONGCARD', 'SHORTCARD', 'SHORTINT',
)

# XDS Modula-2 Extensions
# pseudo-module builtins in addition to ISO Modula-2
#
# NOTE: every row must end with a comma.  Adjacent string literals are
# concatenated by the parser, so a missing comma between two rows silently
# fuses the neighbouring names into one bogus identifier.
xds_additional_pseudo_builtins = (
    # 21 additional builtins (SYSTEM)
    'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8',
    'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE',
    'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void',
    # 3 additional builtins (COMPILER)
    'COMPILER', 'OPTION', 'EQUATION',
)

#  P I M   S t a n d a r d   L i b r a r y   D a t a s e t s

# PIM Modula-2 Standard Library Modules Dataset
pim_stdlib_module_identifiers = (
    'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage',
)

# PIM Modula-2 Standard Library Types Dataset
pim_stdlib_type_identifiers = (
    'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission',
    'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand',
    'DirectoryCommand',
)

# PIM Modula-2 Standard Library Procedures Dataset
pim_stdlib_proc_identifiers = (
    'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn',
    'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite',
    'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset',
    'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar',
    'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName',
    'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput',
    'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd',
    'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd',
    'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp',
    'ln', 'sin', 'cos', 'arctan', 'entier', 'ALLOCATE', 'DEALLOCATE',
)

# PIM Modula-2 Standard Library Variables Dataset
pim_stdlib_var_identifiers = (
    'Done', 'termCH', 'in', 'out',
)

# PIM Modula-2 Standard Library Constants Dataset
pim_stdlib_const_identifiers = (
    'EOL',
)

#  I S O   S t a n d a r d   L i b r a r y   D a t a s e t s

# ISO Modula-2 Standard Library Modules Dataset
iso_stdlib_module_identifiers = (
    # TO DO
)

# ISO Modula-2 Standard Library Types Dataset
iso_stdlib_type_identifiers = (
    # TO DO
)

# ISO Modula-2 Standard Library Procedures Dataset
iso_stdlib_proc_identifiers = (
    # TO DO
)

# ISO Modula-2 Standard Library Variables Dataset
iso_stdlib_var_identifiers = (
    # TO DO
)

# ISO Modula-2 Standard Library Constants Dataset
iso_stdlib_const_identifiers = (
    # TO DO
)

#  M 2   R 1 0   S t a n d a r d   L i b r a r y   D a t a s e t s

# Modula-2 R10 Standard Library ADTs Dataset
m2r10_stdlib_adt_identifiers = (
    'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET',
    'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD',
    'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT',
    'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64',
    'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8',
    'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8',
    'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16',
    'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32',
    'INT64', 'INT128', 'STRING', 'UNISTRING',
)

# Modula-2 R10 Standard Library Blueprints Dataset
m2r10_stdlib_blueprint_identifiers = (
    'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar',
    'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal',
    'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray',
    'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet',
    'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet',
    'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension',
    'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath',
)

# Modula-2 R10 Standard Library Modules Dataset
m2r10_stdlib_module_identifiers = (
    'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO',
    'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO',
    'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath',
    'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath',
    'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport',
)

# Modula-2 R10 Standard Library Types Dataset
m2r10_stdlib_type_identifiers = (
    'File', 'Status',
    # TO BE COMPLETED
)

# Modula-2 R10 Standard Library Procedures Dataset
m2r10_stdlib_proc_identifiers = (
    'ALLOCATE', 'DEALLOCATE', 'SIZE',
    # TO BE COMPLETED
)

# Modula-2 R10 Standard Library Variables Dataset
m2r10_stdlib_var_identifiers = (
    'stdIn', 'stdOut', 'stdErr',
)

# Modula-2 R10 Standard Library Constants Dataset
m2r10_stdlib_const_identifiers = (
    'pi', 'tau',
)

#  D i a l e c t s

# Dialect modes
# The first entry is the fallback used when no valid dialect is selected.
dialects = (
    'unknown',
    'm2pim', 'm2iso', 'm2r10', 'objm2',
    'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds',
)

#  D a t a b a s e s
#
# Every database maps a dialect name to a tuple of datasets (tuples of
# strings); set_dialect() merges the datasets of the active dialect.

# Lexemes to Mark as Errors Database
lexemes_to_reject_db = {
    # Lexemes to reject for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Lexemes to reject for PIM Modula-2
    'm2pim': (
        pim_lexemes_to_reject,
    ),
    # Lexemes to reject for ISO Modula-2
    'm2iso': (
        iso_lexemes_to_reject,
    ),
    # Lexemes to reject for Modula-2 R10
    'm2r10': (
        m2r10_lexemes_to_reject,
    ),
    # Lexemes to reject for Objective Modula-2
    'objm2': (
        objm2_lexemes_to_reject,
    ),
    # Lexemes to reject for Aglet Modula-2
    'm2iso+aglet': (
        iso_lexemes_to_reject,
    ),
    # Lexemes to reject for GNU Modula-2
    'm2pim+gm2': (
        pim_lexemes_to_reject,
    ),
    # Lexemes to reject for p1 Modula-2
    'm2iso+p1': (
        iso_lexemes_to_reject,
    ),
    # Lexemes to reject for XDS Modula-2
    'm2iso+xds': (
        iso_lexemes_to_reject,
    ),
}

# Reserved Words Database
reserved_words_db = {
    # Reserved words for unknown dialect
    'unknown': (
        common_reserved_words,
        pim_additional_reserved_words,
        iso_additional_reserved_words,
        m2r10_additional_reserved_words,
    ),

    # Reserved words for PIM Modula-2
    'm2pim': (
        common_reserved_words,
        pim_additional_reserved_words,
    ),

    # Reserved words for ISO Modula-2
    'm2iso': (
        common_reserved_words,
        iso_additional_reserved_words,
    ),

    # Reserved words for Modula-2 R10
    'm2r10': (
        common_reserved_words,
        m2r10_additional_reserved_words,
    ),

    # Reserved words for Objective Modula-2
    'objm2': (
        common_reserved_words,
        m2r10_additional_reserved_words,
        objm2_additional_reserved_words,
    ),

    # Reserved words for Aglet Modula-2 Extensions
    'm2iso+aglet': (
        common_reserved_words,
        iso_additional_reserved_words,
        aglet_additional_reserved_words,
    ),

    # Reserved words for GNU Modula-2 Extensions
    'm2pim+gm2': (
        common_reserved_words,
        pim_additional_reserved_words,
        gm2_additional_reserved_words,
    ),

    # Reserved words for p1 Modula-2 Extensions
    'm2iso+p1': (
        common_reserved_words,
        iso_additional_reserved_words,
        p1_additional_reserved_words,
    ),

    # Reserved words for XDS Modula-2 Extensions
    'm2iso+xds': (
        common_reserved_words,
        iso_additional_reserved_words,
        xds_additional_reserved_words,
    ),
}

# Builtins Database
builtins_db = {
    # Builtins for unknown dialect
    'unknown': (
        common_builtins,
        pim_additional_builtins,
        iso_additional_builtins,
        m2r10_additional_builtins,
    ),

    # Builtins for PIM Modula-2
    'm2pim': (
        common_builtins,
        pim_additional_builtins,
    ),

    # Builtins for ISO Modula-2
    'm2iso': (
        common_builtins,
        iso_additional_builtins,
    ),

    # Builtins for Modula-2 R10
    'm2r10': (
        common_builtins,
        m2r10_additional_builtins,
    ),

    # Builtins for Objective Modula-2
    'objm2': (
        common_builtins,
        m2r10_additional_builtins,
        objm2_additional_builtins,
    ),

    # Builtins for Aglet Modula-2 Extensions
    'm2iso+aglet': (
        common_builtins,
        iso_additional_builtins,
        aglet_additional_builtins,
    ),

    # Builtins for GNU Modula-2 Extensions
    'm2pim+gm2': (
        common_builtins,
        pim_additional_builtins,
        gm2_additional_builtins,
    ),

    # Builtins for p1 Modula-2 Extensions
    'm2iso+p1': (
        common_builtins,
        iso_additional_builtins,
        p1_additional_builtins,
    ),

    # Builtins for XDS Modula-2 Extensions
    'm2iso+xds': (
        common_builtins,
        iso_additional_builtins,
        xds_additional_builtins,
    ),
}

# Pseudo-Module Builtins Database
pseudo_builtins_db = {
    # Builtins for unknown dialect
    'unknown': (
        common_pseudo_builtins,
        pim_additional_pseudo_builtins,
        iso_additional_pseudo_builtins,
        m2r10_additional_pseudo_builtins,
    ),

    # Builtins for PIM Modula-2
    'm2pim': (
        common_pseudo_builtins,
        pim_additional_pseudo_builtins,
    ),

    # Builtins for ISO Modula-2
    'm2iso': (
        common_pseudo_builtins,
        iso_additional_pseudo_builtins,
    ),

    # Builtins for Modula-2 R10
    'm2r10': (
        common_pseudo_builtins,
        m2r10_additional_pseudo_builtins,
    ),

    # Builtins for Objective Modula-2
    'objm2': (
        common_pseudo_builtins,
        m2r10_additional_pseudo_builtins,
        objm2_additional_pseudo_builtins,
    ),

    # Builtins for Aglet Modula-2 Extensions
    'm2iso+aglet': (
        common_pseudo_builtins,
        iso_additional_pseudo_builtins,
        aglet_additional_pseudo_builtins,
    ),

    # Builtins for GNU Modula-2 Extensions
    'm2pim+gm2': (
        common_pseudo_builtins,
        pim_additional_pseudo_builtins,
        gm2_additional_pseudo_builtins,
    ),

    # Builtins for p1 Modula-2 Extensions
    'm2iso+p1': (
        common_pseudo_builtins,
        iso_additional_pseudo_builtins,
        p1_additional_pseudo_builtins,
    ),

    # Builtins for XDS Modula-2 Extensions
    'm2iso+xds': (
        common_pseudo_builtins,
        iso_additional_pseudo_builtins,
        xds_additional_pseudo_builtins,
    ),
}

# Standard Library ADTs Database
stdlib_adts_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library ADTs for PIM Modula-2
    'm2pim': (
        # No first class library types
    ),

    # Standard Library ADTs for ISO Modula-2
    'm2iso': (
        # No first class library types
    ),

    # Standard Library ADTs for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_adt_identifiers,
    ),

    # Standard Library ADTs for Objective Modula-2
    'objm2': (
        m2r10_stdlib_adt_identifiers,
    ),

    # Standard Library ADTs for Aglet Modula-2
    'm2iso+aglet': (
        # No first class library types
    ),

    # Standard Library ADTs for GNU Modula-2
    'm2pim+gm2': (
        # No first class library types
    ),

    # Standard Library ADTs for p1 Modula-2
    'm2iso+p1': (
        # No first class library types
    ),

    # Standard Library ADTs for XDS Modula-2
    'm2iso+xds': (
        # No first class library types
    ),
}

# Standard Library Modules Database
stdlib_modules_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Modules for PIM Modula-2
    'm2pim': (
        pim_stdlib_module_identifiers,
    ),

    # Standard Library Modules for ISO Modula-2
    'm2iso': (
        iso_stdlib_module_identifiers,
    ),

    # Standard Library Modules for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_blueprint_identifiers,
        m2r10_stdlib_module_identifiers,
        m2r10_stdlib_adt_identifiers,
    ),

    # Standard Library Modules for Objective Modula-2
    'objm2': (
        m2r10_stdlib_blueprint_identifiers,
        m2r10_stdlib_module_identifiers,
    ),

    # Standard Library Modules for Aglet Modula-2
    'm2iso+aglet': (
        iso_stdlib_module_identifiers,
    ),

    # Standard Library Modules for GNU Modula-2
    'm2pim+gm2': (
        pim_stdlib_module_identifiers,
    ),

    # Standard Library Modules for p1 Modula-2
    'm2iso+p1': (
        iso_stdlib_module_identifiers,
    ),

    # Standard Library Modules for XDS Modula-2
    'm2iso+xds': (
        iso_stdlib_module_identifiers,
    ),
}

# Standard Library Types Database
stdlib_types_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Types for PIM Modula-2
    'm2pim': (
        pim_stdlib_type_identifiers,
    ),

    # Standard Library Types for ISO Modula-2
    'm2iso': (
        iso_stdlib_type_identifiers,
    ),

    # Standard Library Types for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_type_identifiers,
    ),

    # Standard Library Types for Objective Modula-2
    'objm2': (
        m2r10_stdlib_type_identifiers,
    ),

    # Standard Library Types for Aglet Modula-2
    'm2iso+aglet': (
        iso_stdlib_type_identifiers,
    ),

    # Standard Library Types for GNU Modula-2
    'm2pim+gm2': (
        pim_stdlib_type_identifiers,
    ),

    # Standard Library Types for p1 Modula-2
    'm2iso+p1': (
        iso_stdlib_type_identifiers,
    ),

    # Standard Library Types for XDS Modula-2
    'm2iso+xds': (
        iso_stdlib_type_identifiers,
    ),
}

# Standard Library Procedures Database
stdlib_procedures_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Procedures for PIM Modula-2
    'm2pim': (
        pim_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for ISO Modula-2
    'm2iso': (
        iso_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for Objective Modula-2
    'objm2': (
        m2r10_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for Aglet Modula-2
    'm2iso+aglet': (
        iso_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for GNU Modula-2
    'm2pim+gm2': (
        pim_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for p1 Modula-2
    'm2iso+p1': (
        iso_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for XDS Modula-2
    'm2iso+xds': (
        iso_stdlib_proc_identifiers,
    ),
}

# Standard Library Variables Database
stdlib_variables_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Variables for PIM Modula-2
    'm2pim': (
        pim_stdlib_var_identifiers,
    ),

    # Standard Library Variables for ISO Modula-2
    'm2iso': (
        iso_stdlib_var_identifiers,
    ),

    # Standard Library Variables for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_var_identifiers,
    ),

    # Standard Library Variables for Objective Modula-2
    'objm2': (
        m2r10_stdlib_var_identifiers,
    ),

    # Standard Library Variables for Aglet Modula-2
    'm2iso+aglet': (
        iso_stdlib_var_identifiers,
    ),

    # Standard Library Variables for GNU Modula-2
    'm2pim+gm2': (
        pim_stdlib_var_identifiers,
    ),

    # Standard Library Variables for p1 Modula-2
    'm2iso+p1': (
        iso_stdlib_var_identifiers,
    ),

    # Standard Library Variables for XDS Modula-2
    'm2iso+xds': (
        iso_stdlib_var_identifiers,
    ),
}

# Standard Library Constants Database
stdlib_constants_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Constants for PIM Modula-2
    'm2pim': (
        pim_stdlib_const_identifiers,
    ),

    # Standard Library Constants for ISO Modula-2
    'm2iso': (
        iso_stdlib_const_identifiers,
    ),

    # Standard Library Constants for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_const_identifiers,
    ),

    # Standard Library Constants for Objective Modula-2
    'objm2': (
        m2r10_stdlib_const_identifiers,
    ),

    # Standard Library Constants for Aglet Modula-2
    'm2iso+aglet': (
        iso_stdlib_const_identifiers,
    ),

    # Standard Library Constants for GNU Modula-2
    'm2pim+gm2': (
        pim_stdlib_const_identifiers,
    ),

    # Standard Library Constants for p1 Modula-2
    'm2iso+p1': (
        iso_stdlib_const_identifiers,
    ),

    # Standard Library Constants for XDS Modula-2
    'm2iso+xds': (
        iso_stdlib_const_identifiers,
    ),
}

#  M e t h o d s

# initialise a lexer instance
def __init__(self, **options):
    """Initialise the lexer from its options.

    Options read here:

    `dialect`
        List of dialect names.  The first name that is a known dialect
        (any entry of ``dialects`` except ``'unknown'``) is selected;
        when none matches, the lexer falls back to ``'unknown'``.
    `style`
        List of style names.  ``'algol'`` or ``'algol_nu'`` switches on
        Algol publication mode.
    `treat_stdlib_adts_as_builtins`
        Boolean flag, default ``True``.

    All options are then forwarded to ``RegexLexer.__init__``.
    """
    #
    # Alias for unknown dialect.  This is a module-level global because
    # the other methods of the lexer refer to it by its bare name.
    global UNKNOWN
    UNKNOWN = self.dialects[0]
    #
    # check dialect options
    #
    dialects = get_list_opt(options, 'dialect', [])
    #
    # Accept every known dialect except the 'unknown' placeholder.  The
    # slice used to be [1:-1], which also dropped the last real dialect
    # and made it impossible to select it through the option.
    selectable_dialects = self.dialects[1:]
    #
    for dialect_option in dialects:
        if dialect_option in selectable_dialects:
            # valid dialect option found
            self.set_dialect(dialect_option)
            break
    #
    # Fallback Mode (DEFAULT)
    else:
        # no valid dialect option
        self.set_dialect(UNKNOWN)
    #
    self.dialect_set_by_tag = False
    #
    # check style options
    #
    styles = get_list_opt(options, 'style', [])
    #
    # use lowercase mode for Algol style
    self.algol_publication_mode = 'algol' in styles or 'algol_nu' in styles
    #
    # Check option flags
    #
    self.treat_stdlib_adts_as_builtins = get_bool_opt(
        options, 'treat_stdlib_adts_as_builtins', True)
    #
    # call superclass initialiser
    RegexLexer.__init__(self, **options)

# Set lexer to a specified dialect
def set_dialect(self, dialect_id):
    """Rebuild the lexer's identifier sets for the dialect `dialect_id`.

    A name that is not listed in ``dialects`` selects the unknown
    dialect.  For the chosen dialect the datasets of every database are
    merged into one set per category and stored on the instance together
    with the dialect name.
    """
    #
    # check dialect name against known dialects
    if dialect_id not in self.dialects:
        dialect = UNKNOWN  # default
    else:
        dialect = dialect_id

    def merged(database, excluded=frozenset()):
        # union of all datasets of this dialect, minus the excluded names
        result = set()
        for dataset in database[dialect]:
            result.update(set(dataset).difference(excluded))
        return result
    #
    # lexemes to reject and reserved words are taken as they are
    lexemes_to_reject_set = merged(self.lexemes_to_reject_db)
    reswords_set = merged(self.reserved_words_db)
    #
    # builtins, pseudo-builtins and ADTs exclude reserved words
    builtins_set = merged(self.builtins_db, reswords_set)
    pseudo_builtins_set = merged(self.pseudo_builtins_db, reswords_set)
    adts_set = merged(self.stdlib_adts_db, reswords_set)
    #
    # standard library identifiers exclude builtins
    modules_set = merged(self.stdlib_modules_db, builtins_set)
    types_set = merged(self.stdlib_types_db, builtins_set)
    procedures_set = merged(self.stdlib_procedures_db, builtins_set)
    variables_set = merged(self.stdlib_variables_db, builtins_set)
    constants_set = merged(self.stdlib_constants_db, builtins_set)
    #
    # update lexer state
    self.dialect = dialect
    self.lexemes_to_reject = lexemes_to_reject_set
    self.reserved_words = reswords_set
    self.builtins = builtins_set
    self.pseudo_builtins = pseudo_builtins_set
    self.adts = adts_set
    self.modules = modules_set
    self.types = types_set
    self.procedures = procedures_set
    self.variables = variables_set
    self.constants = constants_set

# Extracts a dialect name from a dialect tag comment string and checks
# the extracted name against known dialects.
If a match is found, the + # matching name is returned, otherwise dialect id 'unknown' is returned + def get_dialect_from_dialect_tag(self, dialect_tag): + # + #if __debug__: + # print 'entered get_dialect_from_dialect_tag with arg: ', dialect_tag + # + # constants + left_tag_delim = '(*!' + right_tag_delim = '*)' + left_tag_delim_len = len(left_tag_delim) + right_tag_delim_len = len(right_tag_delim) + indicator_start = left_tag_delim_len + indicator_end = -(right_tag_delim_len) + # + # check comment string for dialect indicator + if len(dialect_tag) > (left_tag_delim_len + right_tag_delim_len) \ + and dialect_tag.startswith(left_tag_delim) \ + and dialect_tag.endswith(right_tag_delim): + # + #if __debug__: + # print 'dialect tag found' + # + # extract dialect indicator + indicator = dialect_tag[indicator_start:indicator_end] + # + #if __debug__: + # print 'extracted: ', indicator + # + # check against known dialects + for index in range(1, len(self.dialects)): + # + #if __debug__: + # print 'dialects[', index, ']: ', self.dialects[index] + # + if indicator == self.dialects[index]: + # + #if __debug__: + # print 'matching dialect found' + # + # indicator matches known dialect + return indicator + else: + # indicator does not match any dialect + return UNKNOWN # default + else: + # invalid indicator string + return UNKNOWN # default + + # intercept the token stream, modify token attributes and return them + def get_tokens_unprocessed(self, text): + for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): + # + # check for dialect tag if dialect has not been set by tag + if not self.dialect_set_by_tag and token == Comment.Special: + indicated_dialect = self.get_dialect_from_dialect_tag(value) + if indicated_dialect != UNKNOWN: + # token is a dialect indicator + # reset reserved words and builtins + self.set_dialect(indicated_dialect) + self.dialect_set_by_tag = True + # + # check for reserved words, predefined and stdlib identifiers + if token is 
Name: + if value in self.reserved_words: + token = Keyword.Reserved + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.builtins: + token = Name.Builtin + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.pseudo_builtins: + token = Name.Builtin.Pseudo + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.adts: + if not self.treat_stdlib_adts_as_builtins: + token = Name.Namespace + else: + token = Name.Builtin.Pseudo + if self.algol_publication_mode: + value = value.lower() + # + elif value in self.modules: + token = Name.Namespace + # + elif value in self.types: + token = Name.Class + # + elif value in self.procedures: + token = Name.Function + # + elif value in self.variables: + token = Name.Variable + # + elif value in self.constants: + token = Name.Constant + # + elif token in Number: + # + # mark prefix number literals as error for PIM and ISO dialects + if self.dialect not in (UNKNOWN, 'm2r10', 'objm2'): + if "'" in value or value[0:2] in ('0b', '0x', '0u'): + token = Error + # + elif self.dialect in ('m2r10', 'objm2'): + # mark base-8 number literals as errors for M2 R10 and ObjM2 + if token is Number.Oct: + token = Error + # mark suffix base-16 literals as errors for M2 R10 and ObjM2 + elif token is Number.Hex and 'H' in value: + token = Error + # mark real numbers with E as errors for M2 R10 and ObjM2 + elif token is Number.Float and 'E' in value: + token = Error + # + elif token in Comment: + # + # mark single line comment as error for PIM and ISO dialects + if token is Comment.Single: + if self.dialect not in [UNKNOWN, 'm2r10', 'objm2']: + token = Error + # + if token is Comment.Preproc: + # mark ISO pragma as error for PIM dialects + if value.startswith('<*') and \ + self.dialect.startswith('m2pim'): + token = Error + # mark PIM pragma as comment for other dialects + elif value.startswith('(*$') and \ + self.dialect != UNKNOWN and \ + not 
self.dialect.startswith('m2pim'): + token = Comment.Multiline + # + else: # token is neither Name nor Comment + # + # mark lexemes matching the dialect's error token set as errors + if value in self.lexemes_to_reject: + token = Error + # + # substitute lexemes when in Algol mode + if self.algol_publication_mode: + if value == '#': + value = u'≠' + elif value == '<=': + value = u'≤' + elif value == '>=': + value = u'≥' + elif value == '==': + value = u'≡' + elif value == '*.': + value = u'•' + + # return result + yield index, token, value diff --git a/pygments/lexers/parasail.py b/pygments/lexers/parasail.py new file mode 100644 index 00000000..3cfffbee --- /dev/null +++ b/pygments/lexers/parasail.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.parasail + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexer for ParaSail. + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ + this, combined, inherit, do_insertions, default +from pygments.util import get_bool_opt, get_list_opt +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Literal + +__all__ = ['ParaSailLexer'] + + +class ParaSailLexer(RegexLexer): + """ + For `ParaSail <http://www.parasail-lang.org>`_ source code. + + .. 
versionadded:: 2.1 + """ + + name = 'ParaSail' + aliases = ['parasail'] + filenames = ['*.psi', '*.psl'] + mimetypes = ['text/x-parasail'] + + flags = re.MULTILINE + + tokens = { + 'root': [ + (r'[^\S\n]+', Text), + (r'//.*?\n', Comment.Single), + (r'\b(and|or|xor)=', Operator.Word), + (r'\b(and(\s+then)?|or(\s+else)?|xor|rem|mod|' + r'(is|not)\s+null)\b', + Operator.Word), + # Keywords + (r'\b(abs|abstract|all|block|class|concurrent|const|continue|' + r'each|end|exit|extends|exports|forward|func|global|implements|' + r'import|in|interface|is|lambda|locked|new|not|null|of|op|' + r'optional|private|queued|ref|return|reverse|separate|some|' + r'type|until|var|with|' + # Control flow + r'if|then|else|elsif|case|for|while|loop)\b', + Keyword.Reserved), + (r'(abstract\s+)?(interface|class|op|func|type)', + Keyword.Declaration), + # Literals + (r'"[^"]*"', String), + (r'\\[\'ntrf"0]', String.Escape), + (r'#[a-zA-Z]\w*', Literal), #Enumeration + include('numbers'), + (r"'[^']'", String.Char), + (r'[a-zA-Z]\w*', Name), + # Operators and Punctuation + (r'(<==|==>|<=>|\*\*=|<\|=|<<=|>>=|==|!=|=\?|<=|>=|' + r'\*\*|<<|>>|=>|:=|\+=|-=|\*=|\||\|=|/=|\+|-|\*|/|' + r'\.\.|<\.\.|\.\.<|<\.\.<)', + Operator), + (r'(<|>|\[|\]|\(|\)|\||:|;|,|.|\{|\}|->)', + Punctuation), + (r'\n+', Text), + ], + 'numbers' : [ + (r'\d[0-9_]*#[0-9a-fA-F][0-9a-fA-F_]*#', Number.Hex), # any base + (r'0[xX][0-9a-fA-F][0-9a-fA-F_]*', Number.Hex), # C-like hex + (r'0[bB][01][01_]*', Number.Bin), # C-like bin + (r'\d[0-9_]*\.\d[0-9_]*[eE][+-]\d[0-9_]*', # float exp + Number.Float), + (r'\d[0-9_]*\.\d[0-9_]*', Number.Float), # float + (r'\d[0-9_]*', Number.Integer), # integer + ], + } diff --git a/pygments/lexers/pascal.py b/pygments/lexers/pascal.py index 2895fba7..d3ce6a3a 100644 --- a/pygments/lexers/pascal.py +++ b/pygments/lexers/pascal.py @@ -18,7 +18,9 @@ from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Error from pygments.scanner import Scanner -__all__ = 
['DelphiLexer', 'Modula2Lexer', 'AdaLexer'] +from pygments.lexers.modula2 import Modula2Lexer + +__all__ = ['DelphiLexer', 'AdaLexer'] class DelphiLexer(Lexer): @@ -505,198 +507,6 @@ class DelphiLexer(Lexer): yield scanner.start_pos, token, scanner.match or '' -class Modula2Lexer(RegexLexer): - """ - For `Modula-2 <http://www.modula2.org/>`_ source code. - - Additional options that determine which keywords are highlighted: - - `pim` - Select PIM Modula-2 dialect (default: True). - `iso` - Select ISO Modula-2 dialect (default: False). - `objm2` - Select Objective Modula-2 dialect (default: False). - `gm2ext` - Also highlight GNU extensions (default: False). - - .. versionadded:: 1.3 - """ - name = 'Modula-2' - aliases = ['modula2', 'm2'] - filenames = ['*.def', '*.mod'] - mimetypes = ['text/x-modula2'] - - flags = re.MULTILINE | re.DOTALL - - tokens = { - 'whitespace': [ - (r'\n+', Text), # blank lines - (r'\s+', Text), # whitespace - ], - 'identifiers': [ - (r'([a-zA-Z_$][\w$]*)', Name), - ], - 'numliterals': [ - (r'[01]+B', Number.Bin), # binary number (ObjM2) - (r'[0-7]+B', Number.Oct), # octal number (PIM + ISO) - (r'[0-7]+C', Number.Oct), # char code (PIM + ISO) - (r'[0-9A-F]+C', Number.Hex), # char code (ObjM2) - (r'[0-9A-F]+H', Number.Hex), # hexadecimal number - (r'[0-9]+\.[0-9]+E[+-][0-9]+', Number.Float), # real number - (r'[0-9]+\.[0-9]+', Number.Float), # real number - (r'[0-9]+', Number.Integer), # decimal whole number - ], - 'strings': [ - (r"'(\\\\|\\'|[^'])*'", String), # single quoted string - (r'"(\\\\|\\"|[^"])*"', String), # double quoted string - ], - 'operators': [ - (r'[*/+=#~&<>\^-]', Operator), - (r':=', Operator), # assignment - (r'@', Operator), # pointer deref (ISO) - (r'\.\.', Operator), # ellipsis or range - (r'`', Operator), # Smalltalk message (ObjM2) - (r'::', Operator), # type conversion (ObjM2) - ], - 'punctuation': [ - (r'[()\[\]{},.:;|]', Punctuation), - ], - 'comments': [ - (r'//.*?\n', Comment.Single), # ObjM2 - 
(r'/\*(.*?)\*/', Comment.Multiline), # ObjM2 - (r'\(\*([^$].*?)\*\)', Comment.Multiline), - # TO DO: nesting of (* ... *) comments - ], - 'pragmas': [ - (r'\(\*\$(.*?)\*\)', Comment.Preproc), # PIM - (r'<\*(.*?)\*>', Comment.Preproc), # ISO + ObjM2 - ], - 'root': [ - include('whitespace'), - include('comments'), - include('pragmas'), - include('identifiers'), - include('numliterals'), - include('strings'), - include('operators'), - include('punctuation'), - ] - } - - pim_reserved_words = [ - # 40 reserved words - 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', - 'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'EXPORT', 'FOR', - 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', - 'MODULE', 'NOT', 'OF', 'OR', 'POINTER', 'PROCEDURE', 'QUALIFIED', - 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', - 'UNTIL', 'VAR', 'WHILE', 'WITH', - ] - - pim_pervasives = [ - # 31 pervasives - 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'DEC', - 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'INC', 'INCL', - 'INTEGER', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD', - 'ORD', 'PROC', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'VAL', - ] - - iso_reserved_words = [ - # 46 reserved words - 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', - 'DO', 'ELSE', 'ELSIF', 'END', 'EXCEPT', 'EXIT', 'EXPORT', 'FINALLY', - 'FOR', 'FORWARD', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', - 'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'PACKEDSET', 'POINTER', - 'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'REM', 'RETRY', - 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE', - 'WITH', - ] - - iso_pervasives = [ - # 42 pervasives - 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'CMPLX', - 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', - 'IM', 'INC', 'INCL', 'INT', 'INTEGER', 'INTERRUPTIBLE', 'LENGTH', - 'LFLOAT', 'LONGCOMPLEX', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', - 'NIL', 
'ODD', 'ORD', 'PROC', 'PROTECTION', 'RE', 'REAL', 'SIZE', - 'TRUE', 'TRUNC', 'UNINTERRUBTIBLE', 'VAL', - ] - - objm2_reserved_words = [ - # base language, 42 reserved words - 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', - 'DO', 'ELSE', 'ELSIF', 'END', 'ENUM', 'EXIT', 'FOR', 'FROM', 'IF', - 'IMMUTABLE', 'IMPLEMENTATION', 'IMPORT', 'IN', 'IS', 'LOOP', 'MOD', - 'MODULE', 'NOT', 'OF', 'OPAQUE', 'OR', 'POINTER', 'PROCEDURE', - 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', - 'UNTIL', 'VAR', 'VARIADIC', 'WHILE', - # OO extensions, 16 reserved words - 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD', - 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC', - 'SUPER', 'TRY', - ] - - objm2_pervasives = [ - # base language, 38 pervasives - 'ABS', 'BITSET', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'DISPOSE', - 'FALSE', 'HALT', 'HIGH', 'INTEGER', 'INRANGE', 'LENGTH', 'LONGCARD', - 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEG', 'NEW', 'NEXTV', 'NIL', - 'OCTET', 'ODD', 'ORD', 'PRED', 'PROC', 'READ', 'REAL', 'SUCC', 'TMAX', - 'TMIN', 'TRUE', 'TSIZE', 'UNICHAR', 'VAL', 'WRITE', 'WRITEF', - # OO extensions, 3 pervasives - 'OBJECT', 'NO', 'YES', - ] - - gnu_reserved_words = [ - # 10 additional reserved words - 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__', - '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE', - ] - - gnu_pervasives = [ - # 21 identifiers, actually from pseudo-module SYSTEM - # but we will highlight them as if they were pervasives - 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16', - 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96', - 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64', - 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW', - ] - - def __init__(self, **options): - self.reserved_words = set() - self.pervasives = set() - # ISO Modula-2 - if get_bool_opt(options, 'iso', False): - 
self.reserved_words.update(self.iso_reserved_words) - self.pervasives.update(self.iso_pervasives) - # Objective Modula-2 - elif get_bool_opt(options, 'objm2', False): - self.reserved_words.update(self.objm2_reserved_words) - self.pervasives.update(self.objm2_pervasives) - # PIM Modula-2 (DEFAULT) - else: - self.reserved_words.update(self.pim_reserved_words) - self.pervasives.update(self.pim_pervasives) - # GNU extensions - if get_bool_opt(options, 'gm2ext', False): - self.reserved_words.update(self.gnu_reserved_words) - self.pervasives.update(self.gnu_pervasives) - # initialise - RegexLexer.__init__(self, **options) - - def get_tokens_unprocessed(self, text): - for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): - # check for reserved words and pervasives - if token is Name: - if value in self.reserved_words: - token = Keyword.Reserved - elif value in self.pervasives: - token = Keyword.Pervasive - # return result - yield index, token, value - - class AdaLexer(RegexLexer): """ For Ada source code. 
diff --git a/pygments/lexers/perl.py b/pygments/lexers/perl.py index 7e70b3ee..b78963d0 100644 --- a/pygments/lexers/perl.py +++ b/pygments/lexers/perl.py @@ -46,6 +46,7 @@ class PerlLexer(RegexLexer): (r'\$(\\\\|\\[^\\]|[^\\$])*\$[egimosx]*', String.Regex, '#pop'), ], 'root': [ + (r'\A\#!.+?$', Comment.Hashbang), (r'\#.*?$', Comment.Single), (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline), (words(( diff --git a/pygments/lexers/prolog.py b/pygments/lexers/prolog.py index 2b1c7634..7d32d7f6 100644 --- a/pygments/lexers/prolog.py +++ b/pygments/lexers/prolog.py @@ -155,11 +155,11 @@ class LogtalkLexer(RegexLexer): # Term creation and decomposition (r'(functor|arg|copy_term|numbervars|term_variables)(?=[(])', Keyword), # Evaluable functors - (r'(rem|m(ax|in|od)|abs|sign)(?=[(])', Keyword), + (r'(div|rem|m(ax|in|od)|abs|sign)(?=[(])', Keyword), (r'float(_(integer|fractional)_part)?(?=[(])', Keyword), - (r'(floor|truncate|round|ceiling)(?=[(])', Keyword), + (r'(floor|t(an|runcate)|round|ceiling)(?=[(])', Keyword), # Other arithmetic functors - (r'(cos|a(cos|sin|tan)|exp|log|s(in|qrt))(?=[(])', Keyword), + (r'(cos|a(cos|sin|tan|tan2)|exp|log|s(in|qrt)|xor)(?=[(])', Keyword), # Term testing (r'(var|atom(ic)?|integer|float|c(allable|ompound)|n(onvar|umber)|' r'ground|acyclic_term)(?=[(])', Keyword), @@ -212,7 +212,7 @@ class LogtalkLexer(RegexLexer): (r'(==|\\==|@=<|@<|@>=|@>)', Operator), # Evaluable functors (r'(//|[-+*/])', Operator), - (r'\b(e|pi|mod|rem)\b', Operator), + (r'\b(e|pi|div|mod|rem)\b', Operator), # Other arithemtic functors (r'\b\*\*\b', Operator), # DCG rules diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 3c1aff56..ea97b855 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -41,7 +41,8 @@ class PythonLexer(RegexLexer): (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)), (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)), (r'[^\S\n]+', Text), - (r'#.*$', Comment), + 
(r'\A#!.+$', Comment.Hashbang), + (r'#.*$', Comment.Single), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), @@ -155,10 +156,11 @@ class PythonLexer(RegexLexer): r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'strings': [ + # the old style '%s' % (...) string formatting (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%\n]+', String), - # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), # unhandled string formatting sign (r'%', String) @@ -213,11 +215,14 @@ class Python3Lexer(RegexLexer): tokens = PythonLexer.tokens.copy() tokens['keywords'] = [ (words(( - 'assert', 'break', 'continue', 'del', 'elif', 'else', 'except', - 'finally', 'for', 'global', 'if', 'lambda', 'pass', 'raise', - 'nonlocal', 'return', 'try', 'while', 'yield', 'yield from', 'as', - 'with', 'True', 'False', 'None'), suffix=r'\b'), + 'assert', 'async', 'await', 'break', 'continue', 'del', 'elif', + 'else', 'except', 'finally', 'for', 'global', 'if', 'lambda', 'pass', + 'raise', 'nonlocal', 'return', 'try', 'while', 'yield', 'yield from', + 'as', 'with'), suffix=r'\b'), Keyword), + (words(( + 'True', 'False', 'None'), suffix=r'\b'), + Keyword.Constant), ] tokens['builtins'] = [ (words(( @@ -241,7 +246,7 @@ class Python3Lexer(RegexLexer): 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'NotImplementedError', 'OSError', 'OverflowError', - 'PendingDeprecationWarning', 'ReferenceError', + 'PendingDeprecationWarning', 'ReferenceError', 'ResourceWarning', 'RuntimeError', 'RuntimeWarning', 'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError', @@ -267,6 +272,7 @@ class Python3Lexer(RegexLexer): tokens['backtick'] = [] tokens['name'] = [ 
(r'@\w+', Name.Decorator), + (r'@', Operator), # new matrix multiplication operator (uni_name, Name), ] tokens['funcname'] = [ @@ -288,13 +294,21 @@ class Python3Lexer(RegexLexer): (uni_name, Name.Namespace), default('#pop'), ] - # don't highlight "%s" substitutions tokens['strings'] = [ - (r'[^\\\'"%\n]+', String), - # quotes, percents and backslashes must be parsed one at a time + # the old style '%s' % (...) string formatting (still valid in Py3) + (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + # the new style '{}'.format(...) string formatting + (r'\{' + '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name + '(\![sra])?' # conversion + '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[bcdeEfFgGnosxX%]?)?' + '\}', String.Interpol), + # backslashes, quotes and formatting signs must be parsed one at a time + (r'[^\\\'"%\{\n]+', String), (r'[\'"\\]', String), # unhandled string formatting sign - (r'%', String) + (r'%|(\{{1,2})', String) # newlines are an error (use "nl" state) ] diff --git a/pygments/lexers/rdf.py b/pygments/lexers/rdf.py index fb14629a..4f2037bf 100644 --- a/pygments/lexers/rdf.py +++ b/pygments/lexers/rdf.py @@ -12,10 +12,10 @@ import re from pygments.lexer import RegexLexer, bygroups, default -from pygments.token import Keyword, Punctuation, String, Number, Operator, \ +from pygments.token import Keyword, Punctuation, String, Number, Operator, Generic, \ Whitespace, Name, Literal, Comment, Text -__all__ = ['SparqlLexer'] +__all__ = ['SparqlLexer', 'TurtleLexer'] class SparqlLexer(RegexLexer): @@ -97,3 +97,97 @@ class SparqlLexer(RegexLexer): default('#pop:2'), ], } + + +class TurtleLexer(RegexLexer): + """ + Lexer for `Turtle <http://www.w3.org/TR/turtle/>`_ data language. + + .. 
versionadded:: 2.0 + """ + name = 'Turtle' + aliases = ['turtle'] + filenames = ['*.ttl'] + mimetypes = ['text/turtle', 'application/x-turtle'] + + flags = re.IGNORECASE + + patterns = { + 'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)', # Simplified character range + 'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)' + } + + # PNAME_NS PN_LOCAL (with simplified character range) + patterns['PrefixedName'] = r'%(PNAME_NS)s([a-z][\w-]*)' % patterns + + tokens = { + 'root': [ + (r'\s+', Whitespace), + + # Base / prefix + (r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns, + bygroups(Keyword, Whitespace, Name.Variable, Whitespace, + Punctuation)), + (r'(@prefix|PREFIX)(\s+)%(PNAME_NS)s(\s+)%(IRIREF)s(\s*)(\.?)' % patterns, + bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, + Name.Variable, Whitespace, Punctuation)), + + # The shorthand predicate 'a' + (r'(?<=\s)a(?=\s)', Keyword.Type), + + # IRIREF + (r'%(IRIREF)s' % patterns, Name.Variable), + + # PrefixedName + (r'%(PrefixedName)s' % patterns, + bygroups(Name.Namespace, Name.Tag)), + + # Comment + (r'#[^\n]+', Comment), + + (r'\b(true|false)\b', Literal), + (r'[+\-]?\d*\.\d+', Number.Float), + (r'[+\-]?\d*(:?\.\d+)?E[+\-]?\d+', Number.Float), + (r'[+\-]?\d+', Number.Integer), + (r'[\[\](){}.;,:^]', Punctuation), + + (r'"""', String, 'triple-double-quoted-string'), + (r'"', String, 'single-double-quoted-string'), + (r"'''", String, 'triple-single-quoted-string'), + (r"'", String, 'single-single-quoted-string'), + ], + 'triple-double-quoted-string': [ + (r'"""', String, 'end-of-string'), + (r'[^\\]+', String), + (r'\\', String, 'string-escape'), + ], + 'single-double-quoted-string': [ + (r'"', String, 'end-of-string'), + (r'[^"\\\n]+', String), + (r'\\', String, 'string-escape'), + ], + 'triple-single-quoted-string': [ + (r"'''", String, 'end-of-string'), + (r'[^\\]+', String), + (r'\\', String, 'string-escape'), + ], + 'single-single-quoted-string': [ + (r"'", String, 'end-of-string'), + (r"[^'\\\n]+", String), + (r'\\', 
String, 'string-escape'), + ], + 'string-escape': [ + (r'.', String, '#pop'), + ], + 'end-of-string': [ + + (r'(@)([a-zA-Z]+(:?-[a-zA-Z0-9]+)*)', + bygroups(Operator, Generic.Emph), '#pop:2'), + + (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'), + (r'(\^\^)%(PrefixedName)s' % patterns, bygroups(Operator, Generic.Emph, Generic.Emph), '#pop:2'), + + default('#pop:2'), + + ], + } diff --git a/pygments/lexers/robotframework.py b/pygments/lexers/robotframework.py index 56996fa8..eab06efe 100644 --- a/pygments/lexers/robotframework.py +++ b/pygments/lexers/robotframework.py @@ -87,7 +87,7 @@ class RobotFrameworkLexer(Lexer): class VariableTokenizer(object): def tokenize(self, string, token): - var = VariableSplitter(string, identifiers='$@%') + var = VariableSplitter(string, identifiers='$@%&') if var.start < 0 or token in (COMMENT, ERROR): yield string, token return @@ -205,7 +205,7 @@ class Tokenizer(object): def _is_assign(self, value): if value.endswith('='): value = value[:-1].strip() - var = VariableSplitter(value, identifiers='$@') + var = VariableSplitter(value, identifiers='$@&') return var.start == 0 and var.end == len(value) @@ -262,7 +262,7 @@ class TestCaseSetting(Setting): class KeywordSetting(TestCaseSetting): _keyword_settings = ('teardown',) - _other_settings = ('documentation', 'arguments', 'return', 'timeout') + _other_settings = ('documentation', 'arguments', 'return', 'timeout', 'tags') class Variable(Tokenizer): @@ -465,13 +465,13 @@ class VariableSplitter: self.identifier = self._variable_chars[0] self.base = ''.join(self._variable_chars[2:-1]) self.end = self.start + len(self._variable_chars) - if self._has_list_variable_index(): - self.index = ''.join(self._list_variable_index_chars[1:-1]) - self.end += len(self._list_variable_index_chars) + if self._has_list_or_dict_variable_index(): + self.index = ''.join(self._list_and_dict_variable_index_chars[1:-1]) + self.end += len(self._list_and_dict_variable_index_chars) - 
def _has_list_variable_index(self): - return self._list_variable_index_chars\ - and self._list_variable_index_chars[-1] == ']' + def _has_list_or_dict_variable_index(self): + return self._list_and_dict_variable_index_chars\ + and self._list_and_dict_variable_index_chars[-1] == ']' def _split(self, string): start_index, max_index = self._find_variable(string) @@ -479,7 +479,7 @@ class VariableSplitter: self._open_curly = 1 self._state = self._variable_state self._variable_chars = [string[start_index], '{'] - self._list_variable_index_chars = [] + self._list_and_dict_variable_index_chars = [] self._string = string start_index += 2 for index, char in enumerate(string[start_index:]): @@ -530,14 +530,14 @@ class VariableSplitter: if char == '}' and not self._is_escaped(self._string, index): self._open_curly -= 1 if self._open_curly == 0: - if not self._is_list_variable(): + if not self._is_list_or_dict_variable(): raise StopIteration self._state = self._waiting_list_variable_index_state elif char in self._identifiers: self._state = self._internal_variable_start_state - def _is_list_variable(self): - return self._variable_chars[0] == '@' + def _is_list_or_dict_variable(self): + return self._variable_chars[0] in ('@','&') def _internal_variable_start_state(self, char, index): self._state = self._variable_state @@ -551,10 +551,10 @@ class VariableSplitter: def _waiting_list_variable_index_state(self, char, index): if char != '[': raise StopIteration - self._list_variable_index_chars.append(char) + self._list_and_dict_variable_index_chars.append(char) self._state = self._list_variable_index_state def _list_variable_index_state(self, char, index): - self._list_variable_index_chars.append(char) + self._list_and_dict_variable_index_chars.append(char) if char == ']': raise StopIteration diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py index 74edd7ae..e81d6ecf 100644 --- a/pygments/lexers/ruby.py +++ b/pygments/lexers/ruby.py @@ -190,6 +190,7 @@ class 
RubyLexer(ExtendedRegexLexer): tokens = { 'root': [ + (r'\A#!.+?$', Comment.Hashbang), (r'#.*?$', Comment.Single), (r'=begin\s.*?\n=end.*?$', Comment.Multiline), # keywords @@ -256,13 +257,13 @@ class RubyLexer(ExtendedRegexLexer): r'(?<=(?:\s|;)when\s)|' r'(?<=(?:\s|;)or\s)|' r'(?<=(?:\s|;)and\s)|' - r'(?<=(?:\s|;|\.)index\s)|' - r'(?<=(?:\s|;|\.)scan\s)|' - r'(?<=(?:\s|;|\.)sub\s)|' - r'(?<=(?:\s|;|\.)sub!\s)|' - r'(?<=(?:\s|;|\.)gsub\s)|' - r'(?<=(?:\s|;|\.)gsub!\s)|' - r'(?<=(?:\s|;|\.)match\s)|' + r'(?<=\.index\s)|' + r'(?<=\.scan\s)|' + r'(?<=\.sub\s)|' + r'(?<=\.sub!\s)|' + r'(?<=\.gsub\s)|' + r'(?<=\.gsub!\s)|' + r'(?<=\.match\s)|' r'(?<=(?:\s|;)if\s)|' r'(?<=(?:\s|;)elsif\s)|' r'(?<=^when\s)|' diff --git a/pygments/lexers/rust.py b/pygments/lexers/rust.py index 2ca860d6..d8939678 100644 --- a/pygments/lexers/rust.py +++ b/pygments/lexers/rust.py @@ -10,7 +10,7 @@ """ from pygments.lexer import RegexLexer, include, bygroups, words, default -from pygments.token import Comment, Operator, Keyword, Name, String, \ +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Whitespace __all__ = ['RustLexer'] @@ -18,33 +18,40 @@ __all__ = ['RustLexer'] class RustLexer(RegexLexer): """ - Lexer for the Rust programming language (version 0.9). + Lexer for the Rust programming language (version 1.0). .. versionadded:: 1.6 """ name = 'Rust' filenames = ['*.rs'] aliases = ['rust'] - mimetypes = ['text/x-rustsrc'] + mimetypes = ['text/rust'] tokens = { 'root': [ + # rust allows a file to start with a shebang, but if the first line + # starts with #![ then it’s not a shebang but a crate attribute. 
+ (r'#![^[\r\n].*$', Comment.Preproc), + default('base'), + ], + 'base': [ # Whitespace and Comments (r'\n', Whitespace), (r'\s+', Whitespace), - (r'//[/!](.*?)\n', Comment.Doc), + (r'//!.*?\n', String.Doc), + (r'///(\n|[^/].*?\n)', String.Doc), (r'//(.*?)\n', Comment.Single), + (r'/\*\*(\n|[^/*])', String.Doc, 'doccomment'), + (r'/\*!', String.Doc, 'doccomment'), (r'/\*', Comment.Multiline, 'comment'), - # Lifetime - (r"""'[a-zA-Z_]\w*""", Name.Label), # Macro parameters (r"""\$([a-zA-Z_]\w*|\(,?|\),?|,?)""", Comment.Preproc), # Keywords (words(( - 'as', 'box', 'break', 'continue', 'do', 'else', 'enum', 'extern', + 'as', 'box', 'crate', 'do', 'else', 'enum', 'extern', # break and continue are in labels 'fn', 'for', 'if', 'impl', 'in', 'loop', 'match', 'mut', 'priv', - 'proc', 'pub', 'ref', 'return', 'static', '\'static', 'struct', + 'proc', 'pub', 'ref', 'return', 'static', 'struct', 'trait', 'true', 'type', 'unsafe', 'while'), suffix=r'\b'), Keyword), (words(('alignof', 'be', 'const', 'offsetof', 'pure', 'sizeof', @@ -53,44 +60,45 @@ class RustLexer(RegexLexer): (r'(mod|use)\b', Keyword.Namespace), (r'(true|false)\b', Keyword.Constant), (r'let\b', Keyword.Declaration), - (words(('u8', 'u16', 'u32', 'u64', 'i8', 'i16', 'i32', 'i64', 'uint', - 'int', 'f32', 'f64', 'str', 'bool'), suffix=r'\b'), + (words(('u8', 'u16', 'u32', 'u64', 'i8', 'i16', 'i32', 'i64', 'usize', + 'isize', 'f32', 'f64', 'str', 'bool'), suffix=r'\b'), Keyword.Type), (r'self\b', Name.Builtin.Pseudo), - # Prelude + # Prelude (taken from Rust’s src/libstd/prelude.rs) (words(( - 'Freeze', 'Pod', 'Send', 'Sized', 'Add', 'Sub', 'Mul', 'Div', 'Rem', 'Neg', 'Not', 'BitAnd', - 'BitOr', 'BitXor', 'Drop', 'Shl', 'Shr', 'Index', 'Option', 'Some', 'None', 'Result', - 'Ok', 'Err', 'from_str', 'range', 'print', 'println', 'Any', 'AnyOwnExt', 'AnyRefExt', - 'AnyMutRefExt', 'Ascii', 'AsciiCast', 'OnwedAsciiCast', 'AsciiStr', - 'IntoBytes', 'Bool', 'ToCStr', 'Char', 'Clone', 'DeepClone', 'Eq', 'ApproxEq', - 
'Ord', 'TotalEq', 'Ordering', 'Less', 'Equal', 'Greater', 'Equiv', 'Container', - 'Mutable', 'Map', 'MutableMap', 'Set', 'MutableSet', 'Default', 'FromStr', - 'Hash', 'FromIterator', 'Extendable', 'Iterator', 'DoubleEndedIterator', - 'RandomAccessIterator', 'CloneableIterator', 'OrdIterator', - 'MutableDoubleEndedIterator', 'ExactSize', 'Times', 'Algebraic', - 'Trigonometric', 'Exponential', 'Hyperbolic', 'Bitwise', 'BitCount', - 'Bounded', 'Integer', 'Fractional', 'Real', 'RealExt', 'Num', 'NumCast', - 'CheckedAdd', 'CheckedSub', 'CheckedMul', 'Orderable', 'Signed', - 'Unsigned', 'Round', 'Primitive', 'Int', 'Float', 'ToStrRadix', - 'ToPrimitive', 'FromPrimitive', 'GenericPath', 'Path', 'PosixPath', - 'WindowsPath', 'RawPtr', 'Buffer', 'Writer', 'Reader', 'Seek', - 'SendStr', 'SendStrOwned', 'SendStrStatic', 'IntoSendStr', 'Str', - 'StrVector', 'StrSlice', 'OwnedStr', 'IterBytes', 'ToStr', 'IntoStr', - 'CopyableTuple', 'ImmutableTuple', 'ImmutableEqVector', 'ImmutableTotalOrdVector', - 'ImmutableCopyableVector', 'OwnedVector', 'OwnedCopyableVector', - 'OwnedEqVector', 'MutableVector', 'MutableTotalOrdVector', - 'Vector', 'VectorVector', 'CopyableVector', 'ImmutableVector', - 'Port', 'Chan', 'SharedChan', 'spawn', 'drop'), suffix=r'\b'), + # Reexported core operators + 'Copy', 'Send', 'Sized', 'Sync', + 'Drop', 'Fn', 'FnMut', 'FnOnce', + + # Reexported functions + 'drop', + + # Reexported types and traits + 'Box', + 'ToOwned', + 'Clone', + 'PartialEq', 'PartialOrd', 'Eq', 'Ord', + 'AsRef', 'AsMut', 'Into', 'From', + 'Default', + 'Iterator', 'Extend', 'IntoIterator', + 'DoubleEndedIterator', 'ExactSizeIterator', + 'Option', + 'Some', 'None', + 'Result', + 'Ok', 'Err', + 'SliceConcatExt', + 'String', 'ToString', + 'Vec', + ), suffix=r'\b'), Name.Builtin), - (r'(ImmutableTuple\d+|Tuple\d+)\b', Name.Builtin), - # Borrowed pointer - (r'(&)(\'[A-Za-z_]\w*)?', bygroups(Operator, Name)), # Labels - (r'\'[A-Za-z_]\w*:', Name.Label), + 
(r'(break|continue)(\s*)(\'[A-Za-z_]\w*)?', bygroups(Keyword, Text.Whitespace, Name.Label)), # Character Literal - (r"""'(\\['"\\nrt]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}""" - r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|.)'""", + (r"""'(\\['"\\nrt]|\\x[0-7][0-9a-fA-F]|\\0""" + r"""|\\u\{[0-9a-fA-F]{1,6}\}|.)'""", + String.Char), + (r"""b'(\\['"\\nrt]|\\x[0-9a-fA-F]{2}|\\0""" + r"""|\\u\{[0-9a-fA-F]{1,6}\}|.)'""", String.Char), # Binary Literal (r'0b[01_]+', Number.Bin, 'number_lit'), @@ -100,11 +108,16 @@ class RustLexer(RegexLexer): (r'0[xX][0-9a-fA-F_]+', Number.Hex, 'number_lit'), # Decimal Literal (r'[0-9][0-9_]*(\.[0-9_]+[eE][+\-]?[0-9_]+|' - r'\.[0-9_]*|[eE][+\-]?[0-9_]+)', Number.Float, 'number_lit'), + r'\.[0-9_]*(?!\.)|[eE][+\-]?[0-9_]+)', Number.Float, 'number_lit'), (r'[0-9][0-9_]*', Number.Integer, 'number_lit'), # String Literal + (r'b"', String, 'bytestring'), (r'"', String, 'string'), - (r'r(#*)".*?"\1', String.Raw), + (r'b?r(#*)".*?"\1', String), + + # Lifetime + (r"""'static""", Name.Builtin), + (r"""'[a-zA-Z_]\w*""", Name.Attribute), # Operators and Punctuation (r'[{}()\[\],.;]', Punctuation), @@ -129,18 +142,28 @@ class RustLexer(RegexLexer): (r'\*/', Comment.Multiline, '#pop'), (r'[*/]', Comment.Multiline), ], + 'doccomment': [ + (r'[^*/]+', String.Doc), + (r'/\*', String.Doc, '#push'), + (r'\*/', String.Doc, '#pop'), + (r'[*/]', String.Doc), + ], 'number_lit': [ - (r'[ui](8|16|32|64)', Keyword, '#pop'), + (r'[ui](8|16|32|64|size)', Keyword, '#pop'), (r'f(32|64)', Keyword, '#pop'), default('#pop'), ], 'string': [ (r'"', String, '#pop'), - (r"""\\['"\\nrt]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}""" - r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}""", String.Escape), + (r"""\\['"\\nrt]|\\x[0-7][0-9a-fA-F]|\\0""" + r"""|\\u\{[0-9a-fA-F]{1,6}\}""", String.Escape), (r'[^\\"]+', String), (r'\\', String), ], + 'bytestring': [ + (r"""\\x[89a-fA-F][0-9a-fA-F]""", String.Escape), + include('string'), + ], 'macro{': [ (r'\{', Operator, '#push'), (r'\}', Operator, '#pop'), diff 
--git a/pygments/lexers/scripting.py b/pygments/lexers/scripting.py index 473ea7eb..c09c5ba9 100644 --- a/pygments/lexers/scripting.py +++ b/pygments/lexers/scripting.py @@ -14,11 +14,12 @@ import re from pygments.lexer import RegexLexer, include, bygroups, default, combined, \ words from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation, Error, Whitespace + Number, Punctuation, Error, Whitespace, Other from pygments.util import get_bool_opt, get_list_opt, iteritems __all__ = ['LuaLexer', 'MoonScriptLexer', 'ChaiscriptLexer', 'LSLLexer', - 'AppleScriptLexer', 'RexxLexer', 'MOOCodeLexer', 'HybrisLexer'] + 'AppleScriptLexer', 'RexxLexer', 'MOOCodeLexer', 'HybrisLexer', + 'EasytrieveLexer', 'JclLexer'] class LuaLexer(RegexLexer): @@ -921,3 +922,275 @@ class HybrisLexer(RegexLexer): (r'[\w.]+\*?', Name.Namespace, '#pop') ], } + + +class EasytrieveLexer(RegexLexer): + """ + Easytrieve Plus is a programming language for extracting, filtering and + converting sequential data. Furthermore it can layout data for reports. + It is mainly used on mainframe platforms and can access several of the + mainframe's native file formats. It is somewhat comparable to awk. + + .. versionadded:: 2.1 + """ + name = 'Easytrieve' + aliases = ['easytrieve'] + filenames = ['*.ezt', '*.mac'] + mimetypes = ['text/x-easytrieve'] + flags = 0 + + # Note: We cannot use r'\b' at the start and end of keywords because + # Easytrieve Plus delimiter characters are: + # + # * space ( ) + # * apostrophe (') + # * period (.) + # * comma (,) + # * paranthesis ( and ) + # * colon (:) + # + # Additionally words end once a '*' appears, indicatins a comment. 
+ _DELIMITERS = r' \'.,():\n' + _DELIMITERS_OR_COMENT = _DELIMITERS + '*' + _DELIMITER_PATTERN = '[' + _DELIMITERS + ']' + _DELIMITER_PATTERN_CAPTURE = '(' + _DELIMITER_PATTERN + ')' + _NON_DELIMITER_OR_COMMENT_PATTERN = '[^' + _DELIMITERS_OR_COMENT + ']' + _OPERATORS_PATTERN = u'[.+\\-/=\\[\\](){}<>;,&%¬]' + _KEYWORDS = [ + 'AFTER-BREAK', 'AFTER-LINE', 'AFTER-SCREEN', 'AIM', 'AND', 'ATTR', + 'BEFORE', 'BEFORE-BREAK', 'BEFORE-LINE', 'BEFORE-SCREEN', 'BUSHU', + 'BY', 'CALL', 'CASE', 'CHECKPOINT', 'CHKP', 'CHKP-STATUS', 'CLEAR', + 'CLOSE', 'COL', 'COLOR', 'COMMIT', 'CONTROL', 'COPY', 'CURSOR', 'D', + 'DECLARE', 'DEFAULT', 'DEFINE', 'DELETE', 'DENWA', 'DISPLAY', 'DLI', + 'DO', 'DUPLICATE', 'E', 'ELSE', 'ELSE-IF', 'END', 'END-CASE', + 'END-DO', 'END-IF', 'END-PROC', 'ENDPAGE', 'ENDTABLE', 'ENTER', 'EOF', + 'EQ', 'ERROR', 'EXIT', 'EXTERNAL', 'EZLIB', 'F1', 'F10', 'F11', 'F12', + 'F13', 'F14', 'F15', 'F16', 'F17', 'F18', 'F19', 'F2', 'F20', 'F21', + 'F22', 'F23', 'F24', 'F25', 'F26', 'F27', 'F28', 'F29', 'F3', 'F30', + 'F31', 'F32', 'F33', 'F34', 'F35', 'F36', 'F4', 'F5', 'F6', 'F7', + 'F8', 'F9', 'FETCH', 'FILE-STATUS', 'FILL', 'FINAL', 'FIRST', + 'FIRST-DUP', 'FOR', 'GE', 'GET', 'GO', 'GOTO', 'GQ', 'GR', 'GT', + 'HEADING', 'HEX', 'HIGH-VALUES', 'IDD', 'IDMS', 'IF', 'IN', 'INSERT', + 'JUSTIFY', 'KANJI-DATE', 'KANJI-DATE-LONG', 'KANJI-TIME', 'KEY', + 'KEY-PRESSED', 'KOKUGO', 'KUN', 'LAST-DUP', 'LE', 'LEVEL', 'LIKE', + 'LINE', 'LINE-COUNT', 'LINE-NUMBER', 'LINK', 'LIST', 'LOW-VALUES', + 'LQ', 'LS', 'LT', 'MACRO', 'MASK', 'MATCHED', 'MEND', 'MESSAGE', + 'MOVE', 'MSTART', 'NE', 'NEWPAGE', 'NOMASK', 'NOPRINT', 'NOT', + 'NOTE', 'NOVERIFY', 'NQ', 'NULL', 'OF', 'OR', 'OTHERWISE', 'PA1', + 'PA2', 'PA3', 'PAGE-COUNT', 'PAGE-NUMBER', 'PARM-REGISTER', + 'PATH-ID', 'PATTERN', 'PERFORM', 'POINT', 'POS', 'PRIMARY', 'PRINT', + 'PROCEDURE', 'PROGRAM', 'PUT', 'READ', 'RECORD', 'RECORD-COUNT', + 'RECORD-LENGTH', 'REFRESH', 'RELEASE', 'RENUM', 'REPEAT', 'REPORT', + 'REPORT-INPUT', 
'RESHOW', 'RESTART', 'RETRIEVE', 'RETURN-CODE', + 'ROLLBACK', 'ROW', 'S', 'SCREEN', 'SEARCH', 'SECONDARY', 'SELECT', + 'SEQUENCE', 'SIZE', 'SKIP', 'SOKAKU', 'SORT', 'SQL', 'STOP', 'SUM', + 'SYSDATE', 'SYSDATE-LONG', 'SYSIN', 'SYSIPT', 'SYSLST', 'SYSPRINT', + 'SYSSNAP', 'SYSTIME', 'TALLY', 'TERM-COLUMNS', 'TERM-NAME', + 'TERM-ROWS', 'TERMINATION', 'TITLE', 'TO', 'TRANSFER', 'TRC', + 'UNIQUE', 'UNTIL', 'UPDATE', 'UPPERCASE', 'USER', 'USERID', 'VALUE', + 'VERIFY', 'W', 'WHEN', 'WHILE', 'WORK', 'WRITE', 'X', 'XDM', 'XRST' + ] + + tokens = { + 'root': [ + (r'\*.*\n', Comment.Single), + (r'\n+', Whitespace), + # Macro argument + (r'&' + _NON_DELIMITER_OR_COMMENT_PATTERN + r'+\.', Name.Variable, 'after_macro_argument'), + # Macro call + (r'%' + _NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Variable), + (r'(FILE|MACRO|REPORT)(\s+)', + bygroups(Keyword.Declaration, Whitespace), 'after_declaration'), + (r'(JOB|PARM)' + r'(' + _DELIMITER_PATTERN + r')', + bygroups(Keyword.Declaration, Operator)), + (words(_KEYWORDS, suffix=_DELIMITER_PATTERN_CAPTURE), + bygroups(Keyword.Reserved, Operator)), + (_OPERATORS_PATTERN, Operator), + # Procedure declaration + (r'(' + _NON_DELIMITER_OR_COMMENT_PATTERN + r'+)(\s*)(\.?)(\s*)(PROC)(\s*\n)', + bygroups(Name.Function, Whitespace, Operator, Whitespace, Keyword.Declaration, Whitespace)), + (r'[0-9]+\.[0-9]*', Number.Float), + (r'[0-9]+', Number.Integer), + (r"'(''|[^'])*'", String), + (r'\s+', Whitespace), + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name) # Everything else just belongs to a name + ], + 'after_declaration': [ + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Function), + ('', Whitespace, '#pop') + ], + 'after_macro_argument': [ + (r'\*.*\n', Comment.Single, '#pop'), + (r'\s+', Whitespace, '#pop'), + (_OPERATORS_PATTERN, Operator, '#pop'), + (r"'(''|[^'])*'", String, '#pop'), + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name) # Everything else just belongs to a name + ], + } + _COMMENT_LINE_REGEX = re.compile(r'^\s*\*') + 
_MACRO_HEADER_REGEX = re.compile(r'^\s*MACRO') + + def analyse_text(text): + """ + Perform a structural analysis for basic Easytrieve constructs. + """ + result = 0.0 + lines = text.split('\n') + hasEndProc = False + hasHeaderComment = False + hasFile = False + hasJob = False + hasProc = False + hasParm = False + hasReport = False + + def isCommentLine(line): + return EasytrieveLexer._COMMENT_LINE_REGEX.match(lines[0]) is not None + + def isEmptyLine(line): + return not bool(line.strip()) + + # Remove possible empty lines and header comments. + while lines and (isEmptyLine(lines[0]) or isCommentLine(lines[0])): + if not isEmptyLine(lines[0]): + hasHeaderComment = True + del lines[0] + + if EasytrieveLexer._MACRO_HEADER_REGEX.match(lines[0]): + # Looks like an Easytrieve macro. + result = 0.4 + if hasHeaderComment: + result += 0.4 + else: + # Scan the source for lines starting with indicators. + for line in lines: + words = line.split() + if (len(words) >= 2): + firstWord = words[0] + if not hasReport: + if not hasJob: + if not hasFile: + if not hasParm: + if firstWord == 'PARM': + hasParm = True + if firstWord == 'FILE': + hasFile = True + if firstWord == 'JOB': + hasJob = True + elif firstWord == 'PROC': + hasProc = True + elif firstWord == 'END-PROC': + hasEndProc = True + elif firstWord == 'REPORT': + hasReport = True + + # Weight the findings. + if hasJob and (hasProc == hasEndProc): + if hasHeaderComment: + result += 0.1 + if hasParm: + if hasProc: + # Found PARM, JOB and PROC/END-PROC: + # pretty sure this is Easytrieve. + result += 0.8 + else: + # Found PARAM and JOB: probably this is Easytrieve + result += 0.5 + else: + # Found JOB and possibly other keywords: might be Easytrieve + result += 0.11 + if hasParm: + # Note: PARAM is not a proper English word, so this is + # regarded a much better indicator for Easytrieve than + # the other words. 
+ result += 0.2 + if hasFile: + result += 0.01 + if hasReport: + result += 0.01 + assert 0.0 <= result <= 1.0 + return result + + +class JclLexer(RegexLexer): + """ + `Job Control Language (JCL) <http://publibz.boulder.ibm.com/cgi-bin/bookmgr_OS390/BOOKS/IEA2B570/CCONTENTS>`_ + is a scripting language used on mainframe platforms to instruct the system + on how to run a batch job or start a subsystem. It is somewhat + comparable to MS DOS batch and Unix shell scripts. + + .. versionadded:: 2.1 + """ + name = 'JCL' + aliases = ['jcl'] + filenames = ['*.jcl'] + mimetypes = ['text/x-jcl'] + flags = re.IGNORECASE + + tokens = { + 'root': [ + (r'//\*.*\n', Comment.Single), + (r'//', Keyword.Pseudo, 'statement'), + (r'/\*', Keyword.Pseudo, 'jes2_statement'), + # TODO: JES3 statement + (r'.*\n', Other) # Input text or inline code in any language. + ], + 'statement': [ + (r'\s*\n', Whitespace, '#pop'), + (r'([a-z][a-z_0-9]*)(\s+)(exec|job)(\s*)', + bygroups(Name.Label, Whitespace, Keyword.Reserved, Whitespace), + 'option'), + (r'[a-z][a-z_0-9]*', Name.Variable, 'statement_command'), + (r'\s+', Whitespace, 'statement_command'), + ], + 'statement_command': [ + (r'\s+(command|cntl|dd|endctl|endif|else|include|jcllib|' + r'output|pend|proc|set|then|xmit)\s+', Keyword.Reserved, 'option'), + include('option') + ], + 'jes2_statement': [ + (r'\s*\n', Whitespace, '#pop'), + (r'\$', Keyword, 'option'), + (r'\b(jobparam|message|netacct|notify|output|priority|route|' + r'setup|signoff|xeq|xmit)\b', Keyword, 'option'), + ], + 'option': [ + #(r'\n', Text, 'root'), + (r'\*', Name.Builtin), + (r'[\[\](){}<>;,]', Punctuation), + (r'[-+*/=&%]', Operator), + (r'[a-z_][a-z_0-9]*', Name), + (r'[0-9]+\.[0-9]*', Number.Float), + (r'\.[0-9]+', Number.Float), + (r'[0-9]+', Number.Integer), + (r"'", String, 'option_string'), + (r'[ \t]+', Whitespace, 'option_comment'), + (r'\.', Punctuation), + ], + 'option_string': [ + (r"(\n)(//)", bygroups(Text, Keyword.Pseudo)), + (r"''", String), + (r"[^']", 
String), + (r"'", String, '#pop'), + ], + 'option_comment': [ + #(r'\n', Text, 'root'), + (r'.+', Comment.Single), + ] + } + + _JOB_HEADER_PATTERN = re.compile(r'^//[a-z#$@][a-z0-9#$@]{0,7}\s+job(\s+.*)?$', re.IGNORECASE) + + def analyse_text(text): + """ + Recognize JCL job by header. + """ + result = 0.0 + lines = text.split('\n') + if len(lines) > 0: + if JclLexer._JOB_HEADER_PATTERN.match(lines[0]): + result = 1.0 + assert 0.0 <= result <= 1.0 + return result + + diff --git a/pygments/lexers/shell.py b/pygments/lexers/shell.py index 810ee7da..cd9cad15 100644 --- a/pygments/lexers/shell.py +++ b/pygments/lexers/shell.py @@ -47,7 +47,9 @@ class BashLexer(RegexLexer): (r'\$\(\(', Keyword, 'math'), (r'\$\(', Keyword, 'paren'), (r'\$\{#?', String.Interpol, 'curly'), - (r'\$#?(\w+|.)', Name.Variable), + (r'\$[a-fA-F_][a-fA-F0-9_]*', Name.Variable), # user variable + (r'\$(?:\d+|[#$?!_*@-])', Name.Variable), # builtin + (r'\$', Text), ], 'basic': [ (r'\b(if|fi|else|while|do|done|for|then|return|function|case|' @@ -60,7 +62,8 @@ class BashLexer(RegexLexer): r'shopt|source|suspend|test|time|times|trap|true|type|typeset|' r'ulimit|umask|unalias|unset|wait)\s*\b(?!\.)', Name.Builtin), - (r'#.*\n', Comment), + (r'\A#!.+\n', Comment.Hashbang), + (r'#.*\n', Comment.Single), (r'\\[\w\W]', String.Escape), (r'(\b\w+)(\s*)(=)', bygroups(Name.Variable, Text, Operator)), (r'[\[\]{}()=]', Operator), diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index f575ed38..646a9f31 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -489,8 +489,8 @@ class MySqlLexer(RegexLexer): r'day_hour|day_microsecond|day_minute|day_second|dec|decimal|' r'declare|default|delayed|delete|desc|describe|deterministic|' r'distinct|distinctrow|div|double|drop|dual|each|else|elseif|' - r'enclosed|escaped|exists|exit|explain|fetch|float|float4|float8' - r'|for|force|foreign|from|fulltext|grant|group|having|' + r'enclosed|escaped|exists|exit|explain|fetch|flush|float|float4|' + 
r'float8|for|force|foreign|from|fulltext|grant|group|having|' r'high_priority|hour_microsecond|hour_minute|hour_second|if|' r'ignore|in|index|infile|inner|inout|insensitive|insert|int|' r'int1|int2|int3|int4|int8|integer|interval|into|is|iterate|' diff --git a/pygments/lexers/supercollider.py b/pygments/lexers/supercollider.py new file mode 100644 index 00000000..2fc302df --- /dev/null +++ b/pygments/lexers/supercollider.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.supercollider + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexer for SuperCollider + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, include, words +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Other + +__all__ = ['SuperColliderLexer'] + +class SuperColliderLexer(RegexLexer): + """ + For SuperCollider source code. + + .. versionadded:: 2.1 + """ + + name = 'SuperCollider' + aliases = ['sc', 'supercollider'] + filenames = ['*.sc', '*.scd'] + mimetypes = ['application/supercollider', 'text/supercollider', ] + + flags = re.DOTALL | re.MULTILINE + tokens = { + 'commentsandwhitespace': [ + (r'\s+', Text), + (r'<!--', Comment), + (r'//.*?\n', Comment.Single), + (r'/\*.*?\*/', Comment.Multiline) + ], + 'slashstartsregex': [ + include('commentsandwhitespace'), + (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' + r'([gim]+\b|\B)', String.Regex, '#pop'), + (r'(?=/)', Text, ('#pop', 'badregex')), + (r'', Text, '#pop') + ], + 'badregex': [ + (r'\n', Text, '#pop') + ], + 'root': [ + (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), + include('commentsandwhitespace'), + (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' + r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'), + (r'[{(\[;,]', Punctuation, 'slashstartsregex'), + (r'[})\].]', Punctuation), + (words(( + 'for', 'in', 'while', 'do', 'break', 'return', 'continue', + 'switch', 
'case', 'default', 'if', 'else', 'throw', 'try', + 'catch', 'finally', 'new', 'delete', 'typeof', 'instanceof', + 'void'), suffix=r'\b'), + Keyword, 'slashstartsregex'), + (words(('var', 'let', 'with', 'function', 'arg'), suffix=r'\b'), + Keyword.Declaration, 'slashstartsregex'), + (words(( + '(abstract', 'boolean', 'byte', 'char', 'class', 'const', + 'debugger', 'double', 'enum', 'export', 'extends', 'final', + 'float', 'goto', 'implements', 'import', 'int', 'interface', + 'long', 'native', 'package', 'private', 'protected', 'public', + 'short', 'static', 'super', 'synchronized', 'throws', + 'transient', 'volatile'), suffix=r'\b'), + Keyword.Reserved), + (words(('true', 'false', 'nil', 'inf'), suffix=r'\b'), Keyword.Constant), + (words(( + 'Array', 'Boolean', 'Date', 'Error', 'Function', 'Number', + 'Object', 'Packages', 'RegExp', 'String', 'Error', + 'isFinite', 'isNaN', 'parseFloat', 'parseInt', 'super', + 'thisFunctionDef', 'thisFunction', 'thisMethod', 'thisProcess', + 'thisThread', 'this'), suffix=r'\b'), + Name.Builtin), + (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), + (r'\\?[$a-zA-Z_][a-zA-Z0-9_]*', String.Symbol), + (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'[0-9]+', Number.Integer), + (r'"(\\\\|\\"|[^"])*"', String.Double), + (r"'(\\\\|\\'|[^'])*'", String.Single), + ] + } diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py index 3cb73059..71055a9f 100644 --- a/pygments/lexers/templates.py +++ b/pygments/lexers/templates.py @@ -369,7 +369,7 @@ class DjangoLexer(RegexLexer): r'with(?:(?:out)?\s*context)?|scoped|ignore\s+missing)\b', Keyword), (r'(loop|block|super|forloop)\b', Name.Builtin), - (r'[a-zA-Z][\w-]*', Name.Variable), + (r'[a-zA-Z_][\w-]*', Name.Variable), (r'\.\w+', Name.Variable), (r':?"(\\\\|\\"|[^"])*"', String.Double), (r":?'(\\\\|\\'|[^'])*'", String.Single), @@ -568,10 +568,12 @@ class MasonLexer(RegexLexer): } def analyse_text(text): - rv = 0.0 - if re.search('<&', 
text) is not None: - rv = 1.0 - return rv + result = 0.0 + if re.search(r'</%(class|doc|init)%>', text) is not None: + result = 1.0 + elif re.search(r'<&.+&>', text, re.DOTALL) is not None: + result = 0.11 + return result class MakoLexer(RegexLexer): diff --git a/pygments/lexers/testing.py b/pygments/lexers/testing.py index 55f4c054..4a91c5b1 100644 --- a/pygments/lexers/testing.py +++ b/pygments/lexers/testing.py @@ -27,9 +27,9 @@ class GherkinLexer(RegexLexer): mimetypes = ['text/x-gherkin'] feature_keywords = u'^(기능|機能|功能|フィーチャ|خاصية|תכונה|Функціонал|Функционалност|Функционал|Фича|Особина|Могућност|Özellik|Właściwość|Tính năng|Trajto|Savybė|Požiadavka|Požadavek|Osobina|Ominaisuus|Omadus|OH HAI|Mogućnost|Mogucnost|Jellemző|Fīča|Funzionalità|Funktionalität|Funkcionalnost|Funkcionalitāte|Funcționalitate|Functionaliteit|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Feature|Egenskap|Egenskab|Crikey|Característica|Arwedd)(:)(.*)$' - feature_element_keywords = u'^(\\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo 
de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$' + feature_element_keywords = u'^(\\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|剧本大纲|剧本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All 
y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$' examples_keywords = u'^(\\s*)(예|例子|例|サンプル|امثلة|דוגמאות|Сценарији|Примери|Приклади|Мисоллар|Значения|Örnekler|Voorbeelden|Variantai|Tapaukset|Scenarios|Scenariji|Scenarijai|Příklady|Példák|Príklady|Przykłady|Primjeri|Primeri|Piemēri|Pavyzdžiai|Paraugs|Juhtumid|Exemplos|Exemples|Exemplele|Exempel|Examples|Esempi|Enghreifftiau|Ekzemploj|Eksempler|Ejemplos|EXAMPLZ|Dữ liệu|Contoh|Cobber|Beispiele)(:)(.*)$' - step_keywords = u'^(\\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假如|但是|但し|並且|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y\'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y\'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )' + step_keywords = 
u'^(\\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假设|假如|假定|但是|但し|並且|并且|同時|同时|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y\'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y\'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )' tokens = { 'comments': [ diff --git a/pygments/lexers/theorem.py b/pygments/lexers/theorem.py index 9898b05d..47fdc8b6 100644 --- a/pygments/lexers/theorem.py +++ b/pygments/lexers/theorem.py @@ -414,7 +414,8 @@ class LeanLexer(RegexLexer): '-.', '->', '.', '..', '...', '::', ':>', ';', ';;', '<', '<-', '=', '==', '>', '_', '`', '|', '||', '~', '=>', '<=', '>=', '/\\', '\\/', u'∀', u'Π', u'λ', u'↔', u'∧', u'∨', u'≠', u'≤', u'≥', - u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', u'⌞', u'⌟', u'≡' + u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', 
u'⌞', u'⌟', u'≡', + u'⟨', u'⟩' ) punctuation = ('(', ')', ':', '{', '}', '[', ']', u'⦃', u'⦄', ':=', ',') diff --git a/pygments/lexers/webmisc.py b/pygments/lexers/webmisc.py index 08b6c969..c37af144 100644 --- a/pygments/lexers/webmisc.py +++ b/pygments/lexers/webmisc.py @@ -731,9 +731,9 @@ class QmlLexer(RegexLexer): # JavascriptLexer above. name = 'QML' - aliases = ['qml'] - filenames = ['*.qml'] - mimetypes = ['application/x-qml'] + aliases = ['qml', 'qbs'] + filenames = ['*.qml', '*.qbs'] + mimetypes = ['application/x-qml', 'application/x-qt.qbs+qml'] # pasted from JavascriptLexer, with some additions flags = re.DOTALL | re.MULTILINE diff --git a/pygments/modeline.py b/pygments/modeline.py index 54df90c4..2200f1cf 100644 --- a/pygments/modeline.py +++ b/pygments/modeline.py @@ -13,16 +13,19 @@ import re __all__ = ['get_filetype_from_buffer'] + modeline_re = re.compile(r''' (?: vi | vim | ex ) (?: [<=>]? \d* )? : .* (?: ft | filetype | syn | syntax ) = ( [^:\s]+ ) ''', re.VERBOSE) + def get_filetype_from_line(l): m = modeline_re.search(l) if m: return m.group(1) + def get_filetype_from_buffer(buf, max_lines=5): """ Scan the buffer for modelines and return filetype if one is found. 
@@ -32,7 +35,7 @@ def get_filetype_from_buffer(buf, max_lines=5): ret = get_filetype_from_line(l) if ret: return ret - for l in lines[max_lines:0:-1]: + for l in lines[max_lines:-1:-1]: ret = get_filetype_from_line(l) if ret: return ret diff --git a/pygments/style.py b/pygments/style.py index a49e9b7e..b2b990ea 100644 --- a/pygments/style.py +++ b/pygments/style.py @@ -40,7 +40,7 @@ class StyleMeta(type): continue ndef = _styles.get(token.parent, None) styledefs = obj.styles.get(token, '').split() - if not ndef or token is None: + if not ndef or token is None: ndef = ['', 0, 0, 0, '', '', 0, 0, 0] elif 'noinherit' in styledefs and token is not Token: ndef = _styles[Token][:] diff --git a/pygments/styles/__init__.py b/pygments/styles/__init__.py index ca657609..d7a0564a 100644 --- a/pygments/styles/__init__.py +++ b/pygments/styles/__init__.py @@ -38,6 +38,9 @@ STYLE_MAP = { 'igor': 'igor::IgorStyle', 'paraiso-light': 'paraiso_light::ParaisoLightStyle', 'paraiso-dark': 'paraiso_dark::ParaisoDarkStyle', + 'lovelace': 'lovelace::LovelaceStyle', + 'algol': 'algol::AlgolStyle', + 'algol_nu': 'algol_nu::Algol_NuStyle', } diff --git a/pygments/styles/algol.py b/pygments/styles/algol.py new file mode 100644 index 00000000..a8726009 --- /dev/null +++ b/pygments/styles/algol.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +""" + pygments.styles.algol + ~~~~~~~~~~~~~~~~~~~~~ + + Algol publication style. + + This style renders source code for publication of algorithms in + scientific papers and academic texts, where its format is frequently used. + + It is based on the style of the revised Algol-60 language report[1]. + + o No colours, only black, white and shades of grey are used. + o Keywords are rendered in lowercase underline boldface. + o Builtins are rendered in lowercase boldface italic. + o Docstrings and pragmas are rendered in dark grey boldface. + o Library identifiers are rendered in dark grey boldface italic. + o Comments are rendered in grey italic. 
+ + To render keywords without underlining, refer to the `Algol_Nu` style. + + For lowercase conversion of keywords and builtins in languages where + these are not or might not be lowercase, a supporting lexer is required. + The Algol and Modula-2 lexers automatically convert to lowercase whenever + this style is selected. + + [1] `Revised Report on the Algorithmic Language Algol-60 <http://www.masswerk.at/algol60/report.htm>` + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.style import Style +from pygments.token import Keyword, Name, Comment, String, Error, Operator + + +class AlgolStyle(Style): + + background_color = "#ffffff" + default_style = "" + + styles = { + Comment: "italic #888", + Comment.Preproc: "bold noitalic #888", + Comment.Special: "bold noitalic #888", + + Keyword: "underline bold", + Keyword.Declaration: "italic", + + Name.Builtin: "bold italic", + Name.Builtin.Pseudo: "bold italic", + Name.Namespace: "bold italic #666", + Name.Class: "bold italic #666", + Name.Function: "bold italic #666", + Name.Variable: "bold italic #666", + Name.Constant: "bold italic #666", + + Operator.Word: "bold", + + String: "italic #666", + + Error: "border:#FF0000" + } diff --git a/pygments/styles/algol_nu.py b/pygments/styles/algol_nu.py new file mode 100644 index 00000000..392838f2 --- /dev/null +++ b/pygments/styles/algol_nu.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +""" + pygments.styles.algol_nu + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Algol publication style without underlining of keywords. + + This style renders source code for publication of algorithms in + scientific papers and academic texts, where its format is frequently used. + + It is based on the style of the revised Algol-60 language report[1]. + + o No colours, only black, white and shades of grey are used. + o Keywords are rendered in lowercase boldface. + o Builtins are rendered in lowercase boldface italic. 
+ o Docstrings and pragmas are rendered in dark grey boldface. + o Library identifiers are rendered in dark grey boldface italic. + o Comments are rendered in grey italic. + + To render keywords with underlining, refer to the `Algol` style. + + For lowercase conversion of keywords and builtins in languages where + these are not or might not be lowercase, a supporting lexer is required. + The Algol and Modula-2 lexers automatically convert to lowercase whenever + this style is selected. + + [1] `Revised Report on the Algorithmic Language Algol-60 <http://www.masswerk.at/algol60/report.htm>` + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.style import Style +from pygments.token import Keyword, Name, Comment, String, Error, Operator + + +class Algol_NuStyle(Style): + + background_color = "#ffffff" + default_style = "" + + styles = { + Comment: "italic #888", + Comment.Preproc: "bold noitalic #888", + Comment.Special: "bold noitalic #888", + + Keyword: "bold", + Keyword.Declaration: "italic", + + Name.Builtin: "bold italic", + Name.Builtin.Pseudo: "bold italic", + Name.Namespace: "bold italic #666", + Name.Class: "bold italic #666", + Name.Function: "bold italic #666", + Name.Variable: "bold italic #666", + Name.Constant: "bold italic #666", + + Operator.Word: "bold", + + String: "italic #666", + + Error: "border:#FF0000" + } diff --git a/pygments/styles/lovelace.py b/pygments/styles/lovelace.py new file mode 100644 index 00000000..31bd5505 --- /dev/null +++ b/pygments/styles/lovelace.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +""" + pygments.styles.lovelace + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Lovelace by Miikka Salminen + + Pygments style by Miikka Salminen (https://github.com/miikkas) + A desaturated, somewhat subdued style created for the Lovelace interactive + learning environment. 
+""" + +from pygments.style import Style +from pygments.token import Keyword, Name, Comment, String, Error, \ + Number, Operator, Punctuation, Generic, Whitespace + + +class LovelaceStyle(Style): + """ + The style used in Lovelace interactive learning environment. Tries to avoid + the "angry fruit salad" effect with desaturated and dim colours. + """ + _KW_BLUE = '#2838b0' + _NAME_GREEN = '#388038' + _DOC_ORANGE = '#b85820' + _OW_PURPLE = '#a848a8' + _FUN_BROWN = '#785840' + _STR_RED = '#b83838' + _CLS_CYAN = '#287088' + _ESCAPE_LIME = '#709030' + _LABEL_CYAN = '#289870' + _EXCEPT_YELLOW = '#908828' + + default_style = '#222222' + + styles = { + Whitespace: '#a89028', + Comment: 'italic #888888', + Comment.Hashbang: _CLS_CYAN, + Comment.Multiline: '#888888', + Comment.Preproc: 'noitalic '+_LABEL_CYAN, + + Keyword: _KW_BLUE, + Keyword.Constant: 'italic #444444', + Keyword.Declaration: 'italic', + Keyword.Type: 'italic', + + Operator: '#666666', + Operator.Word: _OW_PURPLE, + + Punctuation: '#888888', + + Name.Attribute: _NAME_GREEN, + Name.Builtin: _NAME_GREEN, + Name.Builtin.Pseudo: 'italic', + Name.Class: _CLS_CYAN, + Name.Constant: _DOC_ORANGE, + Name.Decorator: _CLS_CYAN, + Name.Entity: _ESCAPE_LIME, + Name.Exception: _EXCEPT_YELLOW, + Name.Function: _FUN_BROWN, + Name.Label: _LABEL_CYAN, + Name.Namespace: _LABEL_CYAN, + Name.Tag: _KW_BLUE, + Name.Variable: '#b04040', + Name.Variable.Global:_EXCEPT_YELLOW, + + String: _STR_RED, + String.Char: _OW_PURPLE, + String.Doc: 'italic '+_DOC_ORANGE, + String.Escape: _ESCAPE_LIME, + String.Interpol: 'underline', + String.Other: _OW_PURPLE, + String.Regex: _OW_PURPLE, + + Number: '#444444', + + Generic.Deleted: '#c02828', + Generic.Emph: 'italic', + Generic.Error: '#c02828', + Generic.Heading: '#666666', + Generic.Subheading: '#444444', + Generic.Inserted: _NAME_GREEN, + Generic.Output: '#666666', + Generic.Prompt: '#444444', + Generic.Strong: 'bold', + Generic.Traceback: _KW_BLUE, + + Error: 'bg:'+_OW_PURPLE, + } diff 
--git a/pygments/token.py b/pygments/token.py index e5eadf0d..bfdfc114 100644 --- a/pygments/token.py +++ b/pygments/token.py @@ -179,6 +179,7 @@ STANDARD_TYPES = { Punctuation: 'p', Comment: 'c', + Comment.Hashbang: 'ch', Comment.Multiline: 'cm', Comment.Preproc: 'cp', Comment.Single: 'c1', |
