From ca80bbb894f18e7ccbda553d33b50ed222e7727f Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Tue, 16 Sep 2014 18:58:00 +0200 Subject: split up lexers.compiled into multiple submodules --- pygments/lexers/python.py | 196 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 pygments/lexers/python.py (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py new file mode 100644 index 00000000..aea29355 --- /dev/null +++ b/pygments/lexers/python.py @@ -0,0 +1,196 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.python + ~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for Python and related languages. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, include, bygroups, using, \ + this, inherit, default, words, combined +from pygments.util import get_bool_opt +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Error + +__all__ = ['CythonLexer'] + + +class CythonLexer(RegexLexer): + """ + For Pyrex and `Cython `_ source code. + + .. versionadded:: 1.1 + """ + + name = 'Cython' + aliases = ['cython', 'pyx', 'pyrex'] + filenames = ['*.pyx', '*.pxd', '*.pxi'] + mimetypes = ['text/x-cython', 'application/x-cython'] + + tokens = { + 'root': [ + (r'\n', Text), + (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)), + (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)), + (r'[^\S\n]+', Text), + (r'#.*$', Comment), + (r'[]{}:(),;[]', Punctuation), + (r'\\\n', Text), + (r'\\', Text), + (r'(in|is|and|or|not)\b', Operator.Word), + (r'(<)([a-zA-Z0-9.?]+)(>)', + bygroups(Punctuation, Keyword.Type, Punctuation)), + (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator), + (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)', + bygroups(Keyword, Number.Integer, Operator, Name, Operator, + Name, Punctuation)), + include('keywords'), + (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'), + (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'), + (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'), + (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'), + (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'), + include('builtins'), + include('backtick'), + ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'), + ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'), + ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'), + ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'), + ('[uU]?"""', String, combined('stringescape', 'tdqs')), + ("[uU]?'''", String, combined('stringescape', 'tsqs')), + ('[uU]?"', String, combined('stringescape', 'dqs')), + ("[uU]?'", String, combined('stringescape', 'sqs')), + include('name'), + include('numbers'), + ], + 'keywords': [ + (words(( + 'assert', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif', + 'else', 'except', 'except?', 'exec', 'finally', 'for', 'gil', + 'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print', + 'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'), + Keyword), + (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc), + ], + 'builtins': [ + (words(( + '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', + 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', + 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr', + 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit', + 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals', + 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance', + 'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max', + 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', + 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed', + 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', + 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', + 'vars', 'xrange', 'zip'), prefix=r'(? Date: Tue, 16 Sep 2014 20:21:53 +0200 Subject: split up agile.py into individual modules --- pygments/lexers/python.py | 638 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 633 insertions(+), 5 deletions(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index aea29355..2a29aadb 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -11,13 +11,436 @@ import re -from pygments.lexer import RegexLexer, include, bygroups, using, \ - this, inherit, default, words, combined -from pygments.util import get_bool_opt +from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ + default, words, combined, do_insertions +from pygments.util import get_bool_opt, shebang_matches from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation, Error + Number, Punctuation, Generic, Other, Error +from pygments import unistring as uni -__all__ = ['CythonLexer'] +__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', + 'Python3Lexer', 'Python3TracebackLexer', 'CythonLexer', + 'HyLexer', 'DgLexer'] + +line_re = re.compile('.*?\n') + + +class PythonLexer(RegexLexer): + """ + For `Python `_ source code. + """ + + name = 'Python' + aliases = ['python', 'py', 'sage'] + filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac', '*.sage'] + mimetypes = ['text/x-python', 'application/x-python'] + + tokens = { + 'root': [ + (r'\n', Text), + (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)), + (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)), + (r'[^\S\n]+', Text), + (r'#.*$', Comment), + (r'[]{}:(),;[]', Punctuation), + (r'\\\n', Text), + (r'\\', Text), + (r'(in|is|and|or|not)\b', Operator.Word), + (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator), + include('keywords'), + (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), + (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), + (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), + 'fromimport'), + (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), + 'import'), + include('builtins'), + include('backtick'), + ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'), + ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'), + ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'), + ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'), + ('[uU]?"""', String, combined('stringescape', 'tdqs')), + ("[uU]?'''", String, combined('stringescape', 'tsqs')), + ('[uU]?"', String, combined('stringescape', 'dqs')), + ("[uU]?'", String, combined('stringescape', 'sqs')), + include('name'), + include('numbers'), + ], + 'keywords': [ + (r'(assert|break|continue|del|elif|else|except|exec|' + r'finally|for|global|if|lambda|pass|print|raise|' + r'return|try|while|yield(\s+from)?|as|with)\b', Keyword), + ], + 'builtins': [ + (r'(?`_ source code (version 3.0). + + .. versionadded:: 0.10 + """ + + name = 'Python 3' + aliases = ['python3', 'py3'] + filenames = [] # Nothing until Python 3 gets widespread + mimetypes = ['text/x-python3', 'application/x-python3'] + + flags = re.MULTILINE | re.UNICODE + + uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue) + + tokens = PythonLexer.tokens.copy() + tokens['keywords'] = [ + (r'(assert|break|continue|del|elif|else|except|' + r'finally|for|global|if|lambda|pass|raise|nonlocal|' + r'return|try|while|yield(\s+from)?|as|with|True|False|None)\b', + Keyword), + ] + tokens['builtins'] = [ + (r'(?>> a = 'foo' + >>> print a + foo + >>> 1 / 0 + Traceback (most recent call last): + File "", line 1, in + ZeroDivisionError: integer division or modulo by zero + + Additional options: + + `python3` + Use Python 3 lexer for code. Default is ``False``. + + .. versionadded:: 1.0 + """ + name = 'Python console session' + aliases = ['pycon'] + mimetypes = ['text/x-python-doctest'] + + def __init__(self, **options): + self.python3 = get_bool_opt(options, 'python3', False) + Lexer.__init__(self, **options) + + def get_tokens_unprocessed(self, text): + if self.python3: + pylexer = Python3Lexer(**self.options) + tblexer = Python3TracebackLexer(**self.options) + else: + pylexer = PythonLexer(**self.options) + tblexer = PythonTracebackLexer(**self.options) + + curcode = '' + insertions = [] + curtb = '' + tbindex = 0 + tb = 0 + for match in line_re.finditer(text): + line = match.group() + if line.startswith(u'>>> ') or line.startswith(u'... '): + tb = 0 + insertions.append((len(curcode), + [(0, Generic.Prompt, line[:4])])) + curcode += line[4:] + elif line.rstrip() == u'...' and not tb: + # only a new >>> prompt can end an exception block + # otherwise an ellipsis in place of the traceback frames + # will be mishandled + insertions.append((len(curcode), + [(0, Generic.Prompt, u'...')])) + curcode += line[3:] + else: + if curcode: + for item in do_insertions( + insertions, pylexer.get_tokens_unprocessed(curcode)): + yield item + curcode = '' + insertions = [] + if (line.startswith(u'Traceback (most recent call last):') or + re.match(u' File "[^"]+", line \\d+\\n$', line)): + tb = 1 + curtb = line + tbindex = match.start() + elif line == 'KeyboardInterrupt\n': + yield match.start(), Name.Class, line + elif tb: + curtb += line + if not (line.startswith(' ') or line.strip() == u'...'): + tb = 0 + for i, t, v in tblexer.get_tokens_unprocessed(curtb): + yield tbindex+i, t, v + else: + yield match.start(), Generic.Output, line + if curcode: + for item in do_insertions(insertions, + pylexer.get_tokens_unprocessed(curcode)): + yield item + + +class PythonTracebackLexer(RegexLexer): + """ + For Python tracebacks. + + .. versionadded:: 0.7 + """ + + name = 'Python Traceback' + aliases = ['pytb'] + filenames = ['*.pytb'] + mimetypes = ['text/x-python-traceback'] + + tokens = { + 'root': [ + (r'^Traceback \(most recent call last\):\n', + Generic.Traceback, 'intb'), + # SyntaxError starts with this. + (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), + (r'^.*\n', Other), + ], + 'intb': [ + (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', + bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), + (r'^( File )("[^"]+")(, line )(\d+)(\n)', + bygroups(Text, Name.Builtin, Text, Number, Text)), + (r'^( )(.+)(\n)', + bygroups(Text, using(PythonLexer), Text)), + (r'^([ \t]*)(\.\.\.)(\n)', + bygroups(Text, Comment, Text)), # for doctests... + (r'^([^:]+)(: )(.+)(\n)', + bygroups(Generic.Error, Text, Name, Text), '#pop'), + (r'^([a-zA-Z_]\w*)(:?\n)', + bygroups(Generic.Error, Text), '#pop') + ], + } + + +class Python3TracebackLexer(RegexLexer): + """ + For Python 3.0 tracebacks, with support for chained exceptions. + + .. versionadded:: 1.0 + """ + + name = 'Python 3.0 Traceback' + aliases = ['py3tb'] + filenames = ['*.py3tb'] + mimetypes = ['text/x-python3-traceback'] + + tokens = { + 'root': [ + (r'\n', Text), + (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'), + (r'^During handling of the above exception, another ' + r'exception occurred:\n\n', Generic.Traceback), + (r'^The above exception was the direct cause of the ' + r'following exception:\n\n', Generic.Traceback), + (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), + ], + 'intb': [ + (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', + bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), + (r'^( File )("[^"]+")(, line )(\d+)(\n)', + bygroups(Text, Name.Builtin, Text, Number, Text)), + (r'^( )(.+)(\n)', + bygroups(Text, using(Python3Lexer), Text)), + (r'^([ \t]*)(\.\.\.)(\n)', + bygroups(Text, Comment, Text)), # for doctests... + (r'^([^:]+)(: )(.+)(\n)', + bygroups(Generic.Error, Text, Name, Text), '#pop'), + (r'^([a-zA-Z_]\w*)(:?\n)', + bygroups(Generic.Error, Text), '#pop') + ], + } class CythonLexer(RegexLexer): @@ -194,3 +617,208 @@ class CythonLexer(RegexLexer): include('nl') ], } + + +class HyLexer(RegexLexer): + """ + Lexer for `Hy `_ source code. + + .. versionadded:: 2.0 + """ + name = 'Hy' + aliases = ['hylang'] + filenames = ['*.hy'] + mimetypes = ['text/x-hy', 'application/x-hy'] + + special_forms = [ + 'cond', 'for', '->', '->>', 'car', + 'cdr', 'first', 'rest', 'let', 'when', 'unless', + 'import', 'do', 'progn', 'get', 'slice', 'assoc', 'with-decorator', + ',', 'list_comp', 'kwapply', '~', 'is', 'in', 'is-not', 'not-in', + 'quasiquote', 'unquote', 'unquote-splice', 'quote', '|', '<<=', '>>=', + 'foreach', 'while', + 'eval-and-compile', 'eval-when-compile' + ] + + declarations = [ + 'def', 'defn', 'defun', 'defmacro', 'defclass', 'lambda', 'fn', 'setv' + ] + + hy_builtins = [] + + hy_core = [ + 'cycle', 'dec', 'distinct', 'drop', 'even?', 'filter', 'inc', + 'instance?', 'iterable?', 'iterate', 'iterator?', 'neg?', + 'none?', 'nth', 'numeric?', 'odd?', 'pos?', 'remove', 'repeat', + 'repeatedly', 'take', 'take_nth', 'take_while', 'zero?' + ] + + builtins = hy_builtins + hy_core + + # valid names for identifiers + # well, names can only not consist fully of numbers + # but this should be good enough for now + valid_name = r'(?!#)[\w!$%*+<=>?/.#-]+' + + def _multi_escape(entries): + return '(%s)' % ('|'.join(re.escape(entry) + ' ' for entry in entries)) + + tokens = { + 'root': [ + # the comments - always starting with semicolon + # and going to the end of the line + (r';.*$', Comment.Single), + + # whitespaces - usually not relevant + (r'[,\s]+', Text), + + # numbers + (r'-?\d+\.\d+', Number.Float), + (r'-?\d+', Number.Integer), + (r'0[0-7]+j?', Number.Oct), + (r'0[xX][a-fA-F0-9]+', Number.Hex), + + # strings, symbols and characters + (r'"(\\\\|\\"|[^"])*"', String), + (r"'" + valid_name, String.Symbol), + (r"\\(.|[a-z]+)", String.Char), + (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)), + (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)), + + # keywords + (r'::?' + valid_name, String.Symbol), + + # special operators + (r'~@|[`\'#^~&@]', Operator), + + include('py-keywords'), + include('py-builtins'), + + # highlight the special forms + (_multi_escape(special_forms), Keyword), + + # Technically, only the special forms are 'keywords'. The problem + # is that only treating them as keywords means that things like + # 'defn' and 'ns' need to be highlighted as builtins. This is ugly + # and weird for most styles. So, as a compromise we're going to + # highlight them as Keyword.Declarations. + (_multi_escape(declarations), Keyword.Declaration), + + # highlight the builtins + (_multi_escape(builtins), Name.Builtin), + + # the remaining functions + (r'(?<=\()' + valid_name, Name.Function), + + # find the remaining variables + (valid_name, Name.Variable), + + # Hy accepts vector notation + (r'(\[|\])', Punctuation), + + # Hy accepts map notation + (r'(\{|\})', Punctuation), + + # the famous parentheses! + (r'(\(|\))', Punctuation), + + ], + 'py-keywords': PythonLexer.tokens['keywords'], + 'py-builtins': PythonLexer.tokens['builtins'], + } + + def analyse_text(text): + if '(import ' in text or '(defn ' in text: + return 0.9 + + +class DgLexer(RegexLexer): + """ + Lexer for `dg `_, + a functional and object-oriented programming language + running on the CPython 3 VM. + + .. versionadded:: 1.6 + """ + name = 'dg' + aliases = ['dg'] + filenames = ['*.dg'] + mimetypes = ['text/x-dg'] + + tokens = { + 'root': [ + (r'\s+', Text), + (r'#.*?$', Comment.Single), + + (r'(?i)0b[01]+', Number.Bin), + (r'(?i)0o[0-7]+', Number.Oct), + (r'(?i)0x[0-9a-f]+', Number.Hex), + (r'(?i)[+-]?[0-9]+\.[0-9]+(e[+-]?[0-9]+)?j?', Number.Float), + (r'(?i)[+-]?[0-9]+e[+-]?\d+j?', Number.Float), + (r'(?i)[+-]?[0-9]+j?', Number.Integer), + + (r"(?i)(br|r?b?)'''", String, combined('stringescape', 'tsqs', 'string')), + (r'(?i)(br|r?b?)"""', String, combined('stringescape', 'tdqs', 'string')), + (r"(?i)(br|r?b?)'", String, combined('stringescape', 'sqs', 'string')), + (r'(?i)(br|r?b?)"', String, combined('stringescape', 'dqs', 'string')), + + (r"`\w+'*`", Operator), + (r'\b(and|in|is|or|where)\b', Operator.Word), + (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator), + + (r"(? Date: Tue, 16 Sep 2014 20:36:59 +0200 Subject: move Hy to lisp, use words() in python.py --- pygments/lexers/python.py | 273 ++++++++++++++++------------------------------ 1 file changed, 92 insertions(+), 181 deletions(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 2a29aadb..db747d2e 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -20,7 +20,7 @@ from pygments import unistring as uni __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', 'Python3Lexer', 'Python3TracebackLexer', 'CythonLexer', - 'HyLexer', 'DgLexer'] + 'DgLexer'] line_re = re.compile('.*?\n') @@ -68,35 +68,46 @@ class PythonLexer(RegexLexer): include('numbers'), ], 'keywords': [ - (r'(assert|break|continue|del|elif|else|except|exec|' - r'finally|for|global|if|lambda|pass|print|raise|' - r'return|try|while|yield(\s+from)?|as|with)\b', Keyword), + (words(( + 'assert', 'break', 'continue', 'del', 'elif', 'else', 'except', + 'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass', + 'print', 'raise', 'return', 'try', 'while', 'yield', + 'yield from', 'as', 'with'), suffix=r'\b'), + Keyword), ], 'builtins': [ - (r'(?`_ source code. - - .. versionadded:: 2.0 - """ - name = 'Hy' - aliases = ['hylang'] - filenames = ['*.hy'] - mimetypes = ['text/x-hy', 'application/x-hy'] - - special_forms = [ - 'cond', 'for', '->', '->>', 'car', - 'cdr', 'first', 'rest', 'let', 'when', 'unless', - 'import', 'do', 'progn', 'get', 'slice', 'assoc', 'with-decorator', - ',', 'list_comp', 'kwapply', '~', 'is', 'in', 'is-not', 'not-in', - 'quasiquote', 'unquote', 'unquote-splice', 'quote', '|', '<<=', '>>=', - 'foreach', 'while', - 'eval-and-compile', 'eval-when-compile' - ] - - declarations = [ - 'def', 'defn', 'defun', 'defmacro', 'defclass', 'lambda', 'fn', 'setv' - ] - - hy_builtins = [] - - hy_core = [ - 'cycle', 'dec', 'distinct', 'drop', 'even?', 'filter', 'inc', - 'instance?', 'iterable?', 'iterate', 'iterator?', 'neg?', - 'none?', 'nth', 'numeric?', 'odd?', 'pos?', 'remove', 'repeat', - 'repeatedly', 'take', 'take_nth', 'take_while', 'zero?' - ] - - builtins = hy_builtins + hy_core - - # valid names for identifiers - # well, names can only not consist fully of numbers - # but this should be good enough for now - valid_name = r'(?!#)[\w!$%*+<=>?/.#-]+' - - def _multi_escape(entries): - return '(%s)' % ('|'.join(re.escape(entry) + ' ' for entry in entries)) - - tokens = { - 'root': [ - # the comments - always starting with semicolon - # and going to the end of the line - (r';.*$', Comment.Single), - - # whitespaces - usually not relevant - (r'[,\s]+', Text), - - # numbers - (r'-?\d+\.\d+', Number.Float), - (r'-?\d+', Number.Integer), - (r'0[0-7]+j?', Number.Oct), - (r'0[xX][a-fA-F0-9]+', Number.Hex), - - # strings, symbols and characters - (r'"(\\\\|\\"|[^"])*"', String), - (r"'" + valid_name, String.Symbol), - (r"\\(.|[a-z]+)", String.Char), - (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)), - (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)), - - # keywords - (r'::?' + valid_name, String.Symbol), - - # special operators - (r'~@|[`\'#^~&@]', Operator), - - include('py-keywords'), - include('py-builtins'), - - # highlight the special forms - (_multi_escape(special_forms), Keyword), - - # Technically, only the special forms are 'keywords'. The problem - # is that only treating them as keywords means that things like - # 'defn' and 'ns' need to be highlighted as builtins. This is ugly - # and weird for most styles. So, as a compromise we're going to - # highlight them as Keyword.Declarations. - (_multi_escape(declarations), Keyword.Declaration), - - # highlight the builtins - (_multi_escape(builtins), Name.Builtin), - - # the remaining functions - (r'(?<=\()' + valid_name, Name.Function), - - # find the remaining variables - (valid_name, Name.Variable), - - # Hy accepts vector notation - (r'(\[|\])', Punctuation), - - # Hy accepts map notation - (r'(\{|\})', Punctuation), - - # the famous parentheses! - (r'(\(|\))', Punctuation), - - ], - 'py-keywords': PythonLexer.tokens['keywords'], - 'py-builtins': PythonLexer.tokens['builtins'], - } - - def analyse_text(text): - if '(import ' in text or '(defn ' in text: - return 0.9 - - class DgLexer(RegexLexer): """ Lexer for `dg `_, @@ -766,17 +672,22 @@ class DgLexer(RegexLexer): (r'\b(and|in|is|or|where)\b', Operator.Word), (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator), - (r"(? Date: Sat, 20 Sep 2014 00:28:17 +0200 Subject: misc fixups in math.py and jvm.py --- pygments/lexers/python.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index db747d2e..6cd0a6c8 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -20,7 +20,7 @@ from pygments import unistring as uni __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', 'Python3Lexer', 'Python3TracebackLexer', 'CythonLexer', - 'DgLexer'] + 'DgLexer', 'NumPyLexer'] line_re = re.compile('.*?\n') @@ -733,3 +733,98 @@ class DgLexer(RegexLexer): (r"'''", String, '#pop') ], } + + +class NumPyLexer(PythonLexer): + """ + A Python lexer recognizing Numerical Python builtins. + + .. versionadded:: 0.10 + """ + + name = 'NumPy' + aliases = ['numpy'] + + # override the mimetypes to not inherit them from python + mimetypes = [] + filenames = [] + + EXTRA_KEYWORDS = set(( + 'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose', + 'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append', + 'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh', + 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin', + 'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal', + 'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange', + 'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray', + 'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype', + 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett', + 'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial', + 'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman', + 'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_', + 'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type', + 'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate', + 'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov', + 'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate', + 'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide', + 'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty', + 'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye', + 'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill', + 'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud', + 'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer', + 'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring', + 'generic', 'get_array_wrap', 'get_include', 'get_numarray_include', + 'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize', + 'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater', + 'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram', + 'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0', + 'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info', + 'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d', + 'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj', + 'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf', + 'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_', + 'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_', + 'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort', + 'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2', + 'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace', + 'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype', + 'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min', + 'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan', + 'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum', + 'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer', + 'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones', + 'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload', + 'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv', + 'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod', + 'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers', + 'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close', + 'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require', + 'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll', + 'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_', + 'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select', + 'set_numeric_ops', 'set_printoptions', 'set_string_function', + 'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj', + 'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape', + 'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh', + 'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source', + 'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std', + 'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot', + 'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose', + 'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict', + 'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index', + 'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises', + 'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like' + )) + + def get_tokens_unprocessed(self, text): + for index, token, value in \ + PythonLexer.get_tokens_unprocessed(self, text): + if token is Name and value in self.EXTRA_KEYWORDS: + yield index, Keyword.Pseudo, value + else: + yield index, token, value + + def analyse_text(text): + return (shebang_matches(text, r'pythonw?(2(\.\d)?)?') or + 'import ' in text[:1000]) \ + and ('import numpy' in text or 'from numpy import' in text) -- cgit v1.2.1 From 9cbc7803dd8e7826393721fe4acbd702843e131c Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Tue, 7 Oct 2014 13:44:29 +0200 Subject: Closes #874: do not swallow incomplete tracebacks in the Python console lexer. --- pygments/lexers/python.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 6cd0a6c8..411f7bc7 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -384,6 +384,9 @@ class PythonConsoleLexer(Lexer): for item in do_insertions(insertions, pylexer.get_tokens_unprocessed(curcode)): yield item + if curtb: + for i, t, v in tblexer.get_tokens_unprocessed(curtb): + yield tbindex+i, t, v class PythonTracebackLexer(RegexLexer): -- cgit v1.2.1 From 86c2eee37cf292677032e02f553710d4c1eb1f04 Mon Sep 17 00:00:00 2001 From: Tim Hatch Date: Wed, 15 Oct 2014 22:13:59 -0700 Subject: Curly brace escaping, round 1. These locations were all found by regexlint and done semi-manually, leaving braces within [] alone (they aren't special in the 'regex' module). --- pygments/lexers/python.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 411f7bc7..3caf2725 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -151,7 +151,7 @@ class PythonLexer(RegexLexer): default('#pop'), ], 'stringescape': [ - (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|' + (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'strings': [ @@ -602,7 +602,7 @@ class CythonLexer(RegexLexer): default('#pop'), ], 'stringescape': [ - (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|' + (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'strings': [ @@ -710,7 +710,7 @@ class DgLexer(RegexLexer): (r'.', Error), ], 'stringescape': [ - (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|' + (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'string': [ -- cgit v1.2.1 From 272429a2c5fc178f256a347c9d7642c6667b9a67 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Thu, 6 Nov 2014 12:10:16 +0100 Subject: Simplify charclasses in a few more modules --- pygments/lexers/python.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 3caf2725..259d1a9c 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -699,7 +699,7 @@ class DgLexer(RegexLexer): (r"(? Date: Tue, 20 Jan 2015 07:55:32 +0100 Subject: Fix Python tracebacks getting duplicated in the console lexer (closes #1068). --- pygments/lexers/python.py | 1 + 1 file changed, 1 insertion(+) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 259d1a9c..01ab1e7d 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -378,6 +378,7 @@ class PythonConsoleLexer(Lexer): tb = 0 for i, t, v in tblexer.get_tokens_unprocessed(curtb): yield tbindex+i, t, v + curtb = '' else: yield match.start(), Generic.Output, line if curcode: -- cgit v1.2.1 From 13705acbd57b936990c63a12de05ce29834b6afb Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Wed, 21 Jan 2015 08:33:25 +0100 Subject: Copyright year update. --- pygments/lexers/python.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 01ab1e7d..3c1aff56 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -5,7 +5,7 @@ Lexers for Python and related languages. - :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ -- cgit v1.2.1 From 7009e7a5ad29ff5d911533c83081eb76a887e9ed Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Sat, 7 Mar 2015 21:59:54 +0200 Subject: Moved True, False and None to Keyword.Constant. --- pygments/lexers/python.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 3c1aff56..006ffd02 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -216,8 +216,11 @@ class Python3Lexer(RegexLexer): 'assert', 'break', 'continue', 'del', 'elif', 'else', 'except', 'finally', 'for', 'global', 'if', 'lambda', 'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield', 'yield from', 'as', - 'with', 'True', 'False', 'None'), suffix=r'\b'), + 'with'), suffix=r'\b'), Keyword), + (words(( + 'True', 'False', 'None'), suffix=r'\b'), + Keyword.Constant), ] tokens['builtins'] = [ (words(( -- cgit v1.2.1 From ee9a9c0563710b000b276fa215e189335b7da69b Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Sat, 7 Mar 2015 22:05:28 +0200 Subject: Added support for the (still valid) old style string interpolation along with the new style string formatting to the Python 3 lexer. --- pygments/lexers/python.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 3c1aff56..912a2bb2 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -155,6 +155,7 @@ class PythonLexer(RegexLexer): r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'strings': [ + # the old style '%s' % (...) string formatting (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), (r'[^\\\'"%\n]+', String), @@ -288,8 +289,16 @@ class Python3Lexer(RegexLexer): (uni_name, Name.Namespace), default('#pop'), ] - # don't highlight "%s" substitutions tokens['strings'] = [ + # the old style '%s' % (...) string formatting (still valid in Py3) + (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + # the new style '{}'.format(...) string formatting + (r'\{' + '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name + '(\![sra])?' # conversion + '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[bcdeEfFgGnosxX%]?)?' + '\}', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), -- cgit v1.2.1 From b55a3a9f79dc14c146a11aa32230624469ff874b Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Sat, 7 Mar 2015 22:30:17 +0200 Subject: Added a new token type Comment.Hashbang with its documentation, and a support for it in Javascript, Perl, Python, Ruby and Bash lexers. --- pygments/lexers/python.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 3c1aff56..e0da29f6 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -41,7 +41,8 @@ class PythonLexer(RegexLexer): (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)), (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)), (r'[^\S\n]+', Text), - (r'#.*$', Comment), + (r'\A#!.+$', Comment.Hashbang), + (r'#.*$', Comment.Single), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), -- cgit v1.2.1 From e871b16968f56234daa21947598e89638279c9a1 Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Sun, 8 Mar 2015 15:15:22 +0200 Subject: The new style string formatting now works correctly with multiple occurrences of interpolation fields in one string, and erroneous fields are handled properly. --- pygments/lexers/python.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index ed63fb25..e5218be0 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -159,8 +159,8 @@ class PythonLexer(RegexLexer): # the old style '%s' % (...) string formatting (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%\n]+', String), - # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), # unhandled string formatting sign (r'%', String) @@ -303,11 +303,11 @@ class Python3Lexer(RegexLexer): '(\![sra])?' # conversion '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[bcdeEfFgGnosxX%]?)?' '\}', String.Interpol), - (r'[^\\\'"%\n]+', String), - # quotes, percents and backslashes must be parsed one at a time + # backslashes, quotes and formatting signs must be parsed one at a time + (r'[^\\\'"%\{\n]+', String), (r'[\'"\\]', String), # unhandled string formatting sign - (r'%', String) + (r'%|(\{{1,2})', String) # newlines are an error (use "nl" state) ] -- cgit v1.2.1 From 0a91d7635a427cfb5b7a219af2feaabea85fcd7f Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 8 Mar 2015 15:12:45 +0100 Subject: Closes #1088: add ResourceWarning. --- pygments/lexers/python.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index e5218be0..49c15b6d 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -246,7 +246,7 @@ class Python3Lexer(RegexLexer): 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'NotImplementedError', 'OSError', 'OverflowError', - 'PendingDeprecationWarning', 'ReferenceError', + 'PendingDeprecationWarning', 'ReferenceError', 'ResourceWarning', 'RuntimeError', 'RuntimeWarning', 'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError', -- cgit v1.2.1 From dbcad72b2460ebde903dcd90552d1867e1e1fed3 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sat, 8 Aug 2015 06:55:23 +0200 Subject: Python: add async, await, matmul op --- pygments/lexers/python.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 49c15b6d..ea97b855 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -215,10 +215,10 @@ class Python3Lexer(RegexLexer): tokens = PythonLexer.tokens.copy() tokens['keywords'] = [ (words(( - 'assert', 'break', 'continue', 'del', 'elif', 'else', 'except', - 'finally', 'for', 'global', 'if', 'lambda', 'pass', 'raise', - 'nonlocal', 'return', 'try', 'while', 'yield', 'yield from', 'as', - 'with'), suffix=r'\b'), + 'assert', 'async', 'await', 'break', 'continue', 'del', 'elif', + 'else', 'except', 'finally', 'for', 'global', 'if', 'lambda', 'pass', + 'raise', 'nonlocal', 'return', 'try', 'while', 'yield', 'yield from', + 'as', 'with'), suffix=r'\b'), Keyword), (words(( 'True', 'False', 'None'), suffix=r'\b'), @@ -272,6 +272,7 @@ class Python3Lexer(RegexLexer): tokens['backtick'] = [] tokens['name'] = [ (r'@\w+', Name.Decorator), + (r'@', Operator), # new matrix multiplication operator (uni_name, Name), ] tokens['funcname'] = [ -- cgit v1.2.1 From 131047f8e3e72160e25868cda4e25cc08bc3b88b Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sat, 17 Oct 2015 08:39:33 +0200 Subject: Closes #685: use String.{Single,Double} in Python lexer. --- pygments/lexers/python.py | 65 ++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 32 deletions(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index ea97b855..903078d8 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -35,6 +35,19 @@ class PythonLexer(RegexLexer): filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac', '*.sage'] mimetypes = ['text/x-python', 'application/x-python'] + def innerstring_rules(ttype): + return [ + # the old style '%s' % (...) string formatting + (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + # backslashes, quotes and formatting signs must be parsed one at a time + (r'[^\\\'"%\n]+', ttype), + (r'[\'"\\]', ttype), + # unhandled string formatting sign + (r'%', ttype), + # newlines are an error (use "nl" state) + ] + tokens = { 'root': [ (r'\n', Text), @@ -57,14 +70,14 @@ class PythonLexer(RegexLexer): 'import'), include('builtins'), include('backtick'), - ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'), - ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'), - ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'), - ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'), - ('[uU]?"""', String, combined('stringescape', 'tdqs')), - ("[uU]?'''", String, combined('stringescape', 'tsqs')), - ('[uU]?"', String, combined('stringescape', 'dqs')), - ("[uU]?'", String, combined('stringescape', 'sqs')), + ('(?:[rR]|[uU][rR]|[rR][uU])"""', String.Double, 'tdqs'), + ("(?:[rR]|[uU][rR]|[rR][uU])'''", String.Single, 'tsqs'), + ('(?:[rR]|[uU][rR]|[rR][uU])"', String.Double, 'dqs'), + ("(?:[rR]|[uU][rR]|[rR][uU])'", String.Single, 'sqs'), + ('[uU]?"""', String.Double, combined('stringescape', 'tdqs')), + ("[uU]?'''", String.Single, combined('stringescape', 'tsqs')), + ('[uU]?"', String.Double, combined('stringescape', 'dqs')), + ("[uU]?'", String.Single, combined('stringescape', 'sqs')), include('name'), include('numbers'), ], @@ -155,39 +168,27 @@ class PythonLexer(RegexLexer): (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], - 'strings': [ - # the old style '%s' % (...) string formatting - (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), - # backslashes, quotes and formatting signs must be parsed one at a time - (r'[^\\\'"%\n]+', String), - (r'[\'"\\]', String), - # unhandled string formatting sign - (r'%', String) - # newlines are an error (use "nl" state) - ], - 'nl': [ - (r'\n', String) - ], + 'strings-single': innerstring_rules(String.Single), + 'strings-double': innerstring_rules(String.Double), 'dqs': [ - (r'"', String, '#pop'), + (r'"', String.Double, '#pop'), (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings - include('strings') + include('strings-double') ], 'sqs': [ - (r"'", String, '#pop'), + (r"'", String.Single, '#pop'), (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings - include('strings') + include('strings-single') ], 'tdqs': [ - (r'"""', String, '#pop'), - include('strings'), - include('nl') + (r'"""', String.Double, '#pop'), + include('strings-double'), + (r'\n', String.Double) ], 'tsqs': [ - (r"'''", String, '#pop'), - include('strings'), - include('nl') + (r"'''", String.Single, '#pop'), + include('strings-single'), + (r'\n', String.Single) ], } -- cgit v1.2.1 From e23a91d9384494f6543313df8a702ef1c4bec3f1 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sat, 17 Oct 2015 08:44:13 +0200 Subject: Closes #1146: add "unsigned" as a cython builtin. --- pygments/lexers/python.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'pygments/lexers/python.py') diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 903078d8..ccbd4c15 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -550,7 +550,7 @@ class CythonLexer(RegexLexer): 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', - 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', + 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'unsigned', 'vars', 'xrange', 'zip'), prefix=r'(?