author     Georg Brandl <georg@python.org>  2008-12-29 20:22:18 +0100
committer  Georg Brandl <georg@python.org>  2008-12-29 20:22:18 +0100
commit     49c15e93a21014f6640bfd0e0c6752bdc61d105c (patch)
tree       8cb3e80a39028c2a78fc6155d100d36cacbec333 /sphinx/pycode
parent     2dc410c1bb8e078c5e98cb5bdad332c28abb998f (diff)
download   sphinx-git-49c15e93a21014f6640bfd0e0c6752bdc61d105c.tar.gz
Add pgen2 and custom utilities.
Diffstat (limited to 'sphinx/pycode')
-rw-r--r--  sphinx/pycode/Grammar.txt        | 155
-rw-r--r--  sphinx/pycode/__init__.py        | 142
-rw-r--r--  sphinx/pycode/pgen2/__init__.py  |   4
-rw-r--r--  sphinx/pycode/pgen2/driver.py    | 145
-rw-r--r--  sphinx/pycode/pgen2/grammar.py   | 171
-rw-r--r--  sphinx/pycode/pgen2/literals.py  |  60
-rw-r--r--  sphinx/pycode/pgen2/parse.py     | 201
-rw-r--r--  sphinx/pycode/pgen2/pgen.py      | 384
-rwxr-xr-x  sphinx/pycode/pgen2/token.py     |  82
-rw-r--r--  sphinx/pycode/pgen2/tokenize.py  | 405
-rw-r--r--  sphinx/pycode/pytree.py          | 295
11 files changed, 2044 insertions, 0 deletions
diff --git a/sphinx/pycode/Grammar.txt b/sphinx/pycode/Grammar.txt
new file mode 100644
index 000000000..1f4a50ffb
--- /dev/null
+++ b/sphinx/pycode/Grammar.txt
@@ -0,0 +1,155 @@
+# Grammar for Python
+
+# Note: Changing the grammar specified in this file will most likely
+# require corresponding changes in the parser module
+# (../Modules/parsermodule.c). If you can't make the changes to
+# that module yourself, please co-ordinate the required changes
+# with someone who can; ask around on python-dev for help. Fred
+# Drake <fdrake@acm.org> will probably be listening there.
+
+# NOTE WELL: You should also follow all the steps listed in PEP 306,
+# "How to Change Python's Grammar"
+
+# Commands for Kees Blom's railroad program
+#diagram:token NAME
+#diagram:token NUMBER
+#diagram:token STRING
+#diagram:token NEWLINE
+#diagram:token ENDMARKER
+#diagram:token INDENT
+#diagram:output\input python.bla
+#diagram:token DEDENT
+#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm
+#diagram:rules
+
+# Start symbols for the grammar:
+# file_input is a module or sequence of commands read from an input file;
+# single_input is a single interactive statement;
+# eval_input is the input for the eval() and input() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+file_input: (NEWLINE | stmt)* ENDMARKER
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef)
+funcdef: 'def' NAME parameters ['->' test] ':' suite
+parameters: '(' [typedargslist] ')'
+typedargslist: ((tfpdef ['=' test] ',')*
+ ('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
+ | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
+tname: NAME [':' test]
+tfpdef: tname | '(' tfplist ')'
+tfplist: tfpdef (',' tfpdef)* [',']
+varargslist: ((vfpdef ['=' test] ',')*
+ ('*' [vname] (',' vname ['=' test])* [',' '**' vname] | '**' vname)
+ | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
+vname: NAME
+vfpdef: vname | '(' vfplist ')'
+vfplist: vfpdef (',' vfpdef)* [',']
+
+stmt: simple_stmt | compound_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
+ import_stmt | global_stmt | exec_stmt | assert_stmt)
+expr_stmt: testlist (augassign (yield_expr|testlist) |
+ ('=' (yield_expr|testlist))*)
+augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+ '<<=' | '>>=' | '**=' | '//=')
+# For normal assignments, additional restrictions enforced by the interpreter
+print_stmt: 'print' ( [ test (',' test)* [','] ] |
+ '>>' test [ (',' test)+ [','] ] )
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]]
+import_stmt: import_name | import_from
+import_name: 'import' dotted_as_names
+import_from: ('from' ('.'* dotted_name | '.'+)
+ 'import' ('*' | '(' import_as_names ')' | import_as_names))
+import_as_name: NAME ['as' NAME]
+dotted_as_name: dotted_name ['as' NAME]
+import_as_names: import_as_name (',' import_as_name)* [',']
+dotted_as_names: dotted_as_name (',' dotted_as_name)*
+dotted_name: NAME ('.' NAME)*
+global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
+exec_stmt: 'exec' expr ['in' test [',' test]]
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+try_stmt: ('try' ':' suite
+ ((except_clause ':' suite)+
+ ['else' ':' suite]
+ ['finally' ':' suite] |
+ 'finally' ':' suite))
+with_stmt: 'with' test [ with_var ] ':' suite
+with_var: 'as' expr
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test [(',' | 'as') test]]
+suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
+
+# Backward compatibility cruft to support:
+# [ x for x in lambda: True, lambda: False if x() ]
+# even while also allowing:
+# lambda x: 5 if x else 2
+# (But not a mix of the two)
+testlist_safe: old_test [(',' old_test)+ [',']]
+old_test: or_test | old_lambdef
+old_lambdef: 'lambda' [varargslist] ':' old_test
+
+test: or_test ['if' or_test 'else' test] | lambdef
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power
+power: atom trailer* ['**' factor]
+atom: ('(' [yield_expr|testlist_gexp] ')' |
+ '[' [listmaker] ']' |
+ '{' [dictsetmaker] '}' |
+ '`' testlist1 '`' |
+ NAME | NUMBER | STRING+ | '.' '.' '.')
+listmaker: test ( comp_for | (',' test)* [','] )
+testlist_gexp: test ( comp_for | (',' test)* [','] )
+lambdef: 'lambda' [varargslist] ':' test
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: expr (',' expr)* [',']
+testlist: test (',' test)* [',']
+dictsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
+ (test (comp_for | (',' test)* [','])) )
+
+classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+arglist: (argument ',')* (argument [',']
+ |'*' test (',' argument)* [',' '**' test]
+ |'**' test)
+argument: test [comp_for] | test '=' test # Really [keyword '='] test
+
+comp_iter: comp_for | comp_if
+comp_for: 'for' exprlist 'in' testlist_safe [comp_iter]
+comp_if: 'if' old_test [comp_iter]
+
+testlist1: test (',' test)*
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [testlist]
diff --git a/sphinx/pycode/__init__.py b/sphinx/pycode/__init__.py
new file mode 100644
index 000000000..b5d83e679
--- /dev/null
+++ b/sphinx/pycode/__init__.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+"""
+ sphinx.pycode
+ ~~~~~~~~~~~~~
+
+ Utilities for parsing and analyzing Python code.
+
+ :copyright: 2008 by Georg Brandl.
+ :license: BSD, see LICENSE for details.
+"""
+
+import sys
+from os import path
+
+from sphinx.pycode import pytree
+from sphinx.pycode.pgen2 import driver, token
+
+
+# load the Python grammar
+_grammarfile = path.join(path.dirname(__file__), 'Grammar.txt')
+pygrammar = driver.load_grammar(_grammarfile)
+pydriver = driver.Driver(pygrammar, convert=pytree.convert)
+
+class sym: pass
+for k, v in pygrammar.symbol2number.iteritems():
+ setattr(sym, k, v)
+
+# a dict mapping terminal and nonterminal numbers to their names
+number2name = pygrammar.number2symbol.copy()
+number2name.update(token.tok_name)
+
+
+def prepare_commentdoc(s):
+ result = []
+ lines = [line.strip() for line in s.expandtabs().splitlines()]
+ for line in lines:
+ if line.startswith('#: '):
+ result.append(line[3:])
+ if result and result[-1]:
+ result.append('')
+ return '\n'.join(result)
+
+
+_eq = pytree.Leaf(token.EQUAL, '=')
+
+
+class ClassAttrVisitor(pytree.NodeVisitor):
+ def init(self):
+ self.namespace = []
+
+ def visit_classdef(self, node):
+ self.namespace.append(node[1].value)
+ self.generic_visit(node)
+ self.namespace.pop()
+
+ def visit_expr_stmt(self, node):
+ if _eq in node.children:
+ prefix = node[0].get_prefix()
+ if not prefix:
+ prev = node[0].get_prev_leaf()
+ if prev and prev.type == token.INDENT:
+ prefix = prev.prefix
+ doc = prepare_commentdoc(prefix)
+ if doc:
+ targ = '.'.join(self.namespace + [node[0].compact()])
+ print targ
+ print doc
+
+ def visit_funcdef(self, node):
+ return
+
+
+class ModuleAnalyzer(object):
+
+ def __init__(self, tree, modname, srcname):
+ self.tree = tree
+ self.modname = modname
+ self.srcname = srcname
+
+ @classmethod
+ def for_string(cls, string, modname, srcname='<string>'):
+ return cls(pydriver.parse_string(string), modname, srcname)
+
+ @classmethod
+ def for_file(cls, filename, modname):
+ # XXX if raises
+ fileobj = open(filename, 'r')
+ try:
+ return cls(pydriver.parse_stream(fileobj), modname, filename)
+ finally:
+ fileobj.close()
+
+ @classmethod
+ def for_module(cls, modname):
+ if modname not in sys.modules:
+ # XXX
+ __import__(modname)
+ mod = sys.modules[modname]
+ if hasattr(mod, '__loader__'):
+ # XXX raises
+ source = mod.__loader__.get_source(modname)
+ return cls.for_string(source, modname)
+ filename = getattr(mod, '__file__', None)
+ if filename is None:
+ # XXX
+ raise RuntimeError('no source found')
+ if filename.lower().endswith('.pyo') or \
+ filename.lower().endswith('.pyc'):
+ filename = filename[:-1]
+ elif not filename.lower().endswith('.py'):
+ raise RuntimeError('not a .py file')
+ if not path.isfile(filename):
+ # XXX
+ raise RuntimeError('source not present')
+ return cls.for_file(filename, modname)
+
+ def find_defs(self):
+ attr_visitor = ClassAttrVisitor(number2name)
+ attr_visitor.namespace = [self.modname]
+ attr_visitor.visit(self.tree)
+
+class Test:
+ """doc"""
+
+ #: testing...
+ x = 1
+ """doc"""
+
+ #: testing more...
+ x = 2
+
+
+#ma = ModuleAnalyzer.for_file(__file__.rstrip('c'))
+import time
+x0=time.time()
+ma = ModuleAnalyzer.for_module('sphinx.builders.latex')
+x1=time.time()
+ma.find_defs()
+x2=time.time()
+print "%.4f %.4f" % (x1-x0, x2-x1)
+
+#print pytree.nice_repr(ma.tree, number2name, True)
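
To show how the pieces above are meant to fit together, here is a minimal usage sketch (not part of the commit). The sample source string and the module name 'example' are invented, and running it assumes a Sphinx checkout on the path, since importing sphinx.pycode also executes the timing scratch code left at the bottom of the file:

    from sphinx.pycode import ModuleAnalyzer, prepare_commentdoc

    # prepare_commentdoc() keeps only the "#: " lines of a prefix string
    print prepare_commentdoc("#: default timeout in seconds\n")

    source = ("class Config:\n"
              "    #: default timeout in seconds\n"
              "    timeout = 30\n")
    ma = ModuleAnalyzer.for_string(source, 'example')
    ma.find_defs()     # prints the dotted attribute name and its "#:" comment doc
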
diff --git a/sphinx/pycode/pgen2/__init__.py b/sphinx/pycode/pgen2/__init__.py
new file mode 100644
index 000000000..af3904845
--- /dev/null
+++ b/sphinx/pycode/pgen2/__init__.py
@@ -0,0 +1,4 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""The pgen2 package."""
diff --git a/sphinx/pycode/pgen2/driver.py b/sphinx/pycode/pgen2/driver.py
new file mode 100644
index 000000000..3e9e10435
--- /dev/null
+++ b/sphinx/pycode/pgen2/driver.py
@@ -0,0 +1,145 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Modifications:
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Parser driver.
+
+This provides a high-level interface to parse a file into a syntax tree.
+
+"""
+
+__author__ = "Guido van Rossum <guido@python.org>"
+
+__all__ = ["Driver", "load_grammar"]
+
+# Python imports
+import os
+import logging
+import sys
+
+# Pgen imports
+from sphinx.pycode.pgen2 import grammar, parse, token, tokenize, pgen
+
+
+class Driver(object):
+
+ def __init__(self, grammar, convert=None, logger=None):
+ self.grammar = grammar
+ if logger is None:
+ logger = logging.getLogger()
+ self.logger = logger
+ self.convert = convert
+
+ def parse_tokens(self, tokens, debug=False):
+ """Parse a series of tokens and return the syntax tree."""
+ # XXX Move the prefix computation into a wrapper around tokenize.
+ p = parse.Parser(self.grammar, self.convert)
+ p.setup()
+ lineno = 1
+ column = 0
+ type = value = start = end = line_text = None
+ prefix = ""
+ for quintuple in tokens:
+ type, value, start, end, line_text = quintuple
+ if start != (lineno, column):
+ assert (lineno, column) <= start, ((lineno, column), start)
+ s_lineno, s_column = start
+ if lineno < s_lineno:
+ prefix += "\n" * (s_lineno - lineno)
+ lineno = s_lineno
+ column = 0
+ if column < s_column:
+ prefix += line_text[column:s_column]
+ column = s_column
+ if type in (tokenize.COMMENT, tokenize.NL):
+ prefix += value
+ lineno, column = end
+ if value.endswith("\n"):
+ lineno += 1
+ column = 0
+ continue
+ if type == token.OP:
+ type = grammar.opmap[value]
+ if debug:
+ self.logger.debug("%s %r (prefix=%r)",
+ token.tok_name[type], value, prefix)
+ if p.addtoken(type, value, (prefix, start)):
+ if debug:
+ self.logger.debug("Stop.")
+ break
+ prefix = ""
+ lineno, column = end
+ if value.endswith("\n"):
+ lineno += 1
+ column = 0
+ else:
+ # We never broke out -- EOF is too soon (how can this happen???)
+ raise parse.ParseError("incomplete input", type, value, (prefix, start))
+ return p.rootnode
+
+ def parse_stream_raw(self, stream, debug=False):
+ """Parse a stream and return the syntax tree."""
+ tokens = tokenize.generate_tokens(stream.readline)
+ return self.parse_tokens(tokens, debug)
+
+ def parse_stream(self, stream, debug=False):
+ """Parse a stream and return the syntax tree."""
+ return self.parse_stream_raw(stream, debug)
+
+ def parse_file(self, filename, debug=False):
+ """Parse a file and return the syntax tree."""
+ stream = open(filename)
+ try:
+ return self.parse_stream(stream, debug)
+ finally:
+ stream.close()
+
+ def parse_string(self, text, debug=False):
+ """Parse a string and return the syntax tree."""
+ tokens = tokenize.generate_tokens(generate_lines(text).next)
+ return self.parse_tokens(tokens, debug)
+
+
+def generate_lines(text):
+ """Generator that behaves like readline without using StringIO."""
+ for line in text.splitlines(True):
+ yield line
+ while True:
+ yield ""
+
+
+def load_grammar(gt="Grammar.txt", gp=None,
+ save=True, force=False, logger=None):
+ """Load the grammar (maybe from a pickle)."""
+ if logger is None:
+ logger = logging.getLogger()
+ if gp is None:
+ head, tail = os.path.splitext(gt)
+ if tail == ".txt":
+ tail = ""
+ gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+ if force or not _newer(gp, gt):
+ logger.info("Generating grammar tables from %s", gt)
+ g = pgen.generate_grammar(gt)
+ if save:
+ logger.info("Writing grammar tables to %s", gp)
+ try:
+ g.dump(gp)
+ except IOError, e:
+ logger.info("Writing failed:"+str(e))
+ else:
+ g = grammar.Grammar()
+ g.load(gp)
+ return g
+
+
+def _newer(a, b):
+ """Inquire whether file a was written since file b."""
+ if not os.path.exists(a):
+ return False
+ if not os.path.exists(b):
+ return True
+ return os.path.getmtime(a) >= os.path.getmtime(b)
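
As a rough illustration of the high-level interface the docstring above describes (a sketch, not part of the commit; the Grammar.txt path assumes the current directory is the repository root, and load_grammar may cache a pickle next to it):

    from sphinx.pycode.pgen2 import driver
    from sphinx.pycode import pytree

    grammar = driver.load_grammar('sphinx/pycode/Grammar.txt')
    drv = driver.Driver(grammar, convert=pytree.convert)
    tree = drv.parse_string('x = 1\n')
    print str(tree)    # str() of the tree reproduces the source exactly
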
diff --git a/sphinx/pycode/pgen2/grammar.py b/sphinx/pycode/pgen2/grammar.py
new file mode 100644
index 000000000..381d80e82
--- /dev/null
+++ b/sphinx/pycode/pgen2/grammar.py
@@ -0,0 +1,171 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""This module defines the data structures used to represent a grammar.
+
+These are a bit arcane because they are derived from the data
+structures used by Python's 'pgen' parser generator.
+
+There's also a table here mapping operators to their names in the
+token module; the Python tokenize module reports all operators as the
+fallback token code OP, but the parser needs the actual token code.
+
+"""
+
+# Python imports
+import pickle
+
+# Local imports
+from sphinx.pycode.pgen2 import token, tokenize
+
+
+class Grammar(object):
+ """Pgen parsing tables tables conversion class.
+
+ Once initialized, this class supplies the grammar tables for the
+ parsing engine implemented by parse.py. The parsing engine
+ accesses the instance variables directly. The class here does not
+ provide initialization of the tables; several subclasses exist to
+ do this (see the conv and pgen modules).
+
+ The load() method reads the tables from a pickle file, which is
+ much faster than the other ways offered by subclasses. The pickle
+ file is written by calling dump() (after loading the grammar
+ tables using a subclass). The report() method prints a readable
+ representation of the tables to stdout, for debugging.
+
+ The instance variables are as follows:
+
+ symbol2number -- a dict mapping symbol names to numbers. Symbol
+ numbers are always 256 or higher, to distinguish
+ them from token numbers, which are between 0 and
+ 255 (inclusive).
+
+ number2symbol -- a dict mapping numbers to symbol names;
+ these two are each other's inverse.
+
+ states -- a list of DFAs, where each DFA is a list of
+ states, each state is a list of arcs, and each
+ arc is an (i, j) pair where i is a label and j is
+ a state number. The DFA number is the index into
+ this list. (This name is slightly confusing.)
+ Final states are represented by a special arc of
+ the form (0, j) where j is its own state number.
+
+ dfas -- a dict mapping symbol numbers to (DFA, first)
+ pairs, where DFA is an item from the states list
+ above, and first is a set of tokens that can
+ begin this grammar rule (represented by a dict
+ whose values are always 1).
+
+ labels -- a list of (x, y) pairs where x is either a token
+ number or a symbol number, and y is either None
+ or a string; the strings are keywords. The label
+ number is the index in this list; label numbers
+ are used to mark state transitions (arcs) in the
+ DFAs.
+
+ start -- the number of the grammar's start symbol.
+
+ keywords -- a dict mapping keyword strings to arc labels.
+
+ tokens -- a dict mapping token numbers to arc labels.
+
+ """
+
+ def __init__(self):
+ self.symbol2number = {}
+ self.number2symbol = {}
+ self.states = []
+ self.dfas = {}
+ self.labels = [(0, "EMPTY")]
+ self.keywords = {}
+ self.tokens = {}
+ self.symbol2label = {}
+ self.start = 256
+
+ def dump(self, filename):
+ """Dump the grammar tables to a pickle file."""
+ f = open(filename, "wb")
+ pickle.dump(self.__dict__, f, 2)
+ f.close()
+
+ def load(self, filename):
+ """Load the grammar tables from a pickle file."""
+ f = open(filename, "rb")
+ d = pickle.load(f)
+ f.close()
+ self.__dict__.update(d)
+
+ def report(self):
+ """Dump the grammar tables to standard output, for debugging."""
+ from pprint import pprint
+ print "s2n"
+ pprint(self.symbol2number)
+ print "n2s"
+ pprint(self.number2symbol)
+ print "states"
+ pprint(self.states)
+ print "dfas"
+ pprint(self.dfas)
+ print "labels"
+ pprint(self.labels)
+ print "start", self.start
+
+
+# Map from operator to number (since tokenize doesn't do this)
+
+opmap_raw = """
+( LPAR
+) RPAR
+[ LSQB
+] RSQB
+: COLON
+, COMMA
+; SEMI
++ PLUS
+- MINUS
+* STAR
+/ SLASH
+| VBAR
+& AMPER
+< LESS
+> GREATER
+= EQUAL
+. DOT
+% PERCENT
+` BACKQUOTE
+{ LBRACE
+} RBRACE
+@ AT
+== EQEQUAL
+!= NOTEQUAL
+<> NOTEQUAL
+<= LESSEQUAL
+>= GREATEREQUAL
+~ TILDE
+^ CIRCUMFLEX
+<< LEFTSHIFT
+>> RIGHTSHIFT
+** DOUBLESTAR
++= PLUSEQUAL
+-= MINEQUAL
+*= STAREQUAL
+/= SLASHEQUAL
+%= PERCENTEQUAL
+&= AMPEREQUAL
+|= VBAREQUAL
+^= CIRCUMFLEXEQUAL
+<<= LEFTSHIFTEQUAL
+>>= RIGHTSHIFTEQUAL
+**= DOUBLESTAREQUAL
+// DOUBLESLASH
+//= DOUBLESLASHEQUAL
+-> RARROW
+"""
+
+opmap = {}
+for line in opmap_raw.splitlines():
+ if line:
+ op, name = line.split()
+ opmap[op] = getattr(token, name)
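
Two small checks that illustrate the tables documented above and the operator map (a sketch, not part of the commit; the Grammar.txt and pickle paths are placeholders):

    from sphinx.pycode.pgen2 import grammar, pgen, token

    # operators are mapped to their real token numbers instead of the generic OP
    assert grammar.opmap['=='] == token.EQEQUAL
    assert grammar.opmap['->'] == token.RARROW

    # tables generated by pgen can be dumped to a pickle and reloaded much faster later
    g = pgen.generate_grammar('sphinx/pycode/Grammar.txt')
    g.dump('Grammar.pickle')
    g2 = grammar.Grammar()
    g2.load('Grammar.pickle')
    assert g2.symbol2number == g.symbol2number
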
diff --git a/sphinx/pycode/pgen2/literals.py b/sphinx/pycode/pgen2/literals.py
new file mode 100644
index 000000000..0b3948a54
--- /dev/null
+++ b/sphinx/pycode/pgen2/literals.py
@@ -0,0 +1,60 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Safely evaluate Python string literals without using eval()."""
+
+import re
+
+simple_escapes = {"a": "\a",
+ "b": "\b",
+ "f": "\f",
+ "n": "\n",
+ "r": "\r",
+ "t": "\t",
+ "v": "\v",
+ "'": "'",
+ '"': '"',
+ "\\": "\\"}
+
+def escape(m):
+ all, tail = m.group(0, 1)
+ assert all.startswith("\\")
+ esc = simple_escapes.get(tail)
+ if esc is not None:
+ return esc
+ if tail.startswith("x"):
+ hexes = tail[1:]
+ if len(hexes) < 2:
+ raise ValueError("invalid hex string escape ('\\%s')" % tail)
+ try:
+ i = int(hexes, 16)
+ except ValueError:
+ raise ValueError("invalid hex string escape ('\\%s')" % tail)
+ else:
+ try:
+ i = int(tail, 8)
+ except ValueError:
+ raise ValueError("invalid octal string escape ('\\%s')" % tail)
+ return chr(i)
+
+def evalString(s):
+ assert s.startswith("'") or s.startswith('"'), repr(s[:1])
+ q = s[0]
+ if s[:3] == q*3:
+ q = q*3
+ assert s.endswith(q), repr(s[-len(q):])
+ assert len(s) >= 2*len(q)
+ s = s[len(q):-len(q)]
+ return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s)
+
+def test():
+ for i in range(256):
+ c = chr(i)
+ s = repr(c)
+ e = evalString(s)
+ if e != c:
+ print i, c, s, e
+
+
+if __name__ == "__main__":
+ test()
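
A quick illustration of evalString() on the escape forms it handles (a sketch, not part of the commit):

    from sphinx.pycode.pgen2.literals import evalString

    assert evalString(r"'a\nb'") == 'a\nb'        # simple escapes
    assert evalString(r'"\x41\101"') == 'AA'      # hex and octal escapes
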
diff --git a/sphinx/pycode/pgen2/parse.py b/sphinx/pycode/pgen2/parse.py
new file mode 100644
index 000000000..60eec05ea
--- /dev/null
+++ b/sphinx/pycode/pgen2/parse.py
@@ -0,0 +1,201 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Parser engine for the grammar tables generated by pgen.
+
+The grammar table must be loaded first.
+
+See Parser/parser.c in the Python distribution for additional info on
+how this parsing engine works.
+
+"""
+
+# Local imports
+from sphinx.pycode.pgen2 import token
+
+class ParseError(Exception):
+ """Exception to signal the parser is stuck."""
+
+ def __init__(self, msg, type, value, context):
+ Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
+ (msg, type, value, context))
+ self.msg = msg
+ self.type = type
+ self.value = value
+ self.context = context
+
+class Parser(object):
+ """Parser engine.
+
+ The proper usage sequence is:
+
+ p = Parser(grammar, [converter]) # create instance
+ p.setup([start]) # prepare for parsing
+ <for each input token>:
+ if p.addtoken(...): # parse a token; may raise ParseError
+ break
+ root = p.rootnode # root of abstract syntax tree
+
+ A Parser instance may be reused by calling setup() repeatedly.
+
+ A Parser instance contains state pertaining to the current token
+ sequence, and should not be used concurrently by different threads
+ to parse separate token sequences.
+
+ See driver.py for how to get input tokens by tokenizing a file or
+ string.
+
+ Parsing is complete when addtoken() returns True; the root of the
+ abstract syntax tree can then be retrieved from the rootnode
+ instance variable. When a syntax error occurs, addtoken() raises
+ the ParseError exception. There is no error recovery; the parser
+ cannot be used after a syntax error was reported (but it can be
+ reinitialized by calling setup()).
+
+ """
+
+ def __init__(self, grammar, convert=None):
+ """Constructor.
+
+ The grammar argument is a grammar.Grammar instance; see the
+ grammar module for more information.
+
+ The parser is not ready yet for parsing; you must call the
+ setup() method to get it started.
+
+ The optional convert argument is a function mapping concrete
+ syntax tree nodes to abstract syntax tree nodes. If not
+ given, no conversion is done and the syntax tree produced is
+ the concrete syntax tree. If given, it must be a function of
+ two arguments, the first being the grammar (a grammar.Grammar
+ instance), and the second being the concrete syntax tree node
+ to be converted. The syntax tree is converted from the bottom
+ up.
+
+ A concrete syntax tree node is a (type, value, context, nodes)
+ tuple, where type is the node type (a token or symbol number),
+ value is None for symbols and a string for tokens, context is
+ None or an opaque value used for error reporting (typically a
+ (lineno, offset) pair), and nodes is a list of children for
+ symbols, and None for tokens.
+
+ An abstract syntax tree node may be anything; this is entirely
+ up to the converter function.
+
+ """
+ self.grammar = grammar
+ self.convert = convert or (lambda grammar, node: node)
+
+ def setup(self, start=None):
+ """Prepare for parsing.
+
+ This *must* be called before starting to parse.
+
+ The optional argument is an alternative start symbol; it
+ defaults to the grammar's start symbol.
+
+ You can use a Parser instance to parse any number of programs;
+ each time you call setup() the parser is reset to an initial
+ state determined by the (implicit or explicit) start symbol.
+
+ """
+ if start is None:
+ start = self.grammar.start
+ # Each stack entry is a tuple: (dfa, state, node).
+ # A node is a tuple: (type, value, context, children),
+ # where children is a list of nodes or None, and context may be None.
+ newnode = (start, None, None, [])
+ stackentry = (self.grammar.dfas[start], 0, newnode)
+ self.stack = [stackentry]
+ self.rootnode = None
+ self.used_names = set() # Aliased to self.rootnode.used_names in pop()
+
+ def addtoken(self, type, value, context):
+ """Add a token; return True iff this is the end of the program."""
+ # Map from token to label
+ ilabel = self.classify(type, value, context)
+ # Loop until the token is shifted; may raise exceptions
+ while True:
+ dfa, state, node = self.stack[-1]
+ states, first = dfa
+ arcs = states[state]
+ # Look for a state with this label
+ for i, newstate in arcs:
+ t, v = self.grammar.labels[i]
+ if ilabel == i:
+ # Look it up in the list of labels
+ assert t < 256
+ # Shift a token; we're done with it
+ self.shift(type, value, newstate, context)
+ # Pop while we are in an accept-only state
+ state = newstate
+ while states[state] == [(0, state)]:
+ self.pop()
+ if not self.stack:
+ # Done parsing!
+ return True
+ dfa, state, node = self.stack[-1]
+ states, first = dfa
+ # Done with this token
+ return False
+ elif t >= 256:
+ # See if it's a symbol and if we're in its first set
+ itsdfa = self.grammar.dfas[t]
+ itsstates, itsfirst = itsdfa
+ if ilabel in itsfirst:
+ # Push a symbol
+ self.push(t, self.grammar.dfas[t], newstate, context)
+ break # To continue the outer while loop
+ else:
+ if (0, state) in arcs:
+ # An accepting state, pop it and try something else
+ self.pop()
+ if not self.stack:
+ # Done parsing, but another token is input
+ raise ParseError("too much input",
+ type, value, context)
+ else:
+ # No success finding a transition
+ raise ParseError("bad input", type, value, context)
+
+ def classify(self, type, value, context):
+ """Turn a token into a label. (Internal)"""
+ if type == token.NAME:
+ # Keep a listing of all used names
+ self.used_names.add(value)
+ # Check for reserved words
+ ilabel = self.grammar.keywords.get(value)
+ if ilabel is not None:
+ return ilabel
+ ilabel = self.grammar.tokens.get(type)
+ if ilabel is None:
+ raise ParseError("bad token", type, value, context)
+ return ilabel
+
+ def shift(self, type, value, newstate, context):
+ """Shift a token. (Internal)"""
+ dfa, state, node = self.stack[-1]
+ newnode = (type, value, context, None)
+ newnode = self.convert(self.grammar, newnode)
+ if newnode is not None:
+ node[-1].append(newnode)
+ self.stack[-1] = (dfa, newstate, node)
+
+ def push(self, type, newdfa, newstate, context):
+ """Push a nonterminal. (Internal)"""
+ dfa, state, node = self.stack[-1]
+ newnode = (type, None, context, [])
+ self.stack[-1] = (dfa, newstate, node)
+ self.stack.append((newdfa, 0, newnode))
+
+ def pop(self):
+ """Pop a nonterminal. (Internal)"""
+ popdfa, popstate, popnode = self.stack.pop()
+ newnode = self.convert(self.grammar, popnode)
+ if newnode is not None:
+ if self.stack:
+ dfa, state, node = self.stack[-1]
+ node[-1].append(newnode)
+ else:
+ self.rootnode = newnode
+ self.rootnode.used_names = self.used_names
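
Spelled out, the usage sequence from the Parser docstring looks roughly like this when fed straight from the tokenizer (a sketch, not part of the commit; the Driver in driver.py does the same plus prefix bookkeeping, and passing (None, start) as the context is an arbitrary choice here):

    from sphinx.pycode.pgen2 import driver, grammar, parse, token, tokenize

    g = driver.load_grammar('sphinx/pycode/Grammar.txt')
    p = parse.Parser(g)                  # no converter: raw (type, value, context, children) tuples
    p.setup()
    readline = iter(['x = 1\n', '']).next
    for type, value, start, end, line in tokenize.generate_tokens(readline):
        if type in (tokenize.COMMENT, tokenize.NL):
            continue                     # the parser proper never sees these
        if type == token.OP:
            type = grammar.opmap[value]  # give operators their real token numbers
        if p.addtoken(type, value, (None, start)):
            break                        # the start symbol was accepted
    root = p.rootnode
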
diff --git a/sphinx/pycode/pgen2/pgen.py b/sphinx/pycode/pgen2/pgen.py
new file mode 100644
index 000000000..d6895eaef
--- /dev/null
+++ b/sphinx/pycode/pgen2/pgen.py
@@ -0,0 +1,384 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Pgen imports
+from sphinx.pycode.pgen2 import grammar, token, tokenize
+
+class PgenGrammar(grammar.Grammar):
+ pass
+
+class ParserGenerator(object):
+
+ def __init__(self, filename, stream=None):
+ close_stream = None
+ if stream is None:
+ stream = open(filename)
+ close_stream = stream.close
+ self.filename = filename
+ self.stream = stream
+ self.generator = tokenize.generate_tokens(stream.readline)
+ self.gettoken() # Initialize lookahead
+ self.dfas, self.startsymbol = self.parse()
+ if close_stream is not None:
+ close_stream()
+ self.first = {} # map from symbol name to set of tokens
+ self.addfirstsets()
+
+ def make_grammar(self):
+ c = PgenGrammar()
+ names = self.dfas.keys()
+ names.sort()
+ names.remove(self.startsymbol)
+ names.insert(0, self.startsymbol)
+ for name in names:
+ i = 256 + len(c.symbol2number)
+ c.symbol2number[name] = i
+ c.number2symbol[i] = name
+ for name in names:
+ dfa = self.dfas[name]
+ states = []
+ for state in dfa:
+ arcs = []
+ for label, next in state.arcs.iteritems():
+ arcs.append((self.make_label(c, label), dfa.index(next)))
+ if state.isfinal:
+ arcs.append((0, dfa.index(state)))
+ states.append(arcs)
+ c.states.append(states)
+ c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
+ c.start = c.symbol2number[self.startsymbol]
+ return c
+
+ def make_first(self, c, name):
+ rawfirst = self.first[name]
+ first = {}
+ for label in rawfirst:
+ ilabel = self.make_label(c, label)
+ ##assert ilabel not in first # XXX failed on <> ... !=
+ first[ilabel] = 1
+ return first
+
+ def make_label(self, c, label):
+ # XXX Maybe this should be a method on a subclass of converter?
+ ilabel = len(c.labels)
+ if label[0].isalpha():
+ # Either a symbol name or a named token
+ if label in c.symbol2number:
+ # A symbol name (a non-terminal)
+ if label in c.symbol2label:
+ return c.symbol2label[label]
+ else:
+ c.labels.append((c.symbol2number[label], None))
+ c.symbol2label[label] = ilabel
+ return ilabel
+ else:
+ # A named token (NAME, NUMBER, STRING)
+ itoken = getattr(token, label, None)
+ assert isinstance(itoken, int), label
+ assert itoken in token.tok_name, label
+ if itoken in c.tokens:
+ return c.tokens[itoken]
+ else:
+ c.labels.append((itoken, None))
+ c.tokens[itoken] = ilabel
+ return ilabel
+ else:
+ # Either a keyword or an operator
+ assert label[0] in ('"', "'"), label
+ value = eval(label)
+ if value[0].isalpha():
+ # A keyword
+ if value in c.keywords:
+ return c.keywords[value]
+ else:
+ c.labels.append((token.NAME, value))
+ c.keywords[value] = ilabel
+ return ilabel
+ else:
+ # An operator (any non-numeric token)
+ itoken = grammar.opmap[value] # Fails if unknown token
+ if itoken in c.tokens:
+ return c.tokens[itoken]
+ else:
+ c.labels.append((itoken, None))
+ c.tokens[itoken] = ilabel
+ return ilabel
+
+ def addfirstsets(self):
+ names = self.dfas.keys()
+ names.sort()
+ for name in names:
+ if name not in self.first:
+ self.calcfirst(name)
+ #print name, self.first[name].keys()
+
+ def calcfirst(self, name):
+ dfa = self.dfas[name]
+ self.first[name] = None # dummy to detect left recursion
+ state = dfa[0]
+ totalset = {}
+ overlapcheck = {}
+ for label, next in state.arcs.iteritems():
+ if label in self.dfas:
+ if label in self.first:
+ fset = self.first[label]
+ if fset is None:
+ raise ValueError("recursion for rule %r" % name)
+ else:
+ self.calcfirst(label)
+ fset = self.first[label]
+ totalset.update(fset)
+ overlapcheck[label] = fset
+ else:
+ totalset[label] = 1
+ overlapcheck[label] = {label: 1}
+ inverse = {}
+ for label, itsfirst in overlapcheck.iteritems():
+ for symbol in itsfirst:
+ if symbol in inverse:
+ raise ValueError("rule %s is ambiguous; %s is in the"
+ " first sets of %s as well as %s" %
+ (name, symbol, label, inverse[symbol]))
+ inverse[symbol] = label
+ self.first[name] = totalset
+
+ def parse(self):
+ dfas = {}
+ startsymbol = None
+ # MSTART: (NEWLINE | RULE)* ENDMARKER
+ while self.type != token.ENDMARKER:
+ while self.type == token.NEWLINE:
+ self.gettoken()
+ # RULE: NAME ':' RHS NEWLINE
+ name = self.expect(token.NAME)
+ self.expect(token.OP, ":")
+ a, z = self.parse_rhs()
+ self.expect(token.NEWLINE)
+ #self.dump_nfa(name, a, z)
+ dfa = self.make_dfa(a, z)
+ #self.dump_dfa(name, dfa)
+ oldlen = len(dfa)
+ self.simplify_dfa(dfa)
+ newlen = len(dfa)
+ dfas[name] = dfa
+ #print name, oldlen, newlen
+ if startsymbol is None:
+ startsymbol = name
+ return dfas, startsymbol
+
+ def make_dfa(self, start, finish):
+ # To turn an NFA into a DFA, we define the states of the DFA
+ # to correspond to *sets* of states of the NFA. Then do some
+ # state reduction. Let's represent sets as dicts with 1 for
+ # values.
+ assert isinstance(start, NFAState)
+ assert isinstance(finish, NFAState)
+ def closure(state):
+ base = {}
+ addclosure(state, base)
+ return base
+ def addclosure(state, base):
+ assert isinstance(state, NFAState)
+ if state in base:
+ return
+ base[state] = 1
+ for label, next in state.arcs:
+ if label is None:
+ addclosure(next, base)
+ states = [DFAState(closure(start), finish)]
+ for state in states: # NB states grows while we're iterating
+ arcs = {}
+ for nfastate in state.nfaset:
+ for label, next in nfastate.arcs:
+ if label is not None:
+ addclosure(next, arcs.setdefault(label, {}))
+ for label, nfaset in arcs.iteritems():
+ for st in states:
+ if st.nfaset == nfaset:
+ break
+ else:
+ st = DFAState(nfaset, finish)
+ states.append(st)
+ state.addarc(st, label)
+ return states # List of DFAState instances; first one is start
+
+ def dump_nfa(self, name, start, finish):
+ print "Dump of NFA for", name
+ todo = [start]
+ for i, state in enumerate(todo):
+ print " State", i, state is finish and "(final)" or ""
+ for label, next in state.arcs:
+ if next in todo:
+ j = todo.index(next)
+ else:
+ j = len(todo)
+ todo.append(next)
+ if label is None:
+ print " -> %d" % j
+ else:
+ print " %s -> %d" % (label, j)
+
+ def dump_dfa(self, name, dfa):
+ print "Dump of DFA for", name
+ for i, state in enumerate(dfa):
+ print " State", i, state.isfinal and "(final)" or ""
+ for label, next in state.arcs.iteritems():
+ print " %s -> %d" % (label, dfa.index(next))
+
+ def simplify_dfa(self, dfa):
+ # This is not theoretically optimal, but works well enough.
+ # Algorithm: repeatedly look for two states that have the same
+ # set of arcs (same labels pointing to the same nodes) and
+ # unify them, until things stop changing.
+
+ # dfa is a list of DFAState instances
+ changes = True
+ while changes:
+ changes = False
+ for i, state_i in enumerate(dfa):
+ for j in range(i+1, len(dfa)):
+ state_j = dfa[j]
+ if state_i == state_j:
+ #print " unify", i, j
+ del dfa[j]
+ for state in dfa:
+ state.unifystate(state_j, state_i)
+ changes = True
+ break
+
+ def parse_rhs(self):
+ # RHS: ALT ('|' ALT)*
+ a, z = self.parse_alt()
+ if self.value != "|":
+ return a, z
+ else:
+ aa = NFAState()
+ zz = NFAState()
+ aa.addarc(a)
+ z.addarc(zz)
+ while self.value == "|":
+ self.gettoken()
+ a, z = self.parse_alt()
+ aa.addarc(a)
+ z.addarc(zz)
+ return aa, zz
+
+ def parse_alt(self):
+ # ALT: ITEM+
+ a, b = self.parse_item()
+ while (self.value in ("(", "[") or
+ self.type in (token.NAME, token.STRING)):
+ c, d = self.parse_item()
+ b.addarc(c)
+ b = d
+ return a, b
+
+ def parse_item(self):
+ # ITEM: '[' RHS ']' | ATOM ['+' | '*']
+ if self.value == "[":
+ self.gettoken()
+ a, z = self.parse_rhs()
+ self.expect(token.OP, "]")
+ a.addarc(z)
+ return a, z
+ else:
+ a, z = self.parse_atom()
+ value = self.value
+ if value not in ("+", "*"):
+ return a, z
+ self.gettoken()
+ z.addarc(a)
+ if value == "+":
+ return a, z
+ else:
+ return a, a
+
+ def parse_atom(self):
+ # ATOM: '(' RHS ')' | NAME | STRING
+ if self.value == "(":
+ self.gettoken()
+ a, z = self.parse_rhs()
+ self.expect(token.OP, ")")
+ return a, z
+ elif self.type in (token.NAME, token.STRING):
+ a = NFAState()
+ z = NFAState()
+ a.addarc(z, self.value)
+ self.gettoken()
+ return a, z
+ else:
+ self.raise_error("expected (...) or NAME or STRING, got %s/%s",
+ self.type, self.value)
+
+ def expect(self, type, value=None):
+ if self.type != type or (value is not None and self.value != value):
+ self.raise_error("expected %s/%s, got %s/%s",
+ type, value, self.type, self.value)
+ value = self.value
+ self.gettoken()
+ return value
+
+ def gettoken(self):
+ tup = self.generator.next()
+ while tup[0] in (tokenize.COMMENT, tokenize.NL):
+ tup = self.generator.next()
+ self.type, self.value, self.begin, self.end, self.line = tup
+ #print token.tok_name[self.type], repr(self.value)
+
+ def raise_error(self, msg, *args):
+ if args:
+ try:
+ msg = msg % args
+ except:
+ msg = " ".join([msg] + map(str, args))
+ raise SyntaxError(msg, (self.filename, self.end[0],
+ self.end[1], self.line))
+
+class NFAState(object):
+
+ def __init__(self):
+ self.arcs = [] # list of (label, NFAState) pairs
+
+ def addarc(self, next, label=None):
+ assert label is None or isinstance(label, str)
+ assert isinstance(next, NFAState)
+ self.arcs.append((label, next))
+
+class DFAState(object):
+
+ def __init__(self, nfaset, final):
+ assert isinstance(nfaset, dict)
+ assert isinstance(iter(nfaset).next(), NFAState)
+ assert isinstance(final, NFAState)
+ self.nfaset = nfaset
+ self.isfinal = final in nfaset
+ self.arcs = {} # map from label to DFAState
+
+ def addarc(self, next, label):
+ assert isinstance(label, str)
+ assert label not in self.arcs
+ assert isinstance(next, DFAState)
+ self.arcs[label] = next
+
+ def unifystate(self, old, new):
+ for label, next in self.arcs.iteritems():
+ if next is old:
+ self.arcs[label] = new
+
+ def __eq__(self, other):
+ # Equality test -- ignore the nfaset instance variable
+ assert isinstance(other, DFAState)
+ if self.isfinal != other.isfinal:
+ return False
+ # Can't just return self.arcs == other.arcs, because that
+ # would invoke this method recursively, with cycles...
+ if len(self.arcs) != len(other.arcs):
+ return False
+ for label, next in self.arcs.iteritems():
+ if next is not other.arcs.get(label):
+ return False
+ return True
+
+def generate_grammar(filename="Grammar.txt"):
+ p = ParserGenerator(filename)
+ return p.make_grammar()
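
To see what the generator produces for the bundled grammar, one can inspect the intermediate objects (a sketch, not part of the commit; the commented values are what the code above is expected to yield for Grammar.txt):

    from sphinx.pycode.pgen2 import pgen

    pg = pgen.ParserGenerator('sphinx/pycode/Grammar.txt')
    print sorted(pg.first['if_stmt'].keys())    # ["'if'"], keyword labels keep their quotes
    g = pg.make_grammar()
    print g.number2symbol[g.start]              # file_input, the first rule in Grammar.txt
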
diff --git a/sphinx/pycode/pgen2/token.py b/sphinx/pycode/pgen2/token.py
new file mode 100755
index 000000000..61468b313
--- /dev/null
+++ b/sphinx/pycode/pgen2/token.py
@@ -0,0 +1,82 @@
+#! /usr/bin/env python
+
+"""Token constants (from "token.h")."""
+
+# Taken from Python (r53757) and modified to include some tokens
+# originally monkeypatched in by pgen2.tokenize
+
+#--start constants--
+ENDMARKER = 0
+NAME = 1
+NUMBER = 2
+STRING = 3
+NEWLINE = 4
+INDENT = 5
+DEDENT = 6
+LPAR = 7
+RPAR = 8
+LSQB = 9
+RSQB = 10
+COLON = 11
+COMMA = 12
+SEMI = 13
+PLUS = 14
+MINUS = 15
+STAR = 16
+SLASH = 17
+VBAR = 18
+AMPER = 19
+LESS = 20
+GREATER = 21
+EQUAL = 22
+DOT = 23
+PERCENT = 24
+BACKQUOTE = 25
+LBRACE = 26
+RBRACE = 27
+EQEQUAL = 28
+NOTEQUAL = 29
+LESSEQUAL = 30
+GREATEREQUAL = 31
+TILDE = 32
+CIRCUMFLEX = 33
+LEFTSHIFT = 34
+RIGHTSHIFT = 35
+DOUBLESTAR = 36
+PLUSEQUAL = 37
+MINEQUAL = 38
+STAREQUAL = 39
+SLASHEQUAL = 40
+PERCENTEQUAL = 41
+AMPEREQUAL = 42
+VBAREQUAL = 43
+CIRCUMFLEXEQUAL = 44
+LEFTSHIFTEQUAL = 45
+RIGHTSHIFTEQUAL = 46
+DOUBLESTAREQUAL = 47
+DOUBLESLASH = 48
+DOUBLESLASHEQUAL = 49
+AT = 50
+OP = 51
+COMMENT = 52
+NL = 53
+RARROW = 54
+ERRORTOKEN = 55
+N_TOKENS = 56
+NT_OFFSET = 256
+#--end constants--
+
+tok_name = {}
+for _name, _value in globals().items():
+ if type(_value) is type(0):
+ tok_name[_value] = _name
+
+
+def ISTERMINAL(x):
+ return x < NT_OFFSET
+
+def ISNONTERMINAL(x):
+ return x >= NT_OFFSET
+
+def ISEOF(x):
+ return x == ENDMARKER
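
The split at NT_OFFSET between token numbers and symbol numbers is what the helpers above encode; a tiny sketch (not part of the commit):

    from sphinx.pycode.pgen2 import token

    print token.tok_name[token.RARROW]     # 'RARROW'
    print token.ISTERMINAL(token.NAME)     # True: token numbers stay below 256
    print token.ISNONTERMINAL(257)         # True: symbol numbers start at NT_OFFSET
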
diff --git a/sphinx/pycode/pgen2/tokenize.py b/sphinx/pycode/pgen2/tokenize.py
new file mode 100644
index 000000000..46ee7842b
--- /dev/null
+++ b/sphinx/pycode/pgen2/tokenize.py
@@ -0,0 +1,405 @@
+# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
+# All rights reserved.
+
+"""Tokenization help for Python programs.
+
+generate_tokens(readline) is a generator that breaks a stream of
+text into Python tokens. It accepts a readline-like method which is called
+repeatedly to get the next line of input (or "" for EOF). It generates
+5-tuples with these members:
+
+ the token type (see token.py)
+ the token (a string)
+ the starting (row, column) indices of the token (a 2-tuple of ints)
+ the ending (row, column) indices of the token (a 2-tuple of ints)
+ the original line (string)
+
+It is designed to match the working of the Python tokenizer exactly, except
+that it produces COMMENT tokens for comments and gives type OP for all
+operators.
+
+Older entry points
+ tokenize_loop(readline, tokeneater)
+ tokenize(readline, tokeneater=printtoken)
+are the same, except instead of generating tokens, tokeneater is a callback
+function to which the 5 fields described above are passed as 5 arguments,
+each time a new token is found."""
+
+__author__ = 'Ka-Ping Yee <ping@lfw.org>'
+__credits__ = \
+ 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
+
+import string, re
+from sphinx.pycode.pgen2.token import *
+from sphinx.pycode.pgen2 import token
+
+__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
+ "generate_tokens", "untokenize"]
+del token
+
+def group(*choices): return '(' + '|'.join(choices) + ')'
+def any(*choices): return group(*choices) + '*'
+def maybe(*choices): return group(*choices) + '?'
+
+Whitespace = r'[ \f\t]*'
+Comment = r'#[^\r\n]*'
+Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
+Name = r'[a-zA-Z_]\w*'
+
+Binnumber = r'0[bB][01]*'
+Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
+Octnumber = r'0[oO]?[0-7]*[lL]?'
+Decnumber = r'[1-9]\d*[lL]?'
+Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
+Exponent = r'[eE][-+]?\d+'
+Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
+Expfloat = r'\d+' + Exponent
+Floatnumber = group(Pointfloat, Expfloat)
+Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
+Number = group(Imagnumber, Floatnumber, Intnumber)
+
+# Tail end of ' string.
+Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
+# Tail end of " string.
+Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
+# Tail end of ''' string.
+Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
+# Tail end of """ string.
+Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
+Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
+# Single-line ' or " string.
+String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+ r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+
+# Because of leftmost-then-longest match semantics, be sure to put the
+# longest operators first (e.g., if = came before ==, == would get
+# recognized as two instances of =).
+Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
+ r"//=?", r"->",
+ r"[+\-*/%&|^=<>]=?",
+ r"~")
+
+Bracket = '[][(){}]'
+Special = group(r'\r?\n', r'[:;.,`@]')
+Funny = group(Operator, Bracket, Special)
+
+PlainToken = group(Number, Funny, String, Name)
+Token = Ignore + PlainToken
+
+# First (or only) line of ' or " string.
+ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+ group("'", r'\\\r?\n'),
+ r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+ group('"', r'\\\r?\n'))
+PseudoExtras = group(r'\\\r?\n', Comment, Triple)
+PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
+
+tokenprog, pseudoprog, single3prog, double3prog = map(
+ re.compile, (Token, PseudoToken, Single3, Double3))
+endprogs = {"'": re.compile(Single), '"': re.compile(Double),
+ "'''": single3prog, '"""': double3prog,
+ "r'''": single3prog, 'r"""': double3prog,
+ "u'''": single3prog, 'u"""': double3prog,
+ "b'''": single3prog, 'b"""': double3prog,
+ "ur'''": single3prog, 'ur"""': double3prog,
+ "br'''": single3prog, 'br"""': double3prog,
+ "R'''": single3prog, 'R"""': double3prog,
+ "U'''": single3prog, 'U"""': double3prog,
+ "B'''": single3prog, 'B"""': double3prog,
+ "uR'''": single3prog, 'uR"""': double3prog,
+ "Ur'''": single3prog, 'Ur"""': double3prog,
+ "UR'''": single3prog, 'UR"""': double3prog,
+ "bR'''": single3prog, 'bR"""': double3prog,
+ "Br'''": single3prog, 'Br"""': double3prog,
+ "BR'''": single3prog, 'BR"""': double3prog,
+ 'r': None, 'R': None,
+ 'u': None, 'U': None,
+ 'b': None, 'B': None}
+
+triple_quoted = {}
+for t in ("'''", '"""',
+ "r'''", 'r"""', "R'''", 'R"""',
+ "u'''", 'u"""', "U'''", 'U"""',
+ "b'''", 'b"""', "B'''", 'B"""',
+ "ur'''", 'ur"""', "Ur'''", 'Ur"""',
+ "uR'''", 'uR"""', "UR'''", 'UR"""',
+ "br'''", 'br"""', "Br'''", 'Br"""',
+ "bR'''", 'bR"""', "BR'''", 'BR"""',):
+ triple_quoted[t] = t
+single_quoted = {}
+for t in ("'", '"',
+ "r'", 'r"', "R'", 'R"',
+ "u'", 'u"', "U'", 'U"',
+ "b'", 'b"', "B'", 'B"',
+ "ur'", 'ur"', "Ur'", 'Ur"',
+ "uR'", 'uR"', "UR'", 'UR"',
+ "br'", 'br"', "Br'", 'Br"',
+ "bR'", 'bR"', "BR'", 'BR"', ):
+ single_quoted[t] = t
+
+tabsize = 8
+
+class TokenError(Exception): pass
+
+class StopTokenizing(Exception): pass
+
+def printtoken(type, token, (srow, scol), (erow, ecol), line): # for testing
+ print "%d,%d-%d,%d:\t%s\t%s" % \
+ (srow, scol, erow, ecol, tok_name[type], repr(token))
+
+def tokenize(readline, tokeneater=printtoken):
+ """
+ The tokenize() function accepts two parameters: one representing the
+ input stream, and one providing an output mechanism for tokenize().
+
+ The first parameter, readline, must be a callable object which provides
+ the same interface as the readline() method of built-in file objects.
+ Each call to the function should return one line of input as a string.
+
+ The second parameter, tokeneater, must also be a callable object. It is
+ called once for each token, with five arguments, corresponding to the
+ tuples generated by generate_tokens().
+ """
+ try:
+ tokenize_loop(readline, tokeneater)
+ except StopTokenizing:
+ pass
+
+# backwards compatible interface
+def tokenize_loop(readline, tokeneater):
+ for token_info in generate_tokens(readline):
+ tokeneater(*token_info)
+
+class Untokenizer:
+
+ def __init__(self):
+ self.tokens = []
+ self.prev_row = 1
+ self.prev_col = 0
+
+ def add_whitespace(self, start):
+ row, col = start
+ assert row <= self.prev_row
+ col_offset = col - self.prev_col
+ if col_offset:
+ self.tokens.append(" " * col_offset)
+
+ def untokenize(self, iterable):
+ for t in iterable:
+ if len(t) == 2:
+ self.compat(t, iterable)
+ break
+ tok_type, token, start, end, line = t
+ self.add_whitespace(start)
+ self.tokens.append(token)
+ self.prev_row, self.prev_col = end
+ if tok_type in (NEWLINE, NL):
+ self.prev_row += 1
+ self.prev_col = 0
+ return "".join(self.tokens)
+
+ def compat(self, token, iterable):
+ startline = False
+ indents = []
+ toks_append = self.tokens.append
+ toknum, tokval = token
+ if toknum in (NAME, NUMBER):
+ tokval += ' '
+ if toknum in (NEWLINE, NL):
+ startline = True
+ for tok in iterable:
+ toknum, tokval = tok[:2]
+
+ if toknum in (NAME, NUMBER):
+ tokval += ' '
+
+ if toknum == INDENT:
+ indents.append(tokval)
+ continue
+ elif toknum == DEDENT:
+ indents.pop()
+ continue
+ elif toknum in (NEWLINE, NL):
+ startline = True
+ elif startline and indents:
+ toks_append(indents[-1])
+ startline = False
+ toks_append(tokval)
+
+def untokenize(iterable):
+ """Transform tokens back into Python source code.
+
+ Each element returned by the iterable must be a token sequence
+ with at least two elements, a token number and token value. If
+ only two tokens are passed, the resulting output is poor.
+
+ Round-trip invariant for full input:
+ Untokenized source will match input source exactly
+
+ Round-trip invariant for limited input:
+ # Output text will tokenize back to the input
+ t1 = [tok[:2] for tok in generate_tokens(f.readline)]
+ newcode = untokenize(t1)
+ readline = iter(newcode.splitlines(1)).next
+ t2 = [tok[:2] for tok in generate_tokens(readline)]
+ assert t1 == t2
+ """
+ ut = Untokenizer()
+ return ut.untokenize(iterable)
+
+def generate_tokens(readline):
+ """
+ The generate_tokens() generator requires one argument, readline, which
+ must be a callable object which provides the same interface as the
+ readline() method of built-in file objects. Each call to the function
+ should return one line of input as a string. Alternately, readline
+ can be a callable function terminating with StopIteration:
+ readline = open(myfile).next # Example of alternate readline
+
+ The generator produces 5-tuples with these members: the token type; the
+ token string; a 2-tuple (srow, scol) of ints specifying the row and
+ column where the token begins in the source; a 2-tuple (erow, ecol) of
+ ints specifying the row and column where the token ends in the source;
+ and the line on which the token was found. The line passed is the
+ logical line; continuation lines are included.
+ """
+ lnum = parenlev = continued = 0
+ namechars, numchars = string.ascii_letters + '_', '0123456789'
+ contstr, needcont = '', 0
+ contline = None
+ indents = [0]
+
+ while 1: # loop over lines in stream
+ try:
+ line = readline()
+ except StopIteration:
+ line = ''
+ lnum = lnum + 1
+ pos, max = 0, len(line)
+
+ if contstr: # continued string
+ if not line:
+ raise TokenError, ("EOF in multi-line string", strstart)
+ endmatch = endprog.match(line)
+ if endmatch:
+ pos = end = endmatch.end(0)
+ yield (STRING, contstr + line[:end],
+ strstart, (lnum, end), contline + line)
+ contstr, needcont = '', 0
+ contline = None
+ elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
+ yield (ERRORTOKEN, contstr + line,
+ strstart, (lnum, len(line)), contline)
+ contstr = ''
+ contline = None
+ continue
+ else:
+ contstr = contstr + line
+ contline = contline + line
+ continue
+
+ elif parenlev == 0 and not continued: # new statement
+ if not line: break
+ column = 0
+ while pos < max: # measure leading whitespace
+ if line[pos] == ' ': column = column + 1
+ elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize
+ elif line[pos] == '\f': column = 0
+ else: break
+ pos = pos + 1
+ if pos == max: break
+
+ if line[pos] in '#\r\n': # skip comments or blank lines
+ if line[pos] == '#':
+ comment_token = line[pos:].rstrip('\r\n')
+ nl_pos = pos + len(comment_token)
+ yield (COMMENT, comment_token,
+ (lnum, pos), (lnum, pos + len(comment_token)), line)
+ yield (NL, line[nl_pos:],
+ (lnum, nl_pos), (lnum, len(line)), line)
+ else:
+ yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
+ (lnum, pos), (lnum, len(line)), line)
+ continue
+
+ if column > indents[-1]: # count indents or dedents
+ indents.append(column)
+ yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
+ while column < indents[-1]:
+ if column not in indents:
+ raise IndentationError(
+ "unindent does not match any outer indentation level",
+ ("<tokenize>", lnum, pos, line))
+ indents = indents[:-1]
+ yield (DEDENT, '', (lnum, pos), (lnum, pos), line)
+
+ else: # continued statement
+ if not line:
+ raise TokenError, ("EOF in multi-line statement", (lnum, 0))
+ continued = 0
+
+ while pos < max:
+ pseudomatch = pseudoprog.match(line, pos)
+ if pseudomatch: # scan for tokens
+ start, end = pseudomatch.span(1)
+ spos, epos, pos = (lnum, start), (lnum, end), end
+ token, initial = line[start:end], line[start]
+
+ if initial in numchars or \
+ (initial == '.' and token != '.'): # ordinary number
+ yield (NUMBER, token, spos, epos, line)
+ elif initial in '\r\n':
+ newline = NEWLINE
+ if parenlev > 0:
+ newline = NL
+ yield (newline, token, spos, epos, line)
+ elif initial == '#':
+ assert not token.endswith("\n")
+ yield (COMMENT, token, spos, epos, line)
+ elif token in triple_quoted:
+ endprog = endprogs[token]
+ endmatch = endprog.match(line, pos)
+ if endmatch: # all on one line
+ pos = endmatch.end(0)
+ token = line[start:pos]
+ yield (STRING, token, spos, (lnum, pos), line)
+ else:
+ strstart = (lnum, start) # multiple lines
+ contstr = line[start:]
+ contline = line
+ break
+ elif initial in single_quoted or \
+ token[:2] in single_quoted or \
+ token[:3] in single_quoted:
+ if token[-1] == '\n': # continued string
+ strstart = (lnum, start)
+ endprog = (endprogs[initial] or endprogs[token[1]] or
+ endprogs[token[2]])
+ contstr, needcont = line[start:], 1
+ contline = line
+ break
+ else: # ordinary string
+ yield (STRING, token, spos, epos, line)
+ elif initial in namechars: # ordinary name
+ yield (NAME, token, spos, epos, line)
+ elif initial == '\\': # continued stmt
+ # This yield is new; needed for better idempotency:
+ yield (NL, token, spos, (lnum, pos), line)
+ continued = 1
+ else:
+ if initial in '([{': parenlev = parenlev + 1
+ elif initial in ')]}': parenlev = parenlev - 1
+ yield (OP, token, spos, epos, line)
+ else:
+ yield (ERRORTOKEN, line[pos],
+ (lnum, pos), (lnum, pos+1), line)
+ pos = pos + 1
+
+ for indent in indents[1:]: # pop remaining indent levels
+ yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
+ yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
+
+if __name__ == '__main__': # testing
+ import sys
+ if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
+ else: tokenize(sys.stdin.readline)
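
A minimal run of generate_tokens() over a couple of lines (a sketch, not part of the commit) shows the COMMENT and NL tokens that set this tokenizer apart from the parser-facing token stream:

    from sphinx.pycode.pgen2 import tokenize, token

    readline = iter(['# answer\n', 'x = 42\n', '']).next
    for type, value, start, end, line in tokenize.generate_tokens(readline):
        print token.tok_name[type], repr(value), start
    # prints COMMENT, NL, NAME, OP, NUMBER, NEWLINE, ENDMARKER with their positions
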
diff --git a/sphinx/pycode/pytree.py b/sphinx/pycode/pytree.py
new file mode 100644
index 000000000..950319c59
--- /dev/null
+++ b/sphinx/pycode/pytree.py
@@ -0,0 +1,295 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Python parse tree definitions.
+
+This is a very concrete parse tree; we need to keep every token and
+even the comments and whitespace between tokens.
+
+Adapted for read-only nodes from pytree.py in Python's 2to3 tool, and
+added a few bits.
+"""
+
+__author__ = "Guido van Rossum <guido@python.org>"
+
+
+class Base(object):
+
+ """Abstract base class for Node and Leaf.
+
+ This provides some default functionality and boilerplate using the
+ template pattern.
+
+ A node may be a subnode of at most one parent.
+ """
+
+ # Default values for instance variables
+ type = None # int: token number (< 256) or symbol number (>= 256)
+ parent = None # Parent node pointer, or None
+ children = () # Tuple of subnodes
+ was_changed = False
+
+ def __new__(cls, *args, **kwds):
+ """Constructor that prevents Base from being instantiated."""
+ assert cls is not Base, "Cannot instantiate Base"
+ return object.__new__(cls)
+
+ def __eq__(self, other):
+ """Compares two nodes for equality.
+
+ This calls the method _eq().
+ """
+ if self.__class__ is not other.__class__:
+ return NotImplemented
+ return self._eq(other)
+
+ def __ne__(self, other):
+ """Compares two nodes for inequality.
+
+ This calls the method _eq().
+ """
+ if self.__class__ is not other.__class__:
+ return NotImplemented
+ return not self._eq(other)
+
+ def _eq(self, other):
+ """Compares two nodes for equality.
+
+ This is called by __eq__ and __ne__. It is only called if the
+ two nodes have the same type. This must be implemented by the
+ concrete subclass. Nodes should be considered equal if they
+ have the same structure, ignoring the prefix string and other
+ context information.
+ """
+ raise NotImplementedError
+
+ def get_lineno(self):
+ """Returns the line number which generated the invocant node."""
+ node = self
+ while not isinstance(node, Leaf):
+ if not node.children:
+ return
+ node = node.children[0]
+ return node.lineno
+
+ def get_next_sibling(self):
+ """Return the node immediately following the invocant in their
+ parent's children list. If the invocant does not have a next
+ sibling, return None."""
+ if self.parent is None:
+ return None
+
+ # Can't use index(); we need to test by identity
+ for i, child in enumerate(self.parent.children):
+ if child is self:
+ try:
+ return self.parent.children[i+1]
+ except IndexError:
+ return None
+
+ def get_prev_sibling(self):
+ """Return the node immediately preceding the invocant in their
+ parent's children list. If the invocant does not have a previous
+ sibling, return None."""
+ if self.parent is None:
+ return None
+
+ # Can't use index(); we need to test by identity
+ for i, child in enumerate(self.parent.children):
+ if child is self:
+ if i == 0:
+ return None
+ return self.parent.children[i-1]
+
+ def get_prev_leaf(self):
+ """Return the leaf node that precedes this node in the parse tree."""
+ def last_child(node):
+ if isinstance(node, Leaf):
+ return node
+ elif not node.children:
+ return None
+ else:
+ return last_child(node.children[-1])
+ if self.parent is None:
+ return None
+ prev = self.get_prev_sibling()
+ if isinstance(prev, Leaf):
+ return prev
+ elif prev is not None:
+ return last_child(prev)
+ return self.parent.get_prev_leaf()
+
+ def get_suffix(self):
+ """Return the string immediately following the invocant node. This
+ is effectively equivalent to node.get_next_sibling().get_prefix()"""
+ next_sib = self.get_next_sibling()
+ if next_sib is None:
+ return ""
+ return next_sib.get_prefix()
+
+
+class Node(Base):
+
+ """Concrete implementation for interior nodes."""
+
+ def __init__(self, type, children, context=None, prefix=None):
+ """Initializer.
+
+ Takes a type constant (a symbol number >= 256), a sequence of
+ child nodes, and an optional context keyword argument.
+
+ As a side effect, the parent pointers of the children are updated.
+ """
+ assert type >= 256, type
+ self.type = type
+ self.children = list(children)
+ for ch in self.children:
+ assert ch.parent is None, repr(ch)
+ ch.parent = self
+ if prefix is not None:
+ self.set_prefix(prefix)
+
+ def __repr__(self):
+ return "%s(%s, %r)" % (self.__class__.__name__,
+ self.type, self.children)
+
+ def __str__(self):
+ """This reproduces the input source exactly."""
+ return "".join(map(str, self.children))
+
+ def compact(self):
+ return ''.join(child.compact() for child in self.children)
+
+ def __getitem__(self, index):
+ return self.children[index]
+
+ def __iter__(self):
+ return iter(self.children)
+
+ def _eq(self, other):
+ """Compares two nodes for equality."""
+ return (self.type, self.children) == (other.type, other.children)
+
+ def post_order(self):
+ """Returns a post-order iterator for the tree."""
+ for child in self.children:
+ for node in child.post_order():
+ yield node
+ yield self
+
+ def pre_order(self):
+ """Returns a pre-order iterator for the tree."""
+ yield self
+ for child in self.children:
+ for node in child.pre_order():
+ yield node
+
+ def get_prefix(self):
+ """Returns the prefix for the node.
+
+ This passes the call on to the first child.
+ """
+ if not self.children:
+ return ""
+ return self.children[0].get_prefix()
+
+
+class Leaf(Base):
+
+ """Concrete implementation for leaf nodes."""
+
+ # Default values for instance variables
+ prefix = "" # Whitespace and comments preceding this token in the input
+ lineno = 0 # Line where this token starts in the input
+ column = 0 # Column where this token starts in the input
+
+ def __init__(self, type, value, context=None, prefix=None):
+ """Initializer.
+
+ Takes a type constant (a token number < 256), a string value,
+ and an optional context keyword argument.
+ """
+ assert 0 <= type < 256, type
+ if context is not None:
+ self.prefix, (self.lineno, self.column) = context
+ self.type = type
+ self.value = value
+ if prefix is not None:
+ self.prefix = prefix
+
+ def __repr__(self):
+ return "%s(%r, %r)" % (self.__class__.__name__,
+ self.type, self.value)
+
+ def __str__(self):
+ """This reproduces the input source exactly."""
+ return self.prefix + str(self.value)
+
+ def compact(self):
+ return str(self.value)
+
+ def _eq(self, other):
+ """Compares two nodes for equality."""
+ return (self.type, self.value) == (other.type, other.value)
+
+ def post_order(self):
+ """Returns a post-order iterator for the tree."""
+ yield self
+
+ def pre_order(self):
+ """Returns a pre-order iterator for the tree."""
+ yield self
+
+ def get_prefix(self):
+ """Returns the prefix for the node."""
+ return self.prefix
+
+
+def convert(grammar, raw_node):
+ """Convert raw node to a Node or Leaf instance."""
+ type, value, context, children = raw_node
+ if children or type in grammar.number2symbol:
+ # If there's exactly one child, return that child instead of
+ # creating a new node.
+ if len(children) == 1:
+ return children[0]
+ return Node(type, children, context=context)
+ else:
+ return Leaf(type, value, context=context)
+
+
+def nice_repr(node, number2name, prefix=False):
+ def _repr(node):
+ if isinstance(node, Leaf):
+ return "%s(%r)" % (number2name[node.type], node.value)
+ else:
+ return "%s(%s)" % (number2name[node.type],
+ ', '.join(map(_repr, node.children)))
+ def _prepr(node):
+ if isinstance(node, Leaf):
+ return "%s(%r, %r)" % (number2name[node.type], node.prefix, node.value)
+ else:
+ return "%s(%s)" % (number2name[node.type],
+ ', '.join(map(_prepr, node.children)))
+ return (prefix and _prepr or _repr)(node)
+
+
+class NodeVisitor(object):
+ def __init__(self, number2name):
+ self.number2name = number2name
+ self.init()
+
+ def init(self):
+ pass
+
+ def visit(self, node):
+ """Visit a node."""
+ method = 'visit_' + self.number2name[node.type]
+ visitor = getattr(self, method, self.generic_visit)
+ return visitor(node)
+
+ def generic_visit(self, node):
+ """Called if no explicit visitor function exists for a node."""
+ if isinstance(node, Node):
+ for child in node:
+ self.visit(child)
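
Finally, a short sketch of walking the read-only tree with a NodeVisitor subclass (not part of the commit; the FuncLister class and the sample source are invented):

    from sphinx.pycode.pgen2 import driver, token
    from sphinx.pycode import pytree

    g = driver.load_grammar('sphinx/pycode/Grammar.txt')
    drv = driver.Driver(g, convert=pytree.convert)
    tree = drv.parse_string('def f(a, b):\n    return a + b\n')
    print str(tree) == 'def f(a, b):\n    return a + b\n'   # the tree round-trips exactly

    # mirror the number2name mapping built in sphinx/pycode/__init__.py
    number2name = g.number2symbol.copy()
    number2name.update(token.tok_name)

    class FuncLister(pytree.NodeVisitor):
        def visit_funcdef(self, node):
            print 'found function:', node[1].value   # the NAME leaf after 'def'
            self.generic_visit(node)

    FuncLister(number2name).visit(tree)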