summary | refs | log | tree | commit | diff
path: root/sphinx/pycode
diff options
context:
space:
mode:
author Georg Brandl <georg@python.org> 2009-01-04 20:02:24 +0100
committer Georg Brandl <georg@python.org> 2009-01-04 20:02:24 +0100
commit 93c8edb25dbab5c219258662ab750aa346aeb0e9 (patch)
tree 045aed7fe2c72e71a3d68225c75318e7691f2233 /sphinx/pycode
parent 686c154eea969fe7eecc4aec27d922d301be46fc (diff)
download sphinx-93c8edb25dbab5c219258662ab750aa346aeb0e9.tar.gz
Add support for decoding strings and comments to the analyzer.
Diffstat (limited to 'sphinx/pycode')
-rw-r--r-- sphinx/pycode/__init__.py 86
-rw-r--r-- sphinx/pycode/pgen2/literals.py 4
2 files changed, 59 insertions, 31 deletions
diff --git a/sphinx/pycode/__init__.py b/sphinx/pycode/__init__.py
index 0141ede4..17dc6afb 100644
--- a/sphinx/pycode/__init__.py
+++ b/sphinx/pycode/__init__.py
@@ -9,6 +9,7 @@
:license: BSD, see LICENSE for details.
"""
+import re
import sys
from os import path
from cStringIO import StringIO
@@ -35,6 +36,9 @@ number2name = pygrammar.number2symbol.copy()
number2name.update(token.tok_name)
+# a regex to recognize coding cookies
+_coding_re = re.compile(r'coding[:=]\s*([-\w.]+)')
+
_eq = nodes.Leaf(token.EQUAL, '=')
@@ -46,8 +50,9 @@ class AttrDocVisitor(nodes.NodeVisitor):
The docstrings can either be in special '#:' comments before the assignment
or in a docstring after it.
"""
- def init(self, scope):
+ def init(self, scope, encoding):
self.scope = scope
+ self.encoding = encoding
self.namespace = []
self.collected = {}
@@ -71,6 +76,7 @@ class AttrDocVisitor(nodes.NodeVisitor):
if not pnode or pnode.type not in (token.INDENT, token.DEDENT):
break
prefix = pnode.get_prefix()
+ prefix = prefix.decode(self.encoding)
docstring = prepare_commentdoc(prefix)
if docstring:
self.add_docstring(node, docstring)
@@ -86,7 +92,8 @@ class AttrDocVisitor(nodes.NodeVisitor):
if prev.type == sym.simple_stmt and \
prev[0].type == sym.expr_stmt and _eq in prev[0].children:
# need to "eval" the string because it's returned in its original form
- docstring = prepare_docstring(literals.evalString(node[0].value))
+ docstring = literals.evalString(node[0].value, self.encoding)
+ docstring = prepare_docstring(docstring)
self.add_docstring(prev[0], docstring)
def visit_funcdef(self, node):
@@ -136,38 +143,48 @@ class ModuleAnalyzer(object):
@classmethod
def for_module(cls, modname):
if ('module', modname) in cls.cache:
- return cls.cache['module', modname]
- if modname not in sys.modules:
- try:
- __import__(modname)
- except ImportError, err:
- raise PycodeError('error importing %r' % modname, err)
- mod = sys.modules[modname]
- if hasattr(mod, '__loader__'):
- try:
- source = mod.__loader__.get_source(modname)
- except Exception, err:
- raise PycodeError('error getting source for %r' % modname, err)
- obj = cls.for_string(source, modname)
- cls.cache['module', modname] = obj
- return obj
- filename = getattr(mod, '__file__', None)
- if filename is None:
- raise PycodeError('no source found for module %r' % modname)
- filename = path.normpath(filename)
- lfilename = filename.lower()
- if lfilename.endswith('.pyo') or lfilename.endswith('.pyc'):
- filename = filename[:-1]
- elif not lfilename.endswith('.py'):
- raise PycodeError('source is not a .py file: %r' % filename)
- if not path.isfile(filename):
- raise PycodeError('source file is not present: %r' % filename)
- obj = cls.for_file(filename, modname)
+ entry = cls.cache['module', modname]
+ if isinstance(entry, PycodeError):
+ raise entry
+ return entry
+
+ try:
+ if modname not in sys.modules:
+ try:
+ __import__(modname)
+ except ImportError, err:
+ raise PycodeError('error importing %r' % modname, err)
+ mod = sys.modules[modname]
+ if hasattr(mod, '__loader__'):
+ try:
+ source = mod.__loader__.get_source(modname)
+ except Exception, err:
+ raise PycodeError('error getting source for %r' % modname, err)
+ obj = cls.for_string(source, modname)
+ cls.cache['module', modname] = obj
+ return obj
+ filename = getattr(mod, '__file__', None)
+ if filename is None:
+ raise PycodeError('no source found for module %r' % modname)
+ filename = path.normpath(filename)
+ lfilename = filename.lower()
+ if lfilename.endswith('.pyo') or lfilename.endswith('.pyc'):
+ filename = filename[:-1]
+ elif not lfilename.endswith('.py'):
+ raise PycodeError('source is not a .py file: %r' % filename)
+ if not path.isfile(filename):
+ raise PycodeError('source file is not present: %r' % filename)
+ obj = cls.for_file(filename, modname)
+ except PycodeError, err:
+ cls.cache['module', modname] = err
+ raise
cls.cache['module', modname] = obj
return obj
def __init__(self, source, modname, srcname):
+ # name of the module
self.modname = modname
+ # name of the source file
self.srcname = srcname
# file-like object yielding source lines
self.source = source
@@ -194,13 +211,22 @@ class ModuleAnalyzer(object):
return
self.tokenize()
self.parsetree = pydriver.parse_tokens(self.tokens)
+ # find the source code encoding
+ encoding = sys.getdefaultencoding()
+ comments = self.parsetree.get_prefix()
+ for line in comments.splitlines()[:2]:
+ match = _coding_re.search(line)
+ if match is not None:
+ encoding = match.group(1)
+ break
+ self.encoding = encoding
def find_attr_docs(self, scope=''):
"""Find class and module-level attributes and their documentation."""
if self.attr_docs is not None:
return self.attr_docs
self.parse()
- attr_visitor = AttrDocVisitor(number2name, scope)
+ attr_visitor = AttrDocVisitor(number2name, scope, self.encoding)
attr_visitor.visit(self.parsetree)
self.attr_docs = attr_visitor.collected
return attr_visitor.collected
diff --git a/sphinx/pycode/pgen2/literals.py b/sphinx/pycode/pgen2/literals.py
index 78667df0..31900291 100644
--- a/sphinx/pycode/pgen2/literals.py
+++ b/sphinx/pycode/pgen2/literals.py
@@ -63,9 +63,11 @@ escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})")
uni_escape_re = re.compile(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3}|"
r"u[0-9a-fA-F]{0,4}|U[0-9a-fA-F]{0,8}|N\{.+?\})")
-def evalString(s):
+def evalString(s, encoding=None):
regex = escape_re
repl = escape
+ if encoding:
+ s = s.decode(encoding)
if s.startswith('u') or s.startswith('U'):
regex = uni_escape_re
s = s[1:]