summaryrefslogtreecommitdiff
path: root/sphinx/io.py
diff options
context:
space:
mode:
Diffstat (limited to 'sphinx/io.py')
-rw-r--r--sphinx/io.py246
1 files changed, 178 insertions, 68 deletions
diff --git a/sphinx/io.py b/sphinx/io.py
index 8365e22e0..2d584de91 100644
--- a/sphinx/io.py
+++ b/sphinx/io.py
@@ -8,10 +8,15 @@
:copyright: Copyright 2007-2018 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
-from docutils.io import FileInput
+import re
+import codecs
+
+from docutils.io import FileInput, NullOutput
+from docutils.core import Publisher
from docutils.readers import standalone
+from docutils.statemachine import StringList, string2lines
from docutils.writers import UnfilteredWriter
-from six import string_types, text_type, iteritems
+from six import text_type
from typing import Any, Union # NOQA
from sphinx.transforms import (
@@ -24,7 +29,7 @@ from sphinx.transforms.compact_bullet_list import RefOnlyBulletListTransform
from sphinx.transforms.i18n import (
PreserveTranslatableMessages, Locale, RemoveTranslatableInline,
)
-from sphinx.util import import_object, split_docinfo
+from sphinx.util import logging
from sphinx.util.docutils import LoggingReporter
if False:
@@ -38,41 +43,18 @@ if False:
from sphinx.builders import Builder # NOQA
from sphinx.environment import BuildEnvironment # NOQA
+docinfo_re = re.compile(':\\w+:.*?')
+
+
+logger = logging.getLogger(__name__)
+
class SphinxBaseReader(standalone.Reader):
"""
- Add our source parsers
- """
- def __init__(self, app, parsers={}, *args, **kwargs):
- # type: (Sphinx, Dict[unicode, Parser], Any, Any) -> None
- standalone.Reader.__init__(self, *args, **kwargs)
- self.parser_map = {} # type: Dict[unicode, Parser]
- for suffix, parser_class in parsers.items():
- if isinstance(parser_class, string_types):
- parser_class = import_object(parser_class, 'source parser') # type: ignore
- parser = parser_class()
- if hasattr(parser, 'set_application'):
- parser.set_application(app)
- self.parser_map[suffix] = parser
-
- def read(self, source, parser, settings):
- # type: (Input, Parser, Dict) -> nodes.document
- self.source = source
-
- for suffix in self.parser_map:
- if source.source_path.endswith(suffix):
- self.parser = self.parser_map[suffix]
- break
- else:
- # use special parser for unknown file-extension '*' (if exists)
- self.parser = self.parser_map.get('*')
+ A base class of readers for Sphinx.
- if not self.parser:
- self.parser = parser
- self.settings = settings
- self.input = self.source.read()
- self.parse()
- return self.document
+ This replaces reporter by Sphinx's on generating document.
+ """
def get_transforms(self):
# type: () -> List[Transform]
@@ -80,50 +62,59 @@ class SphinxBaseReader(standalone.Reader):
def new_document(self):
# type: () -> nodes.document
+ """Creates a new document object which having a special reporter object good
+ for logging.
+ """
document = standalone.Reader.new_document(self)
reporter = document.reporter
- document.reporter = LoggingReporter(reporter.source, reporter.report_level,
- reporter.halt_level, reporter.debug_flag,
- reporter.error_handler)
+ document.reporter = LoggingReporter.from_reporter(reporter)
+ document.reporter.set_source(self.source)
return document
class SphinxStandaloneReader(SphinxBaseReader):
"""
- Add our own transforms.
+ A basic document reader for Sphinx.
"""
transforms = [ApplySourceWorkaround, ExtraTranslatableNodes, PreserveTranslatableMessages,
Locale, CitationReferences, DefaultSubstitutions, MoveModuleTargets,
HandleCodeBlocks, AutoNumbering, AutoIndexUpgrader, SortIds,
RemoveTranslatableInline, PreserveTranslatableMessages, FilterSystemMessages,
- RefOnlyBulletListTransform, UnreferencedFootnotesDetector]
+ RefOnlyBulletListTransform, UnreferencedFootnotesDetector
+ ] # type: List[Transform]
+
+ def __init__(self, app, *args, **kwargs):
+ # type: (Sphinx, Any, Any) -> None
+ self.transforms = self.transforms + app.registry.get_transforms()
+ SphinxBaseReader.__init__(self, *args, **kwargs) # type: ignore
class SphinxI18nReader(SphinxBaseReader):
"""
- Replacer for document.reporter.get_source_and_line method.
+ A document reader for i18n.
- reST text lines for translation do not have the original source line number.
- This class provides the correct line numbers when reporting.
+ This returns the source line number of original text as current source line number
+ to let users know where the error happened.
+ Because the translated texts are partial and they don't have correct line numbers.
"""
+ lineno = None # type: int
transforms = [ApplySourceWorkaround, ExtraTranslatableNodes, CitationReferences,
DefaultSubstitutions, MoveModuleTargets, HandleCodeBlocks,
AutoNumbering, SortIds, RemoveTranslatableInline,
FilterSystemMessages, RefOnlyBulletListTransform,
UnreferencedFootnotesDetector]
- def __init__(self, *args, **kwargs):
- # type: (Any, Any) -> None
- SphinxBaseReader.__init__(self, *args, **kwargs)
- self.lineno = None # type: int
-
def set_lineno_for_reporter(self, lineno):
# type: (int) -> None
+ """Stores the source line number of original text."""
self.lineno = lineno
def new_document(self):
# type: () -> nodes.document
+ """Creates a new document object which having a special reporter object for
+ translation.
+ """
document = SphinxBaseReader.new_document(self)
reporter = document.reporter
@@ -136,6 +127,8 @@ class SphinxI18nReader(SphinxBaseReader):
class SphinxDummyWriter(UnfilteredWriter):
+ """Dummy writer module used for generating doctree."""
+
supported = ('html',) # needed to keep "meta" nodes
def translate(self):
@@ -143,11 +136,26 @@ class SphinxDummyWriter(UnfilteredWriter):
pass
-class SphinxFileInput(FileInput):
+def SphinxDummySourceClass(source, *args, **kwargs):
+ """Bypass source object as is to cheat Publisher."""
+ return source
+
+
+class SphinxBaseFileInput(FileInput):
+ """A base class of SphinxFileInput.
+
+ It supports to replace unknown Unicode characters to '?'. And it also emits
+ Sphinx events ``source-read`` on reading.
+ """
+
def __init__(self, app, env, *args, **kwds):
# type: (Sphinx, BuildEnvironment, Any, Any) -> None
self.app = app
self.env = env
+
+ # set up error handler
+ codecs.register_error('sphinx', self.warn_and_replace) # type: ignore
+
kwds['error_handler'] = 'sphinx' # py3: handle error on open.
FileInput.__init__(self, *args, **kwds)
@@ -159,25 +167,127 @@ class SphinxFileInput(FileInput):
def read(self):
# type: () -> unicode
- def get_parser_type(source_path):
- # type: (unicode) -> Tuple[unicode]
- for suffix, parser_class in iteritems(self.app.registry.get_source_parsers()):
- if source_path.endswith(suffix):
- if isinstance(parser_class, string_types):
- parser_class = import_object(parser_class, 'source parser') # type: ignore # NOQA
- return parser_class.supported
- else:
- return ('restructuredtext',)
+ """Reads the contents from file.
+ After reading, it emits Sphinx event ``source-read``.
+ """
data = FileInput.read(self)
- if self.app:
- arg = [data]
- self.app.emit('source-read', self.env.docname, arg)
- data = arg[0]
- docinfo, data = split_docinfo(data)
- if 'restructuredtext' in get_parser_type(self.source_path):
- if self.env.config.rst_epilog:
- data = data + '\n' + self.env.config.rst_epilog + '\n'
- if self.env.config.rst_prolog:
- data = self.env.config.rst_prolog + '\n' + data
- return docinfo + data
+
+ # emit source-read event
+ arg = [data]
+ self.app.emit('source-read', self.env.docname, arg)
+ return arg[0]
+
+ def warn_and_replace(self, error):
+ # type: (Any) -> Tuple
+ """Custom decoding error handler that warns and replaces."""
+ linestart = error.object.rfind(b'\n', 0, error.start)
+ lineend = error.object.find(b'\n', error.start)
+ if lineend == -1:
+ lineend = len(error.object)
+ lineno = error.object.count(b'\n', 0, error.start) + 1
+ logger.warning('undecodable source characters, replacing with "?": %r',
+ (error.object[linestart + 1:error.start] + b'>>>' +
+ error.object[error.start:error.end] + b'<<<' +
+ error.object[error.end:lineend]),
+ location=(self.env.docname, lineno))
+ return (u'?', error.end)
+
+
+class SphinxFileInput(SphinxBaseFileInput):
+ """A basic FileInput for Sphinx."""
+ pass
+
+
+class SphinxRSTFileInput(SphinxBaseFileInput):
+ """A reST FileInput for Sphinx.
+
+ This FileInput automatically prepends and appends text by :confval:`rst_prolog` and
+ :confval:`rst_epilog`.
+
+ .. important::
+
+ This FileInput uses an instance of ``StringList`` as a return value of ``read()``
+ method to indicate original source filename and line numbers after prepending and
+ appending.
+ For that reason, ``sphinx.parsers.RSTParser`` should be used with this to parse
+ a content correctly.
+ """
+
+ def prepend_prolog(self, text, prolog):
+ # type: (StringList, unicode) -> None
+ docinfo = self.count_docinfo_lines(text)
+ if docinfo:
+ # insert a blank line after docinfo
+ text.insert(docinfo, '', '<generated>', 0)
+ docinfo += 1
+
+ # insert prolog (after docinfo if exists)
+ for lineno, line in enumerate(prolog.splitlines()):
+ text.insert(docinfo + lineno, line, '<rst_prolog>', lineno)
+
+ text.insert(docinfo + lineno + 1, '', '<generated>', 0)
+
+ def append_epilog(self, text, epilog):
+ # type: (StringList, unicode) -> None
+ # append a blank line and rst_epilog
+ text.append('', '<generated>', 0)
+ for lineno, line in enumerate(epilog.splitlines()):
+ text.append(line, '<rst_epilog>', lineno)
+
+ def read(self):
+ # type: () -> StringList
+ inputstring = SphinxBaseFileInput.read(self)
+ lines = string2lines(inputstring, convert_whitespace=True)
+ content = StringList()
+ for lineno, line in enumerate(lines):
+ content.append(line, self.source_path, lineno)
+
+ if self.env.config.rst_prolog:
+ self.prepend_prolog(content, self.env.config.rst_prolog)
+ if self.env.config.rst_epilog:
+ self.append_epilog(content, self.env.config.rst_epilog)
+
+ return content
+
+ def count_docinfo_lines(self, content):
+ # type: (StringList) -> int
+ if len(content) == 0:
+ return 0
+ else:
+ for lineno, line in enumerate(content.data):
+ if not docinfo_re.match(line):
+ break
+ return lineno
+
+
+def read_doc(app, env, filename):
+ # type: (Sphinx, BuildEnvironment, unicode) -> nodes.document
+ """Parse a document and convert to doctree."""
+ input_class = app.registry.get_source_input(filename)
+ reader = SphinxStandaloneReader(app)
+ source = input_class(app, env, source=None, source_path=filename,
+ encoding=env.config.source_encoding)
+ parser = app.registry.create_source_parser(app, filename)
+
+ pub = Publisher(reader=reader,
+ parser=parser,
+ writer=SphinxDummyWriter(),
+ source_class=SphinxDummySourceClass,
+ destination=NullOutput())
+ pub.set_components(None, 'restructuredtext', None)
+ pub.process_programmatic_settings(None, env.settings, None)
+ pub.set_source(source, filename)
+ pub.publish()
+ return pub.document
+
+
+def setup(app):
+ app.registry.add_source_input('*', SphinxFileInput)
+ app.registry.add_source_input('restructuredtext', SphinxRSTFileInput)
+
+ return {
+ 'version': 'builtin',
+ 'parallel_read_safe': True,
+ 'parallel_write_safe': True,
+ }