1 files changed, 82 insertions, 89 deletions
diff --git a/sphinx/io.py b/sphinx/io.py
index bb77db669..3544a751b 100644
--- a/sphinx/io.py
+++ b/sphinx/io.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
     sphinx.io
     ~~~~~~~~~
@@ -9,7 +8,6 @@
     :license: BSD, see LICENSE for details.
 """
 import codecs
-import re
 import warnings
 
 from docutils.core import Publisher
@@ -18,11 +16,9 @@ from docutils.parsers.rst import Parser as RSTParser
 from docutils.readers import standalone
 from docutils.statemachine import StringList, string2lines
 from docutils.writers import UnfilteredWriter
-from six import text_type, iteritems
 from typing import Any, Union  # NOQA
 
 from sphinx.deprecation import RemovedInSphinx30Warning
-from sphinx.locale import __
 from sphinx.transforms import (
     ApplySourceWorkaround, ExtraTranslatableNodes, CitationReferences,
     DefaultSubstitutions, MoveModuleTargets, HandleCodeBlocks, SortIds,
@@ -36,13 +32,16 @@ from sphinx.transforms.i18n import (
 )
 from sphinx.transforms.references import SphinxDomains, SubstitutionDefinitionsRemover
 from sphinx.util import logging
+from sphinx.util import UnicodeDecodeErrorHandler
 from sphinx.util.docutils import LoggingReporter
+from sphinx.util.rst import append_epilog, docinfo_re, prepend_prolog
 from sphinx.versioning import UIDTransform
 
 if False:
     # For type annotation
-    from typing import Any, Dict, List, Tuple, Union  # NOQA
+    from typing import Any, Dict, List, Tuple, Type, Union  # NOQA
     from docutils import nodes  # NOQA
+    from docutils.frontend import Values  # NOQA
     from docutils.io import Input  # NOQA
     from docutils.parsers import Parser  # NOQA
     from docutils.transforms import Transform  # NOQA
@@ -50,8 +49,6 @@ if False:
     from sphinx.builders import Builder  # NOQA
     from sphinx.environment import BuildEnvironment  # NOQA
 
-docinfo_re = re.compile(':\\w+:.*?')
-
 
 logger = logging.getLogger(__name__)
 
@@ -63,21 +60,24 @@ class SphinxBaseReader(standalone.Reader):
     This replaces reporter by Sphinx's on generating document.
     """
 
+    transforms = []  # type: List[Type[Transform]]
+
     def __init__(self, app, *args, **kwargs):
         # type: (Sphinx, Any, Any) -> None
+        self.app = app
         self.env = app.env
-        standalone.Reader.__init__(self, *args, **kwargs)
+        super().__init__(*args, **kwargs)
 
     def get_transforms(self):
-        # type: () -> List[Transform]
-        return standalone.Reader.get_transforms(self) + self.transforms
+        # type: () -> List[Type[Transform]]
+        return super().get_transforms() + self.transforms
 
     def new_document(self):
         # type: () -> nodes.document
         """Creates a new document object which having a special reporter object good
         for logging.
         """
-        document = standalone.Reader.new_document(self)
+        document = super().new_document()
 
         # substitute transformer
         document.transformer = SphinxTransformer(document)
@@ -100,13 +100,32 @@ class SphinxStandaloneReader(SphinxBaseReader):
                   RemoveTranslatableInline, FilterSystemMessages, RefOnlyBulletListTransform,
                   UnreferencedFootnotesDetector, SphinxSmartQuotes, ManpageLink,
                   SphinxDomains, SubstitutionDefinitionsRemover, DoctreeReadEvent,
-                  UIDTransform,
-                  ]  # type: List[Transform]
+                  UIDTransform]
 
     def __init__(self, app, *args, **kwargs):
         # type: (Sphinx, Any, Any) -> None
         self.transforms = self.transforms + app.registry.get_transforms()
-        SphinxBaseReader.__init__(self, app, *args, **kwargs)
+        super().__init__(app, *args, **kwargs)
+
+    def read(self, source, parser, settings):
+        # type: (Input, Parser, Values) -> nodes.document
+        self.source = source
+        if not self.parser:
+            self.parser = parser
+        self.settings = settings
+        self.input = self.read_source()
+        self.parse()
+        return self.document
+
+    def read_source(self):
+        # type: () -> str
+        """Read content from source and do post-process."""
+        content = self.source.read()
+
+        # emit "source-read" event
+        arg = [content]
+        self.app.emit('source-read', self.env.docname, arg)
+        return arg[0]
 
 
 class SphinxI18nReader(SphinxBaseReader):
@@ -158,8 +177,7 @@ def SphinxDummySourceClass(source, *args, **kwargs):
 class SphinxBaseFileInput(FileInput):
     """A base class of SphinxFileInput.
 
-    It supports to replace unknown Unicode characters to '?'. And it also emits
-    Sphinx events :event:`source-read` on reading.
+    It supports to replace unknown Unicode characters to '?'.
     """
 
     def __init__(self, app, env, *args, **kwds):
@@ -167,50 +185,25 @@ class SphinxBaseFileInput(FileInput):
         self.app = app
         self.env = env
 
-        # set up error handler
-        codecs.register_error('sphinx', self.warn_and_replace)  # type: ignore
+        warnings.warn('%s is deprecated.' % self.__class__.__name__,
+                      RemovedInSphinx30Warning, stacklevel=2)
 
         kwds['error_handler'] = 'sphinx'  # py3: handle error on open.
-        FileInput.__init__(self, *args, **kwds)
-
-    def decode(self, data):
-        # type: (Union[unicode, bytes]) -> unicode
-        if isinstance(data, text_type):  # py3: `data` already decoded.
-            return data
-        return data.decode(self.encoding, 'sphinx')  # py2: decoding
-
-    def read(self):
-        # type: () -> unicode
-        """Reads the contents from file.
-
-        After reading, it emits Sphinx event ``source-read``.
-        """
-        data = FileInput.read(self)
-
-        # emit source-read event
-        arg = [data]
-        self.app.emit('source-read', self.env.docname, arg)
-        return arg[0]
+        super().__init__(*args, **kwds)
 
     def warn_and_replace(self, error):
         # type: (Any) -> Tuple
-        """Custom decoding error handler that warns and replaces."""
-        linestart = error.object.rfind(b'\n', 0, error.start)
-        lineend = error.object.find(b'\n', error.start)
-        if lineend == -1:
-            lineend = len(error.object)
-        lineno = error.object.count(b'\n', 0, error.start) + 1
-        logger.warning(__('undecodable source characters, replacing with "?": %r'),
-                       (error.object[linestart + 1:error.start] + b'>>>' +
-                        error.object[error.start:error.end] + b'<<<' +
-                        error.object[error.end:lineend]),
-                       location=(self.env.docname, lineno))
-        return (u'?', error.end)
-
-
-class SphinxFileInput(SphinxBaseFileInput):
+        return UnicodeDecodeErrorHandler(self.env.docname)(error)
+
+
+class SphinxFileInput(FileInput):
     """A basic FileInput for Sphinx."""
-    supported = ('*',)  # special source input
+    supported = ('*',)  # RemovedInSphinx30Warning
+
+    def __init__(self, *args, **kwargs):
+        # type: (Any, Any) -> None
+        kwargs['error_handler'] = 'sphinx'
+        super(SphinxFileInput, self).__init__(*args, **kwargs)
 
 
 class SphinxRSTFileInput(SphinxBaseFileInput):
@@ -230,7 +223,7 @@ class SphinxRSTFileInput(SphinxBaseFileInput):
     supported = ('restructuredtext',)
 
     def prepend_prolog(self, text, prolog):
-        # type: (StringList, unicode) -> None
+        # type: (StringList, str) -> None
         docinfo = self.count_docinfo_lines(text)
         if docinfo:
             # insert a blank line after docinfo
@@ -244,24 +237,22 @@ class SphinxRSTFileInput(SphinxBaseFileInput):
         text.insert(docinfo + lineno + 1, '', '<generated>', 0)
 
     def append_epilog(self, text, epilog):
-        # type: (StringList, unicode) -> None
+        # type: (StringList, str) -> None
         # append a blank line and rst_epilog
         text.append('', '<generated>', 0)
         for lineno, line in enumerate(epilog.splitlines()):
             text.append(line, '<rst_epilog>', lineno)
 
-    def read(self):
+    def read(self):  # type: ignore
         # type: () -> StringList
-        inputstring = SphinxBaseFileInput.read(self)
+        inputstring = super().read()
         lines = string2lines(inputstring, convert_whitespace=True)
         content = StringList()
         for lineno, line in enumerate(lines):
             content.append(line, self.source_path, lineno)
 
-        if self.env.config.rst_prolog:
-            self.prepend_prolog(content, self.env.config.rst_prolog)
-        if self.env.config.rst_epilog:
-            self.append_epilog(content, self.env.config.rst_epilog)
+        prepend_prolog(content, self.env.config.rst_prolog)
+        append_epilog(content, self.env.config.rst_epilog)
 
         return content
 
@@ -281,8 +272,8 @@ class FiletypeNotFoundError(Exception):
 
 
 def get_filetype(source_suffix, filename):
-    # type: (Dict[unicode, unicode], unicode) -> unicode
-    for suffix, filetype in iteritems(source_suffix):
+    # type: (Dict[str, str], str) -> str
+    for suffix, filetype in source_suffix.items():
         if filename.endswith(suffix):
             # If default filetype (None), considered as restructuredtext.
             return filetype or 'restructuredtext'
@@ -291,13 +282,14 @@ def get_filetype(source_suffix, filename):
 
 
 def read_doc(app, env, filename):
-    # type: (Sphinx, BuildEnvironment, unicode) -> nodes.document
+    # type: (Sphinx, BuildEnvironment, str) -> nodes.document
     """Parse a document and convert to doctree."""
-    filetype = get_filetype(app.config.source_suffix, filename)
-    input_class = app.registry.get_source_input(filetype)
+    # set up error_handler for the target document
+    error_handler = UnicodeDecodeErrorHandler(env.docname)
+    codecs.register_error('sphinx', error_handler)  # type: ignore
+
     reader = SphinxStandaloneReader(app)
-    source = input_class(app, env, source=None, source_path=filename,
-                         encoding=env.config.source_encoding)
+    filetype = get_filetype(app.config.source_suffix, filename)
     parser = app.registry.create_source_parser(app, filetype)
     if parser.__class__.__name__ == 'CommonMarkParser' and parser.settings_spec == ():
         # a workaround for recommonmark
@@ -307,25 +299,26 @@ def read_doc(app, env, filename):
         #   CommonMarkParser.
         parser.settings_spec = RSTParser.settings_spec
 
-    pub = Publisher(reader=reader,
-                    parser=parser,
-                    writer=SphinxDummyWriter(),
-                    source_class=SphinxDummySourceClass,
-                    destination=NullOutput())
-    pub.set_components(None, 'restructuredtext', None)
-    pub.process_programmatic_settings(None, env.settings, None)
-    pub.set_source(source, filename)
+    input_class = app.registry.get_source_input(filetype)
+    if input_class:
+        # Sphinx-1.8 style
+        source = input_class(app, env, source=None, source_path=filename,  # type: ignore
+                             encoding=env.config.source_encoding)
+        pub = Publisher(reader=reader,  # type: ignore
+                        parser=parser,
+                        writer=SphinxDummyWriter(),
+                        source_class=SphinxDummySourceClass,
+                        destination=NullOutput())
+        pub.process_programmatic_settings(None, env.settings, None)
+        pub.set_source(source, filename)
+    else:
+        # Sphinx-2.0 style
+        pub = Publisher(reader=reader,
+                        parser=parser,
+                        writer=SphinxDummyWriter(),
+                        source_class=SphinxFileInput)
+        pub.process_programmatic_settings(None, env.settings, None)
+        pub.set_source(source_path=filename)
+
     pub.publish()
     return pub.document
-
-
-def setup(app):
-    # type: (Sphinx) -> Dict[unicode, Any]
-    app.registry.add_source_input(SphinxFileInput)
-    app.registry.add_source_input(SphinxRSTFileInput)
-
-    return {
-        'version': 'builtin',
-        'parallel_read_safe': True,
-        'parallel_write_safe': True,
-    }