sphinx/io.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207

# -*- coding: utf-8 -*-
"""
    sphinx.io
    ~~~~~~~~~

    Input/Output files

    :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import codecs

from docutils.io import FileInput, NullOutput
from docutils.core import Publisher
from docutils.readers import standalone
from docutils.writers import UnfilteredWriter
from six import string_types, text_type, iteritems
from typing import Any, Union  # NOQA

from sphinx.transforms import (
    ApplySourceWorkaround, ExtraTranslatableNodes, CitationReferences,
    DefaultSubstitutions, MoveModuleTargets, HandleCodeBlocks, SortIds,
    AutoNumbering, AutoIndexUpgrader, FilterSystemMessages,
    UnreferencedFootnotesDetector
)
from sphinx.transforms.compact_bullet_list import RefOnlyBulletListTransform
from sphinx.transforms.i18n import (
    PreserveTranslatableMessages, Locale, RemoveTranslatableInline,
)
from sphinx.util import logging
from sphinx.util import import_object, split_docinfo
from sphinx.util.docutils import LoggingReporter

if False:
    # For type annotation
    from typing import Any, Dict, List, Tuple, Union  # NOQA
    from docutils import nodes  # NOQA
    from docutils.io import Input  # NOQA
    from docutils.parsers import Parser  # NOQA
    from docutils.transforms import Transform  # NOQA
    from sphinx.application import Sphinx  # NOQA
    from sphinx.builders import Builder  # NOQA
    from sphinx.environment import BuildEnvironment  # NOQA


logger = logging.getLogger(__name__)


class SphinxBaseReader(standalone.Reader):
    """
    Add our source parsers
    """
    def get_transforms(self):
        # type: () -> List[Transform]
        return standalone.Reader.get_transforms(self) + self.transforms

    def new_document(self):
        # type: () -> nodes.document
        document = standalone.Reader.new_document(self)
        reporter = document.reporter
        document.reporter = LoggingReporter(reporter.source, reporter.report_level,
                                            reporter.halt_level, reporter.debug_flag,
                                            reporter.error_handler)
        return document


class SphinxStandaloneReader(SphinxBaseReader):
    """
    Add our own transforms.
    """
    transforms = [ApplySourceWorkaround, ExtraTranslatableNodes, PreserveTranslatableMessages,
                  Locale, CitationReferences, DefaultSubstitutions, MoveModuleTargets,
                  HandleCodeBlocks, AutoNumbering, AutoIndexUpgrader, SortIds,
                  RemoveTranslatableInline, PreserveTranslatableMessages, FilterSystemMessages,
                  RefOnlyBulletListTransform, UnreferencedFootnotesDetector]


class SphinxI18nReader(SphinxBaseReader):
    """
    Replacer for document.reporter.get_source_and_line method.

    reST text lines for translation do not have the original source line number.
    This class provides the correct line numbers when reporting.
    """

    lineno = None  # type: int
    transforms = [ApplySourceWorkaround, ExtraTranslatableNodes, CitationReferences,
                  DefaultSubstitutions, MoveModuleTargets, HandleCodeBlocks,
                  AutoNumbering, SortIds, RemoveTranslatableInline,
                  FilterSystemMessages, RefOnlyBulletListTransform,
                  UnreferencedFootnotesDetector]

    def set_lineno_for_reporter(self, lineno):
        # type: (int) -> None
        self.lineno = lineno

    def new_document(self):
        # type: () -> nodes.document
        document = SphinxBaseReader.new_document(self)
        reporter = document.reporter

        def get_source_and_line(lineno=None):
            # type: (int) -> Tuple[unicode, int]
            return reporter.source, self.lineno

        reporter.get_source_and_line = get_source_and_line
        return document


class SphinxDummyWriter(UnfilteredWriter):
    supported = ('html',)  # needed to keep "meta" nodes

    def translate(self):
        # type: () -> None
        pass


def SphinxDummySourceClass(source, *args, **kwargs):
    """Bypass source object as is to cheat Publisher."""
    return source


class SphinxBaseFileInput(FileInput):
    """A base class of SphinxFileInput."""

    def __init__(self, app, env, *args, **kwds):
        # type: (Sphinx, BuildEnvironment, Any, Any) -> None
        self.app = app
        self.env = env

        # set up error handler
        codecs.register_error('sphinx', self.warn_and_replace)  # type: ignore

        kwds['error_handler'] = 'sphinx'  # py3: handle error on open.
        FileInput.__init__(self, *args, **kwds)

    def decode(self, data):
        # type: (Union[unicode, bytes]) -> unicode
        if isinstance(data, text_type):  # py3: `data` already decoded.
            return data
        return data.decode(self.encoding, 'sphinx')  # py2: decoding

    def read(self):
        # type: () -> unicode
        data = FileInput.read(self)

        # emit source-read event
        arg = [data]
        self.app.emit('source-read', self.env.docname, arg)
        return arg[0]

    def warn_and_replace(self, error):
        # type: (Any) -> Tuple
        """Custom decoding error handler that warns and replaces."""
        linestart = error.object.rfind(b'\n', 0, error.start)
        lineend = error.object.find(b'\n', error.start)
        if lineend == -1:
            lineend = len(error.object)
        lineno = error.object.count(b'\n', 0, error.start) + 1
        logger.warning('undecodable source characters, replacing with "?": %r',
                       (error.object[linestart + 1:error.start] + b'>>>' +
                        error.object[error.start:error.end] + b'<<<' +
                        error.object[error.end:lineend]),
                       location=(self.env.docname, lineno))
        return (u'?', error.end)


class SphinxFileInput(SphinxBaseFileInput):
    pass


class SphinxRSTFileInput(SphinxBaseFileInput):
    def read(self):
        # type: () -> unicode
        data = SphinxBaseFileInput.read(self)
        docinfo, data = split_docinfo(data)
        if self.env.config.rst_epilog:
            data = data + '\n' + self.env.config.rst_epilog + '\n'
        if self.env.config.rst_prolog:
            data = self.env.config.rst_prolog + '\n' + data
        return docinfo + data


def read_doc(app, env, filename):
    # type: (Sphinx, BuildEnvironment, unicode) -> nodes.document
    """Parse a document and convert to doctree."""
    input_class = app.registry.get_source_input(filename)
    reader = SphinxStandaloneReader()
    source = input_class(app, env, source=None, source_path=filename,
                         encoding=env.config.source_encoding)
    parser = app.registry.create_source_parser(app, filename)

    pub = Publisher(reader=reader,
                    parser=parser,
                    writer=SphinxDummyWriter(),
                    source_class=SphinxDummySourceClass,
                    destination=NullOutput())
    pub.set_components(None, 'restructuredtext', None)
    pub.process_programmatic_settings(None, env.settings, None)
    pub.set_source(source, filename)
    pub.publish()
    return pub.document


def setup(app):
    app.registry.add_source_input('*', SphinxFileInput)
    app.registry.add_source_input('restructuredtext', SphinxRSTFileInput)