1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
|
# defusedxml
#
# Copyright (c) 2013-2020 by Christian Heimes <christian@python.org>
# Licensed to PSF under a Contributor Agreement.
# See https://www.python.org/psf/license for licensing details.
"""Defused xml.etree.ElementTree facade
"""
from __future__ import print_function, absolute_import
import sys
import warnings
from xml.etree.ElementTree import ParseError
from xml.etree.ElementTree import TreeBuilder as _TreeBuilder
from xml.etree.ElementTree import parse as _parse
from xml.etree.ElementTree import tostring
import importlib
from .common import DTDForbidden, EntitiesForbidden, ExternalReferenceForbidden
# Dotted name of the standard-library module this facade stands in for (the
# module docstring describes this file as a defused xml.etree.ElementTree
# facade).
# NOTE(review): not read anywhere in this file; presumably consumed by other
# parts of the package -- confirm.
__origin__ = "xml.etree.ElementTree"
def _get_py3_cls():
    """Return the pure-Python ``XMLParser`` class and ``iterparse`` function.

    Python 3.3 hides the pure Python code but defusedxml requires it.
    The code is based on test.support.import_fresh_module().

    A fresh copy of ``xml.etree.ElementTree`` is imported while the
    ``_elementtree`` module is blocked in ``sys.modules``.  Afterwards
    ``sys.modules`` and the ``ElementTree`` attribute of the ``xml.etree``
    package are put back the way they were found, so the rest of the process
    keeps using whatever module it already had.

    Returns:
        tuple: ``(XMLParser, iterparse)`` taken from the freshly imported
        module.
    """
    pymodname = "xml.etree.ElementTree"
    cmodname = "_elementtree"

    pymod = sys.modules.pop(pymodname, None)
    cmod = sys.modules.pop(cmodname, None)

    # A ``None`` entry in sys.modules makes ``import _elementtree`` fail, so
    # the fresh import below cannot pick up the accelerated implementation.
    sys.modules[cmodname] = None
    try:
        pure_pymod = importlib.import_module(pymodname)
    finally:
        # restore module
        if pymod is not None:
            sys.modules[pymodname] = pymod
        else:
            # The module was not loaded before this call.  Storing ``None``
            # under its name would make every later import of it fail, so
            # drop the freshly imported entry instead.
            sys.modules.pop(pymodname, None)
        if cmod is not None:
            sys.modules[cmodname] = cmod
        else:
            sys.modules.pop(cmodname, None)
        # restore attribute on original package
        etree_pkg = sys.modules["xml.etree"]
        if pymod is not None:
            etree_pkg.ElementTree = pymod
        elif hasattr(etree_pkg, "ElementTree"):
            del etree_pkg.ElementTree

    _XMLParser = pure_pymod.XMLParser
    _iterparse = pure_pymod.iterparse
    # patch pure module to use ParseError from C extension
    pure_pymod.ParseError = ParseError

    return _XMLParser, _iterparse
# Resolve the XMLParser class and iterparse function once, at import time;
# DefusedXMLParser subclasses the former and iterparse() delegates to the
# latter.
_XMLParser, _iterparse = _get_py3_cls()
# Unique marker used as the default of DefusedXMLParser's ``html`` parameter,
# so an explicitly passed value can be told apart from an omitted argument.
_sentinel = object()
class DefusedXMLParser(_XMLParser):
    """XMLParser subclass that refuses dangerous XML constructs.

    Depending on the ``forbid_*`` flags, handlers are installed on the
    underlying expat parser that raise as soon as the document starts a
    doctype declaration, declares an entity, or references an external
    entity.
    """

    def __init__(
        self,
        html=_sentinel,
        target=None,
        encoding=None,
        forbid_dtd=False,
        forbid_entities=True,
        forbid_external=True,
    ):
        """Create the parser and install the requested guards.

        Args:
            html: Deprecated and unsupported.  A true value raises
                ``TypeError``; any other explicitly passed value only emits
                a ``DeprecationWarning``.
            target: Passed through to the base parser.
            encoding: Passed through to the base parser.
            forbid_dtd: If true, raise ``DTDForbidden`` when a doctype
                declaration starts.
            forbid_entities: If true, raise ``EntitiesForbidden`` on any
                entity declaration.
            forbid_external: If true, raise ``ExternalReferenceForbidden``
                on any external entity reference.

        Raises:
            TypeError: If ``html`` is passed with a true value.
        """
        super().__init__(target=target, encoding=encoding)
        if html is not _sentinel:
            # the 'html' argument has been deprecated and ignored in all
            # supported versions of Python. Python 3.8 finally removed it.
            if html:
                raise TypeError("'html=True' is no longer supported.")
            else:
                # stacklevel=2 attributes the warning to the code that
                # instantiated the parser instead of to this module, so it
                # is reported against the line that needs changing.
                warnings.warn(
                    "'html' keyword argument is no longer supported. Pass "
                    "in arguments as keyword arguments.",
                    category=DeprecationWarning,
                    stacklevel=2,
                )

        self.forbid_dtd = forbid_dtd
        self.forbid_entities = forbid_entities
        self.forbid_external = forbid_external
        # Only the handlers for the enabled guards are replaced; the others
        # keep whatever the base class configured.
        parser = self.parser
        if self.forbid_dtd:
            parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl
        if self.forbid_entities:
            parser.EntityDeclHandler = self.defused_entity_decl
            parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl
        if self.forbid_external:
            parser.ExternalEntityRefHandler = self.defused_external_entity_ref_handler

    def defused_start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """Handler for doctype declarations: always raises ``DTDForbidden``."""
        raise DTDForbidden(name, sysid, pubid)

    def defused_entity_decl(
        self, name, is_parameter_entity, value, base, sysid, pubid, notation_name
    ):
        """Handler for entity declarations: always raises ``EntitiesForbidden``."""
        raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name)

    def defused_unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Handler for unparsed entity declarations: always raises ``EntitiesForbidden``."""
        # expat 1.2
        raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name)  # pragma: no cover

    def defused_external_entity_ref_handler(self, context, base, sysid, pubid):
        """Handler for external entity references: always raises ``ExternalReferenceForbidden``."""
        raise ExternalReferenceForbidden(context, base, sysid, pubid)
# Aliases: bind the defused parser class to the parser names that this module
# exports through __all__.
# XMLParse is a typo, keep it for backwards compatibility
XMLTreeBuilder = XMLParse = XMLParser = DefusedXMLParser
def parse(source, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """Parse *source* with a defused parser and return the parse result.

    A caller-supplied *parser* is used as is, in which case the ``forbid_*``
    flags are ignored.  Otherwise a ``DefusedXMLParser`` configured from the
    flags is created for this call.
    """
    if parser is not None:
        return _parse(source, parser)

    guards = {
        "forbid_dtd": forbid_dtd,
        "forbid_entities": forbid_entities,
        "forbid_external": forbid_external,
    }
    defused = DefusedXMLParser(target=_TreeBuilder(), **guards)
    return _parse(source, defused)
def iterparse(
    source,
    events=None,
    parser=None,
    forbid_dtd=False,
    forbid_entities=True,
    forbid_external=True,
):
    """Incrementally parse *source* with a defused parser.

    A caller-supplied *parser* is used as is, in which case the ``forbid_*``
    flags are ignored.  Otherwise a ``DefusedXMLParser`` configured from the
    flags is created for this call.  *events* is forwarded unchanged.
    """
    if parser is not None:
        return _iterparse(source, events, parser)

    guards = {
        "forbid_dtd": forbid_dtd,
        "forbid_entities": forbid_entities,
        "forbid_external": forbid_external,
    }
    defused = DefusedXMLParser(target=_TreeBuilder(), **guards)
    return _iterparse(source, events, defused)
def fromstring(text, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """Parse a complete XML document held in *text* with a defused parser.

    A new ``DefusedXMLParser`` configured from the ``forbid_*`` flags is fed
    *text* in one go; the value returned by closing the parser is returned.
    """
    builder = _TreeBuilder()
    defused = DefusedXMLParser(
        target=builder,
        forbid_dtd=forbid_dtd,
        forbid_entities=forbid_entities,
        forbid_external=forbid_external,
    )
    defused.feed(text)
    result = defused.close()
    return result
# XML is a second public name for fromstring; both are listed in __all__.
XML = fromstring
def fromstringlist(sequence, forbid_dtd=False, forbid_entities=True, forbid_external=True):
    """Parse an XML document supplied as a sequence of fragments.

    A new ``DefusedXMLParser`` configured from the ``forbid_*`` flags is fed
    every item of *sequence* in order; the value returned by closing the
    parser is returned.
    """
    guards = {
        "forbid_dtd": forbid_dtd,
        "forbid_entities": forbid_entities,
        "forbid_external": forbid_external,
    }
    defused = DefusedXMLParser(target=_TreeBuilder(), **guards)
    for fragment in sequence:
        defused.feed(fragment)
    return defused.close()
# Names exported by ``from <this module> import *``.  DefusedXMLParser is not
# listed under its own name; it is exported through the XMLParse, XMLParser
# and XMLTreeBuilder aliases.
__all__ = [
"ParseError",
"XML",
"XMLParse",
"XMLParser",
"XMLTreeBuilder",
"fromstring",
"fromstringlist",
"iterparse",
"parse",
"tostring",
]
|