summaryrefslogtreecommitdiff
path: root/src/lxml
diff options
context:
space:
mode:
authorStefan Behnel <stefan_ml@behnel.de>2019-04-26 18:07:08 +0200
committerStefan Behnel <stefan_ml@behnel.de>2019-04-26 18:07:08 +0200
commitc1732d3bbf5bc46d4f91c3a2f45cc88125083d88 (patch)
tree9e54dce0628d9f67c246b1d06e63f786886c50b2 /src/lxml
parent1ce10a552b45d81c287ad5ffc66b32ebef6266ae (diff)
downloadpython-lxml-c1732d3bbf5bc46d4f91c3a2f45cc88125083d88.tar.gz
Add C14N 2.0 implementation.
Diffstat (limited to 'src/lxml')
-rw-r--r--src/lxml/serializer.pxi320
-rw-r--r--src/lxml/tests/c14n-20/c14nComment.xml4
-rw-r--r--src/lxml/tests/c14n-20/c14nDefault.xml3
-rw-r--r--src/lxml/tests/c14n-20/c14nPrefix.xml4
-rw-r--r--src/lxml/tests/c14n-20/c14nPrefixQname.xml7
-rw-r--r--src/lxml/tests/c14n-20/c14nPrefixQnameXpathElem.xml8
-rw-r--r--src/lxml/tests/c14n-20/c14nQname.xml6
-rw-r--r--src/lxml/tests/c14n-20/c14nQnameElem.xml6
-rw-r--r--src/lxml/tests/c14n-20/c14nQnameXpathElem.xml7
-rw-r--r--src/lxml/tests/c14n-20/c14nTrim.xml4
-rw-r--r--src/lxml/tests/c14n-20/doc.dtd6
-rw-r--r--src/lxml/tests/c14n-20/doc.xsl5
-rw-r--r--src/lxml/tests/c14n-20/inC14N1.xml14
-rw-r--r--src/lxml/tests/c14n-20/inC14N2.xml11
-rw-r--r--src/lxml/tests/c14n-20/inC14N3.xml18
-rw-r--r--src/lxml/tests/c14n-20/inC14N4.xml13
-rw-r--r--src/lxml/tests/c14n-20/inC14N5.xml12
-rw-r--r--src/lxml/tests/c14n-20/inC14N6.xml2
-rw-r--r--src/lxml/tests/c14n-20/inNsContent.xml4
-rw-r--r--src/lxml/tests/c14n-20/inNsDefault.xml3
-rw-r--r--src/lxml/tests/c14n-20/inNsPushdown.xml6
-rw-r--r--src/lxml/tests/c14n-20/inNsRedecl.xml3
-rw-r--r--src/lxml/tests/c14n-20/inNsSort.xml4
-rw-r--r--src/lxml/tests/c14n-20/inNsSuperfluous.xml4
-rw-r--r--src/lxml/tests/c14n-20/inNsXml.xml3
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N1_c14nComment.xml6
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N1_c14nDefault.xml4
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N2_c14nDefault.xml11
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N2_c14nTrim.xml1
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N3_c14nDefault.xml14
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N3_c14nPrefix.xml14
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N3_c14nTrim.xml1
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N4_c14nDefault.xml10
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N4_c14nTrim.xml2
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N5_c14nDefault.xml3
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N5_c14nTrim.xml1
-rw-r--r--src/lxml/tests/c14n-20/out_inC14N6_c14nDefault.xml1
-rw-r--r--src/lxml/tests/c14n-20/out_inNsContent_c14nDefault.xml4
-rw-r--r--src/lxml/tests/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml4
-rw-r--r--src/lxml/tests/c14n-20/out_inNsContent_c14nQnameElem.xml4
-rw-r--r--src/lxml/tests/c14n-20/out_inNsContent_c14nQnameXpathElem.xml4
-rw-r--r--src/lxml/tests/c14n-20/out_inNsDefault_c14nDefault.xml3
-rw-r--r--src/lxml/tests/c14n-20/out_inNsDefault_c14nPrefix.xml3
-rw-r--r--src/lxml/tests/c14n-20/out_inNsPushdown_c14nDefault.xml6
-rw-r--r--src/lxml/tests/c14n-20/out_inNsPushdown_c14nPrefix.xml6
-rw-r--r--src/lxml/tests/c14n-20/out_inNsRedecl_c14nDefault.xml3
-rw-r--r--src/lxml/tests/c14n-20/out_inNsRedecl_c14nPrefix.xml3
-rw-r--r--src/lxml/tests/c14n-20/out_inNsSort_c14nDefault.xml4
-rw-r--r--src/lxml/tests/c14n-20/out_inNsSort_c14nPrefix.xml4
-rw-r--r--src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nDefault.xml4
-rw-r--r--src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nPrefix.xml4
-rw-r--r--src/lxml/tests/c14n-20/out_inNsXml_c14nDefault.xml3
-rw-r--r--src/lxml/tests/c14n-20/out_inNsXml_c14nPrefix.xml3
-rw-r--r--src/lxml/tests/c14n-20/out_inNsXml_c14nPrefixQname.xml3
-rw-r--r--src/lxml/tests/c14n-20/out_inNsXml_c14nQname.xml3
-rw-r--r--src/lxml/tests/c14n-20/world.txt1
-rw-r--r--src/lxml/tests/test_elementtree.py181
57 files changed, 794 insertions, 1 deletions
diff --git a/src/lxml/serializer.pxi b/src/lxml/serializer.pxi
index d0e7ef56..7bc69202 100644
--- a/src/lxml/serializer.pxi
+++ b/src/lxml/serializer.pxi
@@ -856,6 +856,326 @@ cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments,
message = errors[0].message
raise C14NError(message)
+
+# C14N 2.0
+
+def canonicalize(write, xml_data=None, *, file=None, **options):
+    """Convert XML to its C14N 2.0 serialised form.
+
+    The C14N serialised output is written using the *write* function.
+    To write to a file, open it in text mode with encoding "utf-8" and pass
+    its ``.write`` method.
+
+    Either *xml_data* (an XML string) or *file* (a file-like object) must be
+    provided as input.  If both are given, *xml_data* takes precedence and
+    *file* is not read.
+
+    The configuration options are the same as for the ``C14NWriterTarget``.
+
+    Raises ``ValueError`` if neither *xml_data* nor *file* is provided.
+    """
+    # Fail early with a clear message instead of closing a parser that was
+    # never fed any input.
+    if xml_data is None and file is None:
+        raise ValueError("Either 'xml_data' or 'file' must be provided as input")
+
+    cdef _FeedParser parser = XMLParser(
+        target=C14NWriterTarget(write, **options),
+        attribute_defaults=True,
+        collect_ids=False,
+    )
+
+    try:
+        if xml_data is not None:
+            parser.feed(xml_data)
+        else:
+            # Feed the file in 64 KiB chunks to avoid loading it completely.
+            d = file.read(64*1024)
+            while d:
+                parser.feed(d)
+                d = file.read(64*1024)
+    finally:
+        # Always finish the parser run, also when feeding failed.
+        parser.close()
+
+
+# Matches text that consists of exactly one "prefix:name" pair.
+# Raw string literal: "\w" is not a valid string escape sequence.
+cdef object _looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
+
+
+cdef class C14NWriterTarget:
+    """
+    Canonicalization writer target for the XMLParser.
+
+    Serialises parse events to XML C14N 2.0.  The serialised output is
+    passed in chunks to the *write* callable given to the constructor.
+
+    Configuration options:
+
+    - *comments*: set to true to include comments
+    - *strip_text*: set to true to strip whitespace before and after text content
+    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
+    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
+                          should be replaced in text content
+    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
+                           should be replaced in text content
+    """
+    # Callable that receives each chunk of serialised output.
+    cdef object _write
+    # Text chunks collected by data() since the last flush.
+    cdef list _data
+    # Tag names whose text content may be a prefixed name (None if not configured).
+    cdef set _qname_aware_tags
+    # Bound set.intersection() of the qname aware attribute names (None if not configured).
+    cdef object _find_qname_aware_attrs
+    # Stack of (uri, prefix) lists with the declarations written to the output.
+    cdef list _declared_ns_stack
+    # Stack of (uri, prefix) lists with the declarations received via start_ns().
+    cdef list _ns_stack
+    # Maps namespace URIs to their generated "n{number}" prefix.
+    cdef dict _prefix_map
+    # Stack of whitespace preservation flags, one per written start tag.
+    cdef list _preserve_space
+    # (tag, attrs, new_namespaces) of a qname aware start tag that was
+    # deferred until its text content is known, or None.
+    cdef tuple _pending_start
+    cdef bint _comments
+    cdef bint _strip_text
+    cdef bint _rewrite_prefixes
+    # Set when the first start tag was written.
+    cdef bint _root_seen
+    # Set when the end tag of the outermost element was written.
+    cdef bint _root_done
+
+    def __init__(self, write, *,
+                 comments=False, strip_text=False, rewrite_prefixes=False,
+                 qname_aware_tags=None, qname_aware_attrs=None):
+        self._write = write
+        self._data = []
+        self._comments = comments
+        self._strip_text = strip_text
+
+        self._rewrite_prefixes = rewrite_prefixes
+        if qname_aware_tags:
+            self._qname_aware_tags = set(qname_aware_tags)
+        else:
+            self._qname_aware_tags = None
+        if qname_aware_attrs:
+            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
+        else:
+            self._find_qname_aware_attrs = None
+
+        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
+        self._declared_ns_stack = [[
+            ("http://www.w3.org/XML/1998/namespace", "xml"),
+        ]]
+        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
+        self._ns_stack = []
+        if not rewrite_prefixes:
+            self._ns_stack.append(list(_DEFAULT_NAMESPACE_PREFIXES.items()))
+        self._ns_stack.append([])
+        self._prefix_map = {}
+        self._preserve_space = [False]
+        self._pending_start = None
+        self._root_seen = False
+        self._root_done = False
+
+    def _iter_namespaces(self, ns_stack):
+        # Yield (uri, prefix) pairs from the innermost to the outermost level.
+        for namespaces in reversed(ns_stack):
+            if namespaces:  # almost no element declares new namespaces
+                yield from namespaces
+
+    cdef _resolve_prefix_name(self, prefixed_name):
+        # Convert "prefix:name" into "{uri}name" using the namespaces declared
+        # in the input.  Raises ValueError if the prefix is not in scope.
+        prefix, name = prefixed_name.split(':', 1)
+        for uri, p in self._iter_namespaces(self._ns_stack):
+            if p == prefix:
+                return f'{{{uri}}}{name}'
+        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')
+
+    cdef _qname(self, qname, uri=None):
+        # Return the tuple (output name, local name, uri) for *qname*, which
+        # is "{uri}name" or a plain name, or the local name if *uri* is given.
+        # A namespace that is not yet declared in the output gets declared on
+        # the current element.  Raises ValueError for an undeclared namespace.
+        if uri is None:
+            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
+        else:
+            tag = qname
+
+        # A prefix that was seen further inside is redeclared there and
+        # cannot be used for a declaration that is further outside.
+        prefixes_seen = set()
+        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
+            if u == uri and prefix not in prefixes_seen:
+                return f'{prefix}:{tag}' if prefix else tag, tag, uri
+            prefixes_seen.add(prefix)
+
+        # Not declared yet => add new declaration.
+        if self._rewrite_prefixes:
+            if uri in self._prefix_map:
+                prefix = self._prefix_map[uri]
+            else:
+                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
+            self._declared_ns_stack[-1].append((uri, prefix))
+            return f'{prefix}:{tag}', tag, uri
+
+        if not uri and '' not in prefixes_seen:
+            # No default namespace declared => no prefix needed.
+            return tag, tag, uri
+
+        for u, prefix in self._iter_namespaces(self._ns_stack):
+            if u == uri:
+                self._declared_ns_stack[-1].append((uri, prefix))
+                return f'{prefix}:{tag}' if prefix else tag, tag, uri
+
+        if not uri:
+            # A default namespace is declared, but names without namespace
+            # (e.g. plain attributes) have no prefix and need no declaration.
+            return tag, tag, uri
+
+        raise ValueError(f'Namespace "{uri}" is not declared in scope')
+
+    def data(self, data):
+        # Only collect the text, it gets written on the next flush.
+        self._data.append(data)
+
+    cdef _flush(self):
+        # Write a deferred start tag, if any, followed by the collected text.
+        data = u''.join(self._data)
+        del self._data[:]
+        if self._strip_text and not self._preserve_space[-1]:
+            data = data.strip()
+        if self._pending_start is not None:
+            (tag, attrs, new_namespaces), self._pending_start = self._pending_start, None
+            qname_text = data if u':' in data and _looks_like_prefix_name(data) else None
+            self._start(tag, attrs, new_namespaces, qname_text)
+            if qname_text is not None:
+                # The text was already written by _start() in its resolved form.
+                return
+        if data and self._root_seen:
+            self._write(_escape_cdata_c14n(data))
+
+    def start_ns(self, prefix, uri):
+        # we may have to resolve qnames in text content
+        # A deferred start tag must be written first so that the declaration
+        # ends up on the namespace level of the following element.
+        if self._data or self._pending_start is not None:
+            self._flush()
+        self._ns_stack[-1].append((uri, prefix))
+
+    def start(self, tag, attrs):
+        # Also flush without text: a deferred parent start tag must be
+        # written before this element.
+        if self._data or self._pending_start is not None:
+            self._flush()
+
+        new_namespaces = []
+        self._declared_ns_stack.append(new_namespaces)
+
+        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
+            # Need to parse text first to see if it requires a prefix declaration.
+            self._pending_start = (tag, attrs, new_namespaces)
+            return
+        self._start(tag, attrs, new_namespaces)
+
+    cdef _start(self, tag, attrs, new_namespaces, qname_text=None):
+        # Write the start tag with its namespace declarations and attributes.
+        # *qname_text* is the "prefix:name" text content of a qname aware tag.
+        qnames = {tag, *attrs}
+        resolved_names = {}
+
+        # Resolve prefixes in attribute and tag text.
+        if qname_text is not None:
+            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
+            qnames.add(qname)
+        if self._find_qname_aware_attrs is not None and attrs:
+            qattrs = self._find_qname_aware_attrs(attrs)
+            if qattrs:
+                for attr_name in qattrs:
+                    value = attrs[attr_name]
+                    if _looks_like_prefix_name(value):
+                        qname = resolved_names[value] = self._resolve_prefix_name(value)
+                        qnames.add(qname)
+            else:
+                qattrs = None
+        else:
+            qattrs = None
+
+        # Assign prefixes in lexicographical order of used URIs.
+        parsed_qnames = {n: self._qname(n) for n in sorted(
+            qnames, key=lambda n: n.split('}', 1))}
+
+        # Write namespace declarations in prefix order ...
+        attr_list = sorted(
+            (u'xmlns:' + prefix if prefix else u'xmlns', uri)
+            for uri, prefix in new_namespaces
+        ) if new_namespaces else []  # almost always empty
+
+        # ... followed by attributes in URI+name order
+        for k, v in sorted(attrs.items()):
+            if qattrs is not None and k in qattrs and v in resolved_names:
+                v = parsed_qnames[resolved_names[v]][0]
+            attr_qname, attr_name, uri = parsed_qnames[k]
+            # No prefix for attributes in default ('') namespace.
+            attr_list.append((attr_qname if uri else attr_name, v))
+
+        # Honour xml:space attributes.
+        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
+        self._preserve_space.append(
+            space_behaviour == 'preserve' if space_behaviour
+            else self._preserve_space[-1])
+
+        # Write the tag.
+        write = self._write
+        write(u'<' + parsed_qnames[tag][0])
+        if attr_list:
+            write(u''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
+        write(u'>')
+
+        # Write the resolved qname text content.
+        if qname_text is not None:
+            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))
+
+        self._root_seen = True
+        self._ns_stack.append([])
+
+    def end(self, tag):
+        # Also flush without text: the start tag of an empty qname aware
+        # element is still deferred at this point.
+        if self._data or self._pending_start is not None:
+            self._flush()
+        self._write(f'</{self._qname(tag)[0]}>')
+        self._preserve_space.pop()
+        # Only the initial entry is left after the outermost element was closed.
+        self._root_done = len(self._preserve_space) == 1
+        self._declared_ns_stack.pop()
+        self._ns_stack.pop()
+
+    def comment(self, text):
+        if not self._comments:
+            return
+        if self._root_done:
+            # After the root element, the line break precedes the comment.
+            self._write(u'\n')
+        elif self._pending_start is not None or (self._root_seen and self._data):
+            self._flush()
+        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
+        if not self._root_seen:
+            # Before the root element, the line break follows the comment.
+            self._write(u'\n')
+
+    def pi(self, target, data):
+        if self._root_done:
+            # After the root element, the line break precedes the PI.
+            self._write(u'\n')
+        elif self._pending_start is not None or (self._root_seen and self._data):
+            self._flush()
+        self._write(
+            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
+        if not self._root_seen:
+            # Before the root element, the line break follows the PI.
+            self._write(u'\n')
+
+    def close(self):
+        # All output was passed to the write function, there is no result.
+        return None
+
+
+cdef _raise_serialization_error(text):
+    # Always raises TypeError, naming the value that could not be serialised
+    # together with the name of its type.
+    type_name = type(text).__name__
+    message = "cannot serialize %r (type %s)" % (text, type_name)
+    raise TypeError(message)
+
+
+cdef unicode _escape_cdata_c14n(stext):
+    # Escape character data: replaces '&', '<', '>' and carriage return.
+    cdef unicode escaped
+    try:
+        escaped = unicode(stext)
+        # '&' comes first so that the '&' of the references inserted by the
+        # later replacements does not get escaped again.
+        for char, reference in (
+                (u'&', u'&amp;'),
+                (u'<', u'&lt;'),
+                (u'>', u'&gt;'),
+                (u'\r', u'&#xD;'),
+        ):
+            # Skip the do-nothing replace() calls, assuming that most
+            # strings are short and contain nothing to escape.
+            if char in escaped:
+                escaped = escaped.replace(char, reference)
+        return escaped
+    except (TypeError, AttributeError):
+        _raise_serialization_error(stext)
+
+
+cdef unicode _escape_attrib_c14n(stext):
+    # Escape an attribute value: replaces '&', '<', '"', tab, line feed and
+    # carriage return.
+    cdef unicode escaped
+    try:
+        escaped = unicode(stext)
+        # '&' comes first so that the '&' of the references inserted by the
+        # later replacements does not get escaped again.
+        for char, reference in (
+                (u'&', u'&amp;'),
+                (u'<', u'&lt;'),
+                (u'"', u'&quot;'),
+                (u'\t', u'&#x9;'),
+                (u'\n', u'&#xA;'),
+                (u'\r', u'&#xD;'),
+        ):
+            if char in escaped:
+                escaped = escaped.replace(char, reference)
+        return escaped
+    except (TypeError, AttributeError):
+        _raise_serialization_error(stext)
+
+
# incremental serialisation
cdef class xmlfile:
diff --git a/src/lxml/tests/c14n-20/c14nComment.xml b/src/lxml/tests/c14n-20/c14nComment.xml
new file mode 100644
index 00000000..e95aa302
--- /dev/null
+++ b/src/lxml/tests/c14n-20/c14nComment.xml
@@ -0,0 +1,4 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+ <c14n2:IgnoreComments>true</c14n2:IgnoreComments>
+</dsig:CanonicalizationMethod>
+
diff --git a/src/lxml/tests/c14n-20/c14nDefault.xml b/src/lxml/tests/c14n-20/c14nDefault.xml
new file mode 100644
index 00000000..c1364142
--- /dev/null
+++ b/src/lxml/tests/c14n-20/c14nDefault.xml
@@ -0,0 +1,3 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" Algorithm="http://www.w3.org/2010/xml-c14n2">
+</dsig:CanonicalizationMethod>
+
diff --git a/src/lxml/tests/c14n-20/c14nPrefix.xml b/src/lxml/tests/c14n-20/c14nPrefix.xml
new file mode 100644
index 00000000..fb233b42
--- /dev/null
+++ b/src/lxml/tests/c14n-20/c14nPrefix.xml
@@ -0,0 +1,4 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+ <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
+</dsig:CanonicalizationMethod>
+
diff --git a/src/lxml/tests/c14n-20/c14nPrefixQname.xml b/src/lxml/tests/c14n-20/c14nPrefixQname.xml
new file mode 100644
index 00000000..23188eed
--- /dev/null
+++ b/src/lxml/tests/c14n-20/c14nPrefixQname.xml
@@ -0,0 +1,7 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+ <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
+ <c14n2:QNameAware>
+ <c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/>
+ </c14n2:QNameAware>
+</dsig:CanonicalizationMethod>
+
diff --git a/src/lxml/tests/c14n-20/c14nPrefixQnameXpathElem.xml b/src/lxml/tests/c14n-20/c14nPrefixQnameXpathElem.xml
new file mode 100644
index 00000000..626fc48f
--- /dev/null
+++ b/src/lxml/tests/c14n-20/c14nPrefixQnameXpathElem.xml
@@ -0,0 +1,8 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+ <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
+ <c14n2:QNameAware>
+ <c14n2:Element Name="bar" NS="http://a"/>
+ <c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/>
+ </c14n2:QNameAware>
+</dsig:CanonicalizationMethod>
+
diff --git a/src/lxml/tests/c14n-20/c14nQname.xml b/src/lxml/tests/c14n-20/c14nQname.xml
new file mode 100644
index 00000000..919e5903
--- /dev/null
+++ b/src/lxml/tests/c14n-20/c14nQname.xml
@@ -0,0 +1,6 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+ <c14n2:QNameAware>
+ <c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/>
+ </c14n2:QNameAware>
+</dsig:CanonicalizationMethod>
+
diff --git a/src/lxml/tests/c14n-20/c14nQnameElem.xml b/src/lxml/tests/c14n-20/c14nQnameElem.xml
new file mode 100644
index 00000000..0321f806
--- /dev/null
+++ b/src/lxml/tests/c14n-20/c14nQnameElem.xml
@@ -0,0 +1,6 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+ <c14n2:QNameAware>
+ <c14n2:Element Name="bar" NS="http://a"/>
+ </c14n2:QNameAware>
+</dsig:CanonicalizationMethod>
+
diff --git a/src/lxml/tests/c14n-20/c14nQnameXpathElem.xml b/src/lxml/tests/c14n-20/c14nQnameXpathElem.xml
new file mode 100644
index 00000000..c4890bc8
--- /dev/null
+++ b/src/lxml/tests/c14n-20/c14nQnameXpathElem.xml
@@ -0,0 +1,7 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+ <c14n2:QNameAware>
+ <c14n2:Element Name="bar" NS="http://a"/>
+ <c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/>
+ </c14n2:QNameAware>
+</dsig:CanonicalizationMethod>
+
diff --git a/src/lxml/tests/c14n-20/c14nTrim.xml b/src/lxml/tests/c14n-20/c14nTrim.xml
new file mode 100644
index 00000000..ccb9cf65
--- /dev/null
+++ b/src/lxml/tests/c14n-20/c14nTrim.xml
@@ -0,0 +1,4 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+ <c14n2:TrimTextNodes>true</c14n2:TrimTextNodes>
+</dsig:CanonicalizationMethod>
+
diff --git a/src/lxml/tests/c14n-20/doc.dtd b/src/lxml/tests/c14n-20/doc.dtd
new file mode 100644
index 00000000..5c5d544a
--- /dev/null
+++ b/src/lxml/tests/c14n-20/doc.dtd
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!ELEMENT doc (#PCDATA)>
+
+
+
diff --git a/src/lxml/tests/c14n-20/doc.xsl b/src/lxml/tests/c14n-20/doc.xsl
new file mode 100644
index 00000000..a3f2348c
--- /dev/null
+++ b/src/lxml/tests/c14n-20/doc.xsl
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ >
+</xsl:stylesheet>
diff --git a/src/lxml/tests/c14n-20/inC14N1.xml b/src/lxml/tests/c14n-20/inC14N1.xml
new file mode 100644
index 00000000..ed450c73
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inC14N1.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+
+<?xml-stylesheet href="doc.xsl"
+ type="text/xsl" ?>
+
+<!DOCTYPE doc SYSTEM "doc.dtd">
+
+<doc>Hello, world!<!-- Comment 1 --></doc>
+
+<?pi-without-data ?>
+
+<!-- Comment 2 -->
+
+<!-- Comment 3 -->
diff --git a/src/lxml/tests/c14n-20/inC14N2.xml b/src/lxml/tests/c14n-20/inC14N2.xml
new file mode 100644
index 00000000..74eeea14
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inC14N2.xml
@@ -0,0 +1,11 @@
+<doc>
+ <clean> </clean>
+ <dirty> A B </dirty>
+ <mixed>
+ A
+ <clean> </clean>
+ B
+ <dirty> A B </dirty>
+ C
+ </mixed>
+</doc>
diff --git a/src/lxml/tests/c14n-20/inC14N3.xml b/src/lxml/tests/c14n-20/inC14N3.xml
new file mode 100644
index 00000000..fea78213
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inC14N3.xml
@@ -0,0 +1,18 @@
+<!DOCTYPE doc [<!ATTLIST e9 attr CDATA "default">]>
+<doc>
+ <e1 />
+ <e2 ></e2>
+ <e3 name = "elem3" id="elem3" />
+ <e4 name="elem4" id="elem4" ></e4>
+ <e5 a:attr="out" b:attr="sorted" attr2="all" attr="I'm"
+ xmlns:b="http://www.ietf.org"
+ xmlns:a="http://www.w3.org"
+ xmlns="http://example.org"/>
+ <e6 xmlns="" xmlns:a="http://www.w3.org">
+ <e7 xmlns="http://www.ietf.org">
+ <e8 xmlns="" xmlns:a="http://www.w3.org">
+ <e9 xmlns="" xmlns:a="http://www.ietf.org"/>
+ </e8>
+ </e7>
+ </e6>
+</doc>
diff --git a/src/lxml/tests/c14n-20/inC14N4.xml b/src/lxml/tests/c14n-20/inC14N4.xml
new file mode 100644
index 00000000..909a8474
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inC14N4.xml
@@ -0,0 +1,13 @@
+<!DOCTYPE doc [
+<!ATTLIST normId id ID #IMPLIED>
+<!ATTLIST normNames attr NMTOKENS #IMPLIED>
+]>
+<doc>
+ <text>First line&#x0d;&#10;Second line</text>
+ <value>&#x32;</value>
+ <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+ <compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>
+ <norm attr=' &apos; &#x20;&#13;&#xa;&#9; &apos; '/>
+ <normNames attr=' A &#x20;&#13;&#xa;&#9; B '/>
+ <normId id=' &apos;&#x20;&#13;&#xa;&#9; &apos; '/>
+</doc>
diff --git a/src/lxml/tests/c14n-20/inC14N5.xml b/src/lxml/tests/c14n-20/inC14N5.xml
new file mode 100644
index 00000000..501161ba
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inC14N5.xml
@@ -0,0 +1,12 @@
+<!DOCTYPE doc [
+<!ATTLIST doc attrExtEnt CDATA #IMPLIED>
+<!ENTITY ent1 "Hello">
+<!ENTITY ent2 SYSTEM "world.txt">
+<!ENTITY entExt SYSTEM "earth.gif" NDATA gif>
+<!NOTATION gif SYSTEM "viewgif.exe">
+]>
+<doc attrExtEnt="entExt">
+ &ent1;, &ent2;!
+</doc>
+
+<!-- Let world.txt contain "world" (excluding the quotes) -->
diff --git a/src/lxml/tests/c14n-20/inC14N6.xml b/src/lxml/tests/c14n-20/inC14N6.xml
new file mode 100644
index 00000000..31e20718
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inC14N6.xml
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<doc>&#169;</doc>
diff --git a/src/lxml/tests/c14n-20/inNsContent.xml b/src/lxml/tests/c14n-20/inNsContent.xml
new file mode 100644
index 00000000..b9924660
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inNsContent.xml
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:child="http://c" xmlns:soap-env="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+ <a:bar>xsd:string</a:bar>
+ <dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
+</a:foo>
diff --git a/src/lxml/tests/c14n-20/inNsDefault.xml b/src/lxml/tests/c14n-20/inNsDefault.xml
new file mode 100644
index 00000000..3e0d323b
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inNsDefault.xml
@@ -0,0 +1,3 @@
+<foo xmlns:a="http://a" xmlns:b="http://b">
+ <b:bar b:att1="val" att2="val"/>
+</foo>
diff --git a/src/lxml/tests/c14n-20/inNsPushdown.xml b/src/lxml/tests/c14n-20/inNsPushdown.xml
new file mode 100644
index 00000000..daa67d83
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inNsPushdown.xml
@@ -0,0 +1,6 @@
+<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:c="http://c">
+ <b:bar/>
+ <b:bar/>
+ <b:bar/>
+ <a:bar b:att1="val"/>
+</a:foo>
diff --git a/src/lxml/tests/c14n-20/inNsRedecl.xml b/src/lxml/tests/c14n-20/inNsRedecl.xml
new file mode 100644
index 00000000..10bd97be
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inNsRedecl.xml
@@ -0,0 +1,3 @@
+<foo xmlns:a="http://z3" xmlns:b="http://z2" a:att1="val1" b:att2="val2">
+ <bar xmlns="http://z0" xmlns:a="http://z2" a:att1="val1" b:att2="val2" xmlns:b="http://z3" />
+</foo>
diff --git a/src/lxml/tests/c14n-20/inNsSort.xml b/src/lxml/tests/c14n-20/inNsSort.xml
new file mode 100644
index 00000000..8e9fc01c
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inNsSort.xml
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://z3" xmlns:b="http://z2" b:att1="val1" c:att3="val3" b:att2="val2" xmlns:c="http://z1" xmlns:d="http://z0">
+ <c:bar/>
+ <c:bar d:att3="val3"/>
+</a:foo>
diff --git a/src/lxml/tests/c14n-20/inNsSuperfluous.xml b/src/lxml/tests/c14n-20/inNsSuperfluous.xml
new file mode 100644
index 00000000..f77720f7
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inNsSuperfluous.xml
@@ -0,0 +1,4 @@
+<foo xmlns:a="http://z0" xmlns:b="http://z0" a:att1="val1" b:att2="val2" xmlns="http://z0">
+ <c:bar xmlns:a="http://z0" xmlns:c="http://z0" c:att3="val3"/>
+ <d:bar xmlns:d="http://z0"/>
+</foo>
diff --git a/src/lxml/tests/c14n-20/inNsXml.xml b/src/lxml/tests/c14n-20/inNsXml.xml
new file mode 100644
index 00000000..7520cf3f
--- /dev/null
+++ b/src/lxml/tests/c14n-20/inNsXml.xml
@@ -0,0 +1,3 @@
+<foo xmlns="http://z0" xml:id="23">
+ <bar xsi:type="xsd:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">data</bar>
+</foo>
diff --git a/src/lxml/tests/c14n-20/out_inC14N1_c14nComment.xml b/src/lxml/tests/c14n-20/out_inC14N1_c14nComment.xml
new file mode 100644
index 00000000..d98d1684
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N1_c14nComment.xml
@@ -0,0 +1,6 @@
+<?xml-stylesheet href="doc.xsl"
+ type="text/xsl" ?>
+<doc>Hello, world!<!-- Comment 1 --></doc>
+<?pi-without-data?>
+<!-- Comment 2 -->
+<!-- Comment 3 --> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inC14N1_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N1_c14nDefault.xml
new file mode 100644
index 00000000..af9a9770
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N1_c14nDefault.xml
@@ -0,0 +1,4 @@
+<?xml-stylesheet href="doc.xsl"
+ type="text/xsl" ?>
+<doc>Hello, world!</doc>
+<?pi-without-data?> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inC14N2_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N2_c14nDefault.xml
new file mode 100644
index 00000000..2afa15cc
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N2_c14nDefault.xml
@@ -0,0 +1,11 @@
+<doc>
+ <clean> </clean>
+ <dirty> A B </dirty>
+ <mixed>
+ A
+ <clean> </clean>
+ B
+ <dirty> A B </dirty>
+ C
+ </mixed>
+</doc> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inC14N2_c14nTrim.xml b/src/lxml/tests/c14n-20/out_inC14N2_c14nTrim.xml
new file mode 100644
index 00000000..7a1dc329
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N2_c14nTrim.xml
@@ -0,0 +1 @@
+<doc><clean></clean><dirty>A B</dirty><mixed>A<clean></clean>B<dirty>A B</dirty>C</mixed></doc> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inC14N3_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N3_c14nDefault.xml
new file mode 100644
index 00000000..662e108a
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N3_c14nDefault.xml
@@ -0,0 +1,14 @@
+<doc>
+ <e1></e1>
+ <e2></e2>
+ <e3 id="elem3" name="elem3"></e3>
+ <e4 id="elem4" name="elem4"></e4>
+ <e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5>
+ <e6>
+ <e7 xmlns="http://www.ietf.org">
+ <e8 xmlns="">
+ <e9 attr="default"></e9>
+ </e8>
+ </e7>
+ </e6>
+</doc> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inC14N3_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inC14N3_c14nPrefix.xml
new file mode 100644
index 00000000..041e1ec8
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N3_c14nPrefix.xml
@@ -0,0 +1,14 @@
+<n0:doc xmlns:n0="">
+ <n0:e1></n0:e1>
+ <n0:e2></n0:e2>
+ <n0:e3 id="elem3" name="elem3"></n0:e3>
+ <n0:e4 id="elem4" name="elem4"></n0:e4>
+ <n1:e5 xmlns:n1="http://example.org" xmlns:n2="http://www.ietf.org" xmlns:n3="http://www.w3.org" attr="I'm" attr2="all" n2:attr="sorted" n3:attr="out"></n1:e5>
+ <n0:e6>
+ <n2:e7 xmlns:n2="http://www.ietf.org">
+ <n0:e8>
+ <n0:e9 attr="default"></n0:e9>
+ </n0:e8>
+ </n2:e7>
+ </n0:e6>
+</n0:doc> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inC14N3_c14nTrim.xml b/src/lxml/tests/c14n-20/out_inC14N3_c14nTrim.xml
new file mode 100644
index 00000000..4f35ad96
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N3_c14nTrim.xml
@@ -0,0 +1 @@
+<doc><e1></e1><e2></e2><e3 id="elem3" name="elem3"></e3><e4 id="elem4" name="elem4"></e4><e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5><e6><e7 xmlns="http://www.ietf.org"><e8 xmlns=""><e9 attr="default"></e9></e8></e7></e6></doc> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inC14N4_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N4_c14nDefault.xml
new file mode 100644
index 00000000..243d0e61
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N4_c14nDefault.xml
@@ -0,0 +1,10 @@
+<doc>
+ <text>First line&#xD;
+Second line</text>
+ <value>2</value>
+ <compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>
+ <compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>
+ <norm attr=" ' &#xD;&#xA;&#x9; ' "></norm>
+ <normNames attr="A &#xD;&#xA;&#x9; B"></normNames>
+ <normId id="' &#xD;&#xA;&#x9; '"></normId>
+</doc> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inC14N4_c14nTrim.xml b/src/lxml/tests/c14n-20/out_inC14N4_c14nTrim.xml
new file mode 100644
index 00000000..24d83ba8
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N4_c14nTrim.xml
@@ -0,0 +1,2 @@
+<doc><text>First line&#xD;
+Second line</text><value>2</value><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute><compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute><norm attr=" ' &#xD;&#xA;&#x9; ' "></norm><normNames attr="A &#xD;&#xA;&#x9; B"></normNames><normId id="' &#xD;&#xA;&#x9; '"></normId></doc> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inC14N5_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N5_c14nDefault.xml
new file mode 100644
index 00000000..c232e740
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N5_c14nDefault.xml
@@ -0,0 +1,3 @@
+<doc attrExtEnt="entExt">
+ Hello, world!
+</doc> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inC14N5_c14nTrim.xml b/src/lxml/tests/c14n-20/out_inC14N5_c14nTrim.xml
new file mode 100644
index 00000000..3fa84b1e
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N5_c14nTrim.xml
@@ -0,0 +1 @@
+<doc attrExtEnt="entExt">Hello, world!</doc> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inC14N6_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N6_c14nDefault.xml
new file mode 100644
index 00000000..0be38f98
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inC14N6_c14nDefault.xml
@@ -0,0 +1 @@
+<doc>©</doc> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsContent_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsContent_c14nDefault.xml
new file mode 100644
index 00000000..62d7e004
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsContent_c14nDefault.xml
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://a">
+ <a:bar>xsd:string</a:bar>
+ <dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
+</a:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml b/src/lxml/tests/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml
new file mode 100644
index 00000000..20e1c2e9
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml
@@ -0,0 +1,4 @@
+<n0:foo xmlns:n0="http://a">
+ <n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema">n1:string</n0:bar>
+ <n4:IncludedXPath xmlns:n2="http://b" xmlns:n3="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:n4="http://www.w3.org/2010/xmldsig2#">/n3:body/child::n2:foo[@att1 != "c:val" and @att2 != 'xsd:string']</n4:IncludedXPath>
+</n0:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameElem.xml b/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameElem.xml
new file mode 100644
index 00000000..db8680da
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameElem.xml
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://a">
+ <a:bar xmlns:xsd="http://www.w3.org/2001/XMLSchema">xsd:string</a:bar>
+ <dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
+</a:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameXpathElem.xml b/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameXpathElem.xml
new file mode 100644
index 00000000..df3b2157
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameXpathElem.xml
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://a">
+ <a:bar xmlns:xsd="http://www.w3.org/2001/XMLSchema">xsd:string</a:bar>
+ <dsig2:IncludedXPath xmlns:b="http://b" xmlns:dsig2="http://www.w3.org/2010/xmldsig2#" xmlns:soap-env="http://schemas.xmlsoap.org/wsdl/soap/">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
+</a:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsDefault_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsDefault_c14nDefault.xml
new file mode 100644
index 00000000..674b076d
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsDefault_c14nDefault.xml
@@ -0,0 +1,3 @@
+<foo>
+ <b:bar xmlns:b="http://b" att2="val" b:att1="val"></b:bar>
+</foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsDefault_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsDefault_c14nPrefix.xml
new file mode 100644
index 00000000..83edaae9
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsDefault_c14nPrefix.xml
@@ -0,0 +1,3 @@
+<n0:foo xmlns:n0="">
+ <n1:bar xmlns:n1="http://b" att2="val" n1:att1="val"></n1:bar>
+</n0:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsPushdown_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsPushdown_c14nDefault.xml
new file mode 100644
index 00000000..fa4f21b5
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsPushdown_c14nDefault.xml
@@ -0,0 +1,6 @@
+<a:foo xmlns:a="http://a">
+ <b:bar xmlns:b="http://b"></b:bar>
+ <b:bar xmlns:b="http://b"></b:bar>
+ <b:bar xmlns:b="http://b"></b:bar>
+ <a:bar xmlns:b="http://b" b:att1="val"></a:bar>
+</a:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsPushdown_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsPushdown_c14nPrefix.xml
new file mode 100644
index 00000000..6d579200
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsPushdown_c14nPrefix.xml
@@ -0,0 +1,6 @@
+<n0:foo xmlns:n0="http://a">
+ <n1:bar xmlns:n1="http://b"></n1:bar>
+ <n1:bar xmlns:n1="http://b"></n1:bar>
+ <n1:bar xmlns:n1="http://b"></n1:bar>
+ <n0:bar xmlns:n1="http://b" n1:att1="val"></n0:bar>
+</n0:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsRedecl_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsRedecl_c14nDefault.xml
new file mode 100644
index 00000000..ba37f925
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsRedecl_c14nDefault.xml
@@ -0,0 +1,3 @@
+<foo xmlns:a="http://z3" xmlns:b="http://z2" b:att2="val2" a:att1="val1">
+ <bar xmlns="http://z0" xmlns:a="http://z2" xmlns:b="http://z3" a:att1="val1" b:att2="val2"></bar>
+</foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsRedecl_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsRedecl_c14nPrefix.xml
new file mode 100644
index 00000000..af3bb2d6
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsRedecl_c14nPrefix.xml
@@ -0,0 +1,3 @@
+<n0:foo xmlns:n0="" xmlns:n1="http://z2" xmlns:n2="http://z3" n1:att2="val2" n2:att1="val1">
+ <n3:bar xmlns:n3="http://z0" n1:att1="val1" n2:att2="val2"></n3:bar>
+</n0:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsSort_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsSort_c14nDefault.xml
new file mode 100644
index 00000000..8a92c5c6
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsSort_c14nDefault.xml
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://z3" xmlns:b="http://z2" xmlns:c="http://z1" c:att3="val3" b:att1="val1" b:att2="val2">
+ <c:bar></c:bar>
+ <c:bar xmlns:d="http://z0" d:att3="val3"></c:bar>
+</a:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsSort_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsSort_c14nPrefix.xml
new file mode 100644
index 00000000..8d44c84f
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsSort_c14nPrefix.xml
@@ -0,0 +1,4 @@
+<n2:foo xmlns:n0="http://z1" xmlns:n1="http://z2" xmlns:n2="http://z3" n0:att3="val3" n1:att1="val1" n1:att2="val2">
+ <n0:bar></n0:bar>
+ <n0:bar xmlns:n3="http://z0" n3:att3="val3"></n0:bar>
+</n2:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nDefault.xml
new file mode 100644
index 00000000..6bb862d7
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nDefault.xml
@@ -0,0 +1,4 @@
+<foo xmlns="http://z0" xmlns:a="http://z0" xmlns:b="http://z0" a:att1="val1" b:att2="val2">
+ <c:bar xmlns:c="http://z0" c:att3="val3"></c:bar>
+ <d:bar xmlns:d="http://z0"></d:bar>
+</foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nPrefix.xml
new file mode 100644
index 00000000..700a16d4
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nPrefix.xml
@@ -0,0 +1,4 @@
+<n0:foo xmlns:n0="http://z0" n0:att1="val1" n0:att2="val2">
+ <n0:bar n0:att3="val3"></n0:bar>
+ <n0:bar></n0:bar>
+</n0:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsXml_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsXml_c14nDefault.xml
new file mode 100644
index 00000000..1689f3bf
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsXml_c14nDefault.xml
@@ -0,0 +1,3 @@
+<foo xmlns="http://z0" xml:id="23">
+ <bar xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="xsd:string">data</bar>
+</foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefix.xml
new file mode 100644
index 00000000..38508a47
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefix.xml
@@ -0,0 +1,3 @@
+<n0:foo xmlns:n0="http://z0" xml:id="23">
+ <n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema-instance" n1:type="xsd:string">data</n0:bar>
+</n0:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefixQname.xml b/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefixQname.xml
new file mode 100644
index 00000000..867980f8
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefixQname.xml
@@ -0,0 +1,3 @@
+<n0:foo xmlns:n0="http://z0" xml:id="23">
+ <n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema" xmlns:n2="http://www.w3.org/2001/XMLSchema-instance" n2:type="n1:string">data</n0:bar>
+</n0:foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/out_inNsXml_c14nQname.xml b/src/lxml/tests/c14n-20/out_inNsXml_c14nQname.xml
new file mode 100644
index 00000000..0300f9d5
--- /dev/null
+++ b/src/lxml/tests/c14n-20/out_inNsXml_c14nQname.xml
@@ -0,0 +1,3 @@
+<foo xmlns="http://z0" xml:id="23">
+ <bar xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="xsd:string">data</bar>
+</foo> \ No newline at end of file
diff --git a/src/lxml/tests/c14n-20/world.txt b/src/lxml/tests/c14n-20/world.txt
new file mode 100644
index 00000000..04fea064
--- /dev/null
+++ b/src/lxml/tests/c14n-20/world.txt
@@ -0,0 +1 @@
+world \ No newline at end of file
diff --git a/src/lxml/tests/test_elementtree.py b/src/lxml/tests/test_elementtree.py
index 78701a71..3de74639 100644
--- a/src/lxml/tests/test_elementtree.py
+++ b/src/lxml/tests/test_elementtree.py
@@ -9,13 +9,15 @@ for IO related test cases.
"""
import copy
+import io
import operator
import os
import re
import sys
import textwrap
import unittest
-from functools import wraps
+from contextlib import contextmanager
+from functools import wraps, partial
from itertools import islice
this_dir = os.path.dirname(__file__)
@@ -4637,6 +4639,171 @@ class _XMLPullParserTest(unittest.TestCase):
self.etree.XMLPullParser(events=('start', 'end', 'bogus'))
+class _C14NTest(unittest.TestCase):  # shared C14N test cases; concrete subclasses set `etree`
+ etree = None  # module under test, overridden by the concrete subclasses
+ maxDiff = None  # unittest: do not truncate diffs in assertion failure output
+
+ if not hasattr(unittest.TestCase, 'subTest'):  # fallback for unittest versions without TestCase.subTest
+ @contextmanager
+ def subTest(self, name):  # minimal stand-in: runs the body and reports the subtest name on failure
+ try:
+ yield
+ except Exception as e:
+ print("Subtest {} failed: {}".format(name, e))  # name the failing subtest before propagating
+ raise
+
+ #
+ # simple roundtrip tests (from c14n.py)
+
+ def c14n_roundtrip(self, xml, **options):  # canonicalize `xml` with `options` and return the written output as text
+ f = io.StringIO()  # in-memory buffer that receives everything passed to f.write
+ self.etree.canonicalize(f.write, xml, **options)
+ return f.getvalue()
+
+ def test_simple_roundtrip(self):  # canonicalize small inline documents and compare with the expected text
+ c14n_roundtrip = self.c14n_roundtrip
+ # Basics: empty elements and namespace declarations
+ self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>')
+ self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME
+ '<doc xmlns="uri"></doc>')
+ self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),
+ '<prefix:doc xmlns:prefix="uri"></prefix:doc>')
+ self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"),
+ '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>')  # declaration is expected on the element that uses the prefix
+ self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"),
+ '<elem></elem>')  # unused namespace declarations are expected to be dropped
+
+ # C14N spec
+ self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"),
+ '<doc>Hello, world!</doc>')  # no options passed: the comment is expected to be removed
+ self.assertEqual(c14n_roundtrip("<value>&#x32;</value>"),
+ '<value>2</value>')
+ self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'),
+ '<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>')
+ self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>'''),
+ '<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>')
+ self.assertEqual(c14n_roundtrip("<norm attr=' &apos; &#x20;&#13;&#xa;&#9; &apos; '/>"),
+ '<norm attr=" \' &#xD;&#xA;&#x9; \' "></norm>')
+ self.assertEqual(c14n_roundtrip("<normNames attr=' A &#x20;&#13;&#xa;&#9; B '/>"),
+ '<normNames attr=" A &#xD;&#xA;&#x9; B "></normNames>')
+ self.assertEqual(c14n_roundtrip("<normId id=' &apos; &#x20;&#13;&#xa;&#9; &apos; '/>"),
+ '<normId id=" \' &#xD;&#xA;&#x9; \' "></normId>')
+
+ # fragments from PJ's tests (disabled: the assertion below is commented out)
+ #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"),
+ #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>')
+
+ #
+ # basic canonicalize() tests from the C14N 2.0 specification. Uses
+ # the test files in the "c14n-20" directory next to this module.
+
+ # note that this uses generated C14N versions of the standard ET.write
+ # output, not roundtripped C14N (see above).
+
+ def test_xml_c14n2(self):  # canonicalize each in*.xml under every matching c14n*.xml config and compare with out_*.xml
+ datadir = os.path.join(os.path.dirname(__file__), "c14n-20")
+ full_path = partial(os.path.join, datadir)  # full_path(name) -> path of `name` inside datadir
+
+ files = [filename[:-4] for filename in sorted(os.listdir(datadir))  # base names with the '.xml' suffix cut off
+ if filename.endswith('.xml')]
+ input_files = [
+ filename for filename in files
+ if filename.startswith('in')
+ ]
+ configs = {
+ filename: {
+ # e.g. <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite> -> 'PrefixRewrite': ('sequential', <element>)
+ option.tag.split('}')[-1]: ((option.text or '').strip(), option)
+ for option in self.etree.parse(full_path(filename) + ".xml").getroot()
+ }
+ for filename in files
+ if filename.startswith('c14n')
+ }
+
+ tests = {  # input name -> [(expected-output name, config dict), ...]
+ input_file: [
+ (filename, configs[filename.rsplit('_', 1)[-1]])  # text after the last '_' names the config file
+ for filename in files
+ if filename.startswith('out_%s_' % input_file)
+ and filename.rsplit('_', 1)[-1] in configs
+ ]
+ for input_file in input_files
+ }
+
+ # Make sure we found all test cases.
+ self.assertEqual(30, len([
+ output_file for output_files in tests.values()
+ for output_file in output_files]))
+
+ def get_option(config, option_name, default=None):  # text value of an option, or `default` if it is not configured
+ return config.get(option_name, (default, ()))[0]  # config values are (text, element) pairs; [0] is the text
+
+ for input_file, output_files in tests.items():
+ for output_file, config in output_files:
+ keep_comments = get_option(
+ config, 'IgnoreComments') == 'true' # no, it's right :)  NOTE(review): looks inverted; presumably mirrors how the test configs are written - confirm
+ strip_text = get_option(
+ config, 'TrimTextNodes') == 'true'
+ rewrite_prefixes = get_option(
+ config, 'PrefixRewrite') == 'sequential'
+ if 'QNameAware' in config:
+ qattrs = [  # "{NS}Name" strings built from the QualifiedAttr children
+ "{%s}%s" % (el.get('NS'), el.get('Name'))
+ for el in config['QNameAware'][1].findall(
+ '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr')
+ ]
+ qtags = [  # "{NS}Name" strings built from the Element children
+ "{%s}%s" % (el.get('NS'), el.get('Name'))
+ for el in config['QNameAware'][1].findall(
+ '{http://www.w3.org/2010/xml-c14n2}Element')
+ ]
+ else:
+ qtags = qattrs = None
+
+ # Build subtest description from config.
+ config_descr = ','.join(
+ "%s=%s" % (name, value or ','.join(c.tag.split('}')[-1] for c in children))  # options without text are described by their child tag names
+ for name, (value, children) in sorted(config.items())
+ )
+
+ with self.subTest("{}({})".format(output_file, config_descr)):
+ if input_file == 'inNsRedecl' and not rewrite_prefixes:
+ self.skipTest(
+ "Redeclared namespace handling is not supported in {}".format(
+ output_file))
+ if input_file == 'inNsSuperfluous' and not rewrite_prefixes:  # NOTE(review): the skip message below reuses the "Redeclared" wording
+ self.skipTest(
+ "Redeclared namespace handling is not supported in {}".format(
+ output_file))
+ if 'QNameAware' in config and config['QNameAware'][1].find(
+ '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None:
+ self.skipTest(
+ "QName rewriting in XPath text is not supported in {}".format(
+ output_file))
+
+ out = io.StringIO()  # receives the canonicalized output
+ with io.open(full_path(input_file + ".xml"), 'rb') as f:
+ if input_file == 'inC14N5':
+ # Hack: avoid setting up external entity resolution in the parser.
+ with open(full_path('world.txt'), 'rb') as entity_file:
+ f = io.BytesIO(f.read().replace(b'&ent2;', entity_file.read().strip()))  # inline the stripped content of world.txt for &ent2;
+
+ self.etree.canonicalize(
+ out.write, file=f,
+ comments=keep_comments,
+ strip_text=strip_text,
+ rewrite_prefixes=rewrite_prefixes,
+ qname_aware_tags=qtags, qname_aware_attrs=qattrs)
+ text = out.getvalue()
+ with io.open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f:
+ expected = f.read()
+ if input_file == 'inC14N3' and self.etree is not etree:  # only when testing a module other than the module-level `etree`
+ # FIXME: cET resolves default attributes but ET does not!
+ expected = expected.replace(' attr="default"', '')  # drop the attribute from both sides so the comparison ignores it
+ text = text.replace(' attr="default"', '')
+ self.assertEqual(expected, text)
+
+
if etree:
class ETreeTestCase(_ETreeTestCaseBase):
etree = etree
@@ -4647,6 +4814,9 @@ if etree:
class ETreeElementSlicingTest(_ElementSlicingTest):
etree = etree
+ class ETreeC14NTest(_C14NTest):  # runs the shared C14N tests against `etree`
+ etree = etree
+
if ElementTree:
class ElementTreeTestCase(_ETreeTestCaseBase):
@@ -4671,6 +4841,12 @@ if ElementTree:
else:
ElementTreePullTestCase = None
+ if hasattr(ElementTree, 'canonicalize'):  # only define the tests when this ElementTree provides canonicalize()
+ class ElementTreeC14NTest(_C14NTest):
+ etree = ElementTree
+ else:
+ ElementTreeC14NTest = None  # falsy placeholder so the name always exists
+
class ElementTreeElementSlicingTest(_ElementSlicingTest):
etree = ElementTree
@@ -4693,10 +4869,13 @@ def test_suite():
suite.addTests([unittest.makeSuite(ETreeTestCase)])
suite.addTests([unittest.makeSuite(ETreePullTestCase)])
suite.addTests([unittest.makeSuite(ETreeElementSlicingTest)])
+ suite.addTests([unittest.makeSuite(ETreeC14NTest)])
if ElementTree:
suite.addTests([unittest.makeSuite(ElementTreeTestCase)])
if ElementTreePullTestCase:
suite.addTests([unittest.makeSuite(ElementTreePullTestCase)])
+ if ElementTreeC14NTest:
+ suite.addTests([unittest.makeSuite(ElementTreeC14NTest)])
suite.addTests([unittest.makeSuite(ElementTreeElementSlicingTest)])
if cElementTree:
suite.addTests([unittest.makeSuite(CElementTreeTestCase)])