diff options
| author | Stefan Behnel <stefan_ml@behnel.de> | 2019-04-26 18:07:08 +0200 |
|---|---|---|
| committer | Stefan Behnel <stefan_ml@behnel.de> | 2019-04-26 18:07:08 +0200 |
| commit | c1732d3bbf5bc46d4f91c3a2f45cc88125083d88 (patch) | |
| tree | 9e54dce0628d9f67c246b1d06e63f786886c50b2 /src/lxml | |
| parent | 1ce10a552b45d81c287ad5ffc66b32ebef6266ae (diff) | |
| download | python-lxml-c1732d3bbf5bc46d4f91c3a2f45cc88125083d88.tar.gz | |
Add C14N 2.0 implementation.
Diffstat (limited to 'src/lxml')
57 files changed, 794 insertions, 1 deletions
diff --git a/src/lxml/serializer.pxi b/src/lxml/serializer.pxi index d0e7ef56..7bc69202 100644 --- a/src/lxml/serializer.pxi +++ b/src/lxml/serializer.pxi @@ -856,6 +856,326 @@ cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments, message = errors[0].message raise C14NError(message) + +# C14N 2.0 + +def canonicalize(write, xml_data=None, *, file=None, **options): + """Convert XML to its C14N 2.0 serialised form. + + The C14N serialised output is written using the *write* function. + To write to a file, open it in text mode with encoding "utf-8" and pass + its ``.write`` method. + + Either *xml_data* (an XML string) or *file* (a file-like object) must be + provided as input. + + The configuration options are the same as for the ``C14NWriterTarget``. + """ + cdef _FeedParser parser = XMLParser( + target=C14NWriterTarget(write, **options), + attribute_defaults=True, + collect_ids=False, + ) + + try: + if xml_data is not None: + parser.feed(xml_data) + elif file is not None: + d = file.read(64*1024) + while d: + parser.feed(d) + d = file.read(64*1024) + finally: + parser.close() + + +cdef object _looks_like_prefix_name = re.compile('^\w+:\w+$', re.UNICODE).match + + +cdef class C14NWriterTarget: + """ + Canonicalization writer target for the XMLParser. + + Serialises parse events to XML C14N 2.0. + + Configuration options: + + - *comments*: set to true to include comments + - *strip_text*: set to true to strip whitespace before and after text content + - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}" + - *qname_aware_tags*: a set of qname aware tag names in which prefixes + should be replaced in text content + - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes + should be replaced in text content + """ + cdef object _write + cdef list _data + cdef set _qname_aware_tags + cdef object _find_qname_aware_attrs + cdef list _declared_ns_stack + cdef list _ns_stack + cdef dict _prefix_map + cdef list _preserve_space + cdef tuple _pending_start + cdef bint _comments + cdef bint _strip_text + cdef bint _rewrite_prefixes + cdef bint _root_seen + cdef bint _root_done + + def __init__(self, write, *, + comments=False, strip_text=False, rewrite_prefixes=False, + qname_aware_tags=None, qname_aware_attrs=None): + self._write = write + self._data = [] + self._comments = comments + self._strip_text = strip_text + + self._rewrite_prefixes = rewrite_prefixes + if qname_aware_tags: + self._qname_aware_tags = set(qname_aware_tags) + else: + self._qname_aware_tags = None + if qname_aware_attrs: + self._find_qname_aware_attrs = set(qname_aware_attrs).intersection + else: + self._find_qname_aware_attrs = None + + # Stack with globally and newly declared namespaces as (uri, prefix) pairs. + self._declared_ns_stack = [[ + ("http://www.w3.org/XML/1998/namespace", "xml"), + ]] + # Stack with user declared namespace prefixes as (uri, prefix) pairs. + self._ns_stack = [] + if not rewrite_prefixes: + self._ns_stack.append(list(_DEFAULT_NAMESPACE_PREFIXES.items())) + self._ns_stack.append([]) + self._prefix_map = {} + self._preserve_space = [False] + self._pending_start = None + self._root_seen = False + self._root_done = False + + def _iter_namespaces(self, ns_stack): + for namespaces in reversed(ns_stack): + if namespaces: # almost no element declares new namespaces + yield from namespaces + + cdef _resolve_prefix_name(self, prefixed_name): + prefix, name = prefixed_name.split(':', 1) + for uri, p in self._iter_namespaces(self._ns_stack): + if p == prefix: + return f'{{{uri}}}{name}' + raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope') + + cdef _qname(self, qname, uri=None): + if uri is None: + uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname) + else: + tag = qname + + prefixes_seen = set() + for u, prefix in self._iter_namespaces(self._declared_ns_stack): + if u == uri and prefix not in prefixes_seen: + return f'{prefix}:{tag}' if prefix else tag, tag, uri + prefixes_seen.add(prefix) + + # Not declared yet => add new declaration. + if self._rewrite_prefixes: + if uri in self._prefix_map: + prefix = self._prefix_map[uri] + else: + prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}' + self._declared_ns_stack[-1].append((uri, prefix)) + return f'{prefix}:{tag}', tag, uri + + if not uri and '' not in prefixes_seen: + # No default namespace declared => no prefix needed. + return tag, tag, uri + + for u, prefix in self._iter_namespaces(self._ns_stack): + if u == uri: + self._declared_ns_stack[-1].append((uri, prefix)) + return f'{prefix}:{tag}' if prefix else tag, tag, uri + + raise ValueError(f'Namespace "{uri}" is not declared in scope') + + def data(self, data): + self._data.append(data) + + cdef _flush(self): + data = u''.join(self._data) + del self._data[:] + if self._strip_text and not self._preserve_space[-1]: + data = data.strip() + if self._pending_start is not None: + (tag, attrs, new_namespaces), self._pending_start = self._pending_start, None + qname_text = data if u':' in data and _looks_like_prefix_name(data) else None + self._start(tag, attrs, new_namespaces, qname_text) + if qname_text is not None: + return + if data and self._root_seen: + self._write(_escape_cdata_c14n(data)) + + def start_ns(self, prefix, uri): + # we may have to resolve qnames in text content + if self._data: + self._flush() + self._ns_stack[-1].append((uri, prefix)) + + def start(self, tag, attrs): + if self._data: + self._flush() + + new_namespaces = [] + self._declared_ns_stack.append(new_namespaces) + + if self._qname_aware_tags is not None and tag in self._qname_aware_tags: + # Need to parse text first to see if it requires a prefix declaration. + self._pending_start = (tag, attrs, new_namespaces) + return + self._start(tag, attrs, new_namespaces) + + cdef _start(self, tag, attrs, new_namespaces, qname_text=None): + qnames = {tag, *attrs} + resolved_names = {} + + # Resolve prefixes in attribute and tag text. + if qname_text is not None: + qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text) + qnames.add(qname) + if self._find_qname_aware_attrs is not None and attrs: + qattrs = self._find_qname_aware_attrs(attrs) + if qattrs: + for attr_name in qattrs: + value = attrs[attr_name] + if _looks_like_prefix_name(value): + qname = resolved_names[value] = self._resolve_prefix_name(value) + qnames.add(qname) + else: + qattrs = None + else: + qattrs = None + + # Assign prefixes in lexicographical order of used URIs. + parsed_qnames = {n: self._qname(n) for n in sorted( + qnames, key=lambda n: n.split('}', 1))} + + # Write namespace declarations in prefix order ... + attr_list = sorted( + (u'xmlns:' + prefix if prefix else u'xmlns', uri) + for uri, prefix in new_namespaces + ) if new_namespaces else [] # almost always empty + + # ... followed by attributes in URI+name order + for k, v in sorted(attrs.items()): + if qattrs is not None and k in qattrs and v in resolved_names: + v = parsed_qnames[resolved_names[v]][0] + attr_qname, attr_name, uri = parsed_qnames[k] + # No prefix for attributes in default ('') namespace. + attr_list.append((attr_qname if uri else attr_name, v)) + + # Honour xml:space attributes. + space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space') + self._preserve_space.append( + space_behaviour == 'preserve' if space_behaviour + else self._preserve_space[-1]) + + # Write the tag. + write = self._write + write(u'<' + parsed_qnames[tag][0]) + if attr_list: + write(u''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list])) + write(u'>') + + # Write the resolved qname text content. + if qname_text is not None: + write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0])) + + self._root_seen = True + self._ns_stack.append([]) + + def end(self, tag): + if self._data: + self._flush() + self._write(f'</{self._qname(tag)[0]}>') + self._preserve_space.pop() + self._root_done = len(self._preserve_space) == 1 + self._declared_ns_stack.pop() + self._ns_stack.pop() + + def comment(self, text): + if not self._comments: + return + if self._root_done: + self._write(u'\n') + elif self._root_seen and self._data: + self._flush() + self._write(f'<!--{_escape_cdata_c14n(text)}-->') + if not self._root_seen: + self._write(u'\n') + + def pi(self, target, data): + if self._root_done: + self._write(u'\n') + elif self._root_seen and self._data: + self._flush() + self._write( + f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>') + if not self._root_seen: + self._write(u'\n') + + def close(self): + return None + + +cdef _raise_serialization_error(text): + raise TypeError("cannot serialize %r (type %s)" % (text, type(text).__name__)) + + +cdef unicode _escape_cdata_c14n(stext): + # escape character data + cdef unicode text + try: + # it's worth avoiding do-nothing calls for strings that are + # shorter than 500 character, or so. assume that's, by far, + # the most common case in most applications. + text = unicode(stext) + if u'&' in text: + text = text.replace(u'&', u'&') + if u'<' in text: + text = text.replace(u'<', u'<') + if u'>' in text: + text = text.replace(u'>', u'>') + if u'\r' in text: + text = text.replace(u'\r', u'
') + return text + except (TypeError, AttributeError): + _raise_serialization_error(stext) + + +cdef unicode _escape_attrib_c14n(stext): + # escape attribute value + cdef unicode text + try: + text = unicode(stext) + if u'&' in text: + text = text.replace(u'&', u'&') + if u'<' in text: + text = text.replace(u'<', u'<') + if u'"' in text: + text = text.replace(u'"', u'"') + if u'\t' in text: + text = text.replace(u'\t', u'	') + if u'\n' in text: + text = text.replace(u'\n', u'
') + if u'\r' in text: + text = text.replace(u'\r', u'
') + return text + except (TypeError, AttributeError): + _raise_serialization_error(stext) + + # incremental serialisation cdef class xmlfile: diff --git a/src/lxml/tests/c14n-20/c14nComment.xml b/src/lxml/tests/c14n-20/c14nComment.xml new file mode 100644 index 00000000..e95aa302 --- /dev/null +++ b/src/lxml/tests/c14n-20/c14nComment.xml @@ -0,0 +1,4 @@ +<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2"> + <c14n2:IgnoreComments>true</c14n2:IgnoreComments> +</dsig:CanonicalizationMethod> + diff --git a/src/lxml/tests/c14n-20/c14nDefault.xml b/src/lxml/tests/c14n-20/c14nDefault.xml new file mode 100644 index 00000000..c1364142 --- /dev/null +++ b/src/lxml/tests/c14n-20/c14nDefault.xml @@ -0,0 +1,3 @@ +<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" Algorithm="http://www.w3.org/2010/xml-c14n2"> +</dsig:CanonicalizationMethod> + diff --git a/src/lxml/tests/c14n-20/c14nPrefix.xml b/src/lxml/tests/c14n-20/c14nPrefix.xml new file mode 100644 index 00000000..fb233b42 --- /dev/null +++ b/src/lxml/tests/c14n-20/c14nPrefix.xml @@ -0,0 +1,4 @@ +<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2"> + <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite> +</dsig:CanonicalizationMethod> + diff --git a/src/lxml/tests/c14n-20/c14nPrefixQname.xml b/src/lxml/tests/c14n-20/c14nPrefixQname.xml new file mode 100644 index 00000000..23188eed --- /dev/null +++ b/src/lxml/tests/c14n-20/c14nPrefixQname.xml @@ -0,0 +1,7 @@ +<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2"> + <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite> + <c14n2:QNameAware> + <c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/> + </c14n2:QNameAware> +</dsig:CanonicalizationMethod> + diff --git a/src/lxml/tests/c14n-20/c14nPrefixQnameXpathElem.xml b/src/lxml/tests/c14n-20/c14nPrefixQnameXpathElem.xml new file mode 100644 index 00000000..626fc48f --- /dev/null +++ b/src/lxml/tests/c14n-20/c14nPrefixQnameXpathElem.xml @@ -0,0 +1,8 @@ +<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2"> + <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite> + <c14n2:QNameAware> + <c14n2:Element Name="bar" NS="http://a"/> + <c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/> + </c14n2:QNameAware> +</dsig:CanonicalizationMethod> + diff --git a/src/lxml/tests/c14n-20/c14nQname.xml b/src/lxml/tests/c14n-20/c14nQname.xml new file mode 100644 index 00000000..919e5903 --- /dev/null +++ b/src/lxml/tests/c14n-20/c14nQname.xml @@ -0,0 +1,6 @@ +<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2"> + <c14n2:QNameAware> + <c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/> + </c14n2:QNameAware> +</dsig:CanonicalizationMethod> + diff --git a/src/lxml/tests/c14n-20/c14nQnameElem.xml b/src/lxml/tests/c14n-20/c14nQnameElem.xml new file mode 100644 index 00000000..0321f806 --- /dev/null +++ b/src/lxml/tests/c14n-20/c14nQnameElem.xml @@ -0,0 +1,6 @@ +<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2"> + <c14n2:QNameAware> + <c14n2:Element Name="bar" NS="http://a"/> + </c14n2:QNameAware> +</dsig:CanonicalizationMethod> + diff --git a/src/lxml/tests/c14n-20/c14nQnameXpathElem.xml b/src/lxml/tests/c14n-20/c14nQnameXpathElem.xml new file mode 100644 index 00000000..c4890bc8 --- /dev/null +++ b/src/lxml/tests/c14n-20/c14nQnameXpathElem.xml @@ -0,0 +1,7 @@ +<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2"> + <c14n2:QNameAware> + <c14n2:Element Name="bar" NS="http://a"/> + <c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/> + </c14n2:QNameAware> +</dsig:CanonicalizationMethod> + diff --git a/src/lxml/tests/c14n-20/c14nTrim.xml b/src/lxml/tests/c14n-20/c14nTrim.xml new file mode 100644 index 00000000..ccb9cf65 --- /dev/null +++ b/src/lxml/tests/c14n-20/c14nTrim.xml @@ -0,0 +1,4 @@ +<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2"> + <c14n2:TrimTextNodes>true</c14n2:TrimTextNodes> +</dsig:CanonicalizationMethod> + diff --git a/src/lxml/tests/c14n-20/doc.dtd b/src/lxml/tests/c14n-20/doc.dtd new file mode 100644 index 00000000..5c5d544a --- /dev/null +++ b/src/lxml/tests/c14n-20/doc.dtd @@ -0,0 +1,6 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!ELEMENT doc (#PCDATA)> + + + diff --git a/src/lxml/tests/c14n-20/doc.xsl b/src/lxml/tests/c14n-20/doc.xsl new file mode 100644 index 00000000..a3f2348c --- /dev/null +++ b/src/lxml/tests/c14n-20/doc.xsl @@ -0,0 +1,5 @@ +<?xml version="1.0"?> +<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + > +</xsl:stylesheet> diff --git a/src/lxml/tests/c14n-20/inC14N1.xml b/src/lxml/tests/c14n-20/inC14N1.xml new file mode 100644 index 00000000..ed450c73 --- /dev/null +++ b/src/lxml/tests/c14n-20/inC14N1.xml @@ -0,0 +1,14 @@ +<?xml version="1.0"?> + +<?xml-stylesheet href="doc.xsl" + type="text/xsl" ?> + +<!DOCTYPE doc SYSTEM "doc.dtd"> + +<doc>Hello, world!<!-- Comment 1 --></doc> + +<?pi-without-data ?> + +<!-- Comment 2 --> + +<!-- Comment 3 --> diff --git a/src/lxml/tests/c14n-20/inC14N2.xml b/src/lxml/tests/c14n-20/inC14N2.xml new file mode 100644 index 00000000..74eeea14 --- /dev/null +++ b/src/lxml/tests/c14n-20/inC14N2.xml @@ -0,0 +1,11 @@ +<doc> + <clean> </clean> + <dirty> A B </dirty> + <mixed> + A + <clean> </clean> + B + <dirty> A B </dirty> + C + </mixed> +</doc> diff --git a/src/lxml/tests/c14n-20/inC14N3.xml b/src/lxml/tests/c14n-20/inC14N3.xml new file mode 100644 index 00000000..fea78213 --- /dev/null +++ b/src/lxml/tests/c14n-20/inC14N3.xml @@ -0,0 +1,18 @@ +<!DOCTYPE doc [<!ATTLIST e9 attr CDATA "default">]> +<doc> + <e1 /> + <e2 ></e2> + <e3 name = "elem3" id="elem3" /> + <e4 name="elem4" id="elem4" ></e4> + <e5 a:attr="out" b:attr="sorted" attr2="all" attr="I'm" + xmlns:b="http://www.ietf.org" + xmlns:a="http://www.w3.org" + xmlns="http://example.org"/> + <e6 xmlns="" xmlns:a="http://www.w3.org"> + <e7 xmlns="http://www.ietf.org"> + <e8 xmlns="" xmlns:a="http://www.w3.org"> + <e9 xmlns="" xmlns:a="http://www.ietf.org"/> + </e8> + </e7> + </e6> +</doc> diff --git a/src/lxml/tests/c14n-20/inC14N4.xml b/src/lxml/tests/c14n-20/inC14N4.xml new file mode 100644 index 00000000..909a8474 --- /dev/null +++ b/src/lxml/tests/c14n-20/inC14N4.xml @@ -0,0 +1,13 @@ +<!DOCTYPE doc [ +<!ATTLIST normId id ID #IMPLIED> +<!ATTLIST normNames attr NMTOKENS #IMPLIED> +]> +<doc> + <text>First line
 Second line</text> + <value>2</value> + <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute> + <compute expr='value>"0" && value<"10" ?"valid":"error"'>valid</compute> + <norm attr=' '   
	 ' '/> + <normNames attr=' A   
	 B '/> + <normId id=' '  
	 ' '/> +</doc> diff --git a/src/lxml/tests/c14n-20/inC14N5.xml b/src/lxml/tests/c14n-20/inC14N5.xml new file mode 100644 index 00000000..501161ba --- /dev/null +++ b/src/lxml/tests/c14n-20/inC14N5.xml @@ -0,0 +1,12 @@ +<!DOCTYPE doc [ +<!ATTLIST doc attrExtEnt CDATA #IMPLIED> +<!ENTITY ent1 "Hello"> +<!ENTITY ent2 SYSTEM "world.txt"> +<!ENTITY entExt SYSTEM "earth.gif" NDATA gif> +<!NOTATION gif SYSTEM "viewgif.exe"> +]> +<doc attrExtEnt="entExt"> + &ent1;, &ent2;! +</doc> + +<!-- Let world.txt contain "world" (excluding the quotes) --> diff --git a/src/lxml/tests/c14n-20/inC14N6.xml b/src/lxml/tests/c14n-20/inC14N6.xml new file mode 100644 index 00000000..31e20718 --- /dev/null +++ b/src/lxml/tests/c14n-20/inC14N6.xml @@ -0,0 +1,2 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> +<doc>©</doc> diff --git a/src/lxml/tests/c14n-20/inNsContent.xml b/src/lxml/tests/c14n-20/inNsContent.xml new file mode 100644 index 00000000..b9924660 --- /dev/null +++ b/src/lxml/tests/c14n-20/inNsContent.xml @@ -0,0 +1,4 @@ +<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:child="http://c" xmlns:soap-env="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:xsd="http://www.w3.org/2001/XMLSchema"> + <a:bar>xsd:string</a:bar> + <dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath> +</a:foo> diff --git a/src/lxml/tests/c14n-20/inNsDefault.xml b/src/lxml/tests/c14n-20/inNsDefault.xml new file mode 100644 index 00000000..3e0d323b --- /dev/null +++ b/src/lxml/tests/c14n-20/inNsDefault.xml @@ -0,0 +1,3 @@ +<foo xmlns:a="http://a" xmlns:b="http://b"> + <b:bar b:att1="val" att2="val"/> +</foo> diff --git a/src/lxml/tests/c14n-20/inNsPushdown.xml b/src/lxml/tests/c14n-20/inNsPushdown.xml new file mode 100644 index 00000000..daa67d83 --- /dev/null +++ b/src/lxml/tests/c14n-20/inNsPushdown.xml @@ -0,0 +1,6 @@ +<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:c="http://c"> + <b:bar/> + <b:bar/> + <b:bar/> + <a:bar b:att1="val"/> +</a:foo> diff --git a/src/lxml/tests/c14n-20/inNsRedecl.xml b/src/lxml/tests/c14n-20/inNsRedecl.xml new file mode 100644 index 00000000..10bd97be --- /dev/null +++ b/src/lxml/tests/c14n-20/inNsRedecl.xml @@ -0,0 +1,3 @@ +<foo xmlns:a="http://z3" xmlns:b="http://z2" a:att1="val1" b:att2="val2"> + <bar xmlns="http://z0" xmlns:a="http://z2" a:att1="val1" b:att2="val2" xmlns:b="http://z3" /> +</foo> diff --git a/src/lxml/tests/c14n-20/inNsSort.xml b/src/lxml/tests/c14n-20/inNsSort.xml new file mode 100644 index 00000000..8e9fc01c --- /dev/null +++ b/src/lxml/tests/c14n-20/inNsSort.xml @@ -0,0 +1,4 @@ +<a:foo xmlns:a="http://z3" xmlns:b="http://z2" b:att1="val1" c:att3="val3" b:att2="val2" xmlns:c="http://z1" xmlns:d="http://z0"> + <c:bar/> + <c:bar d:att3="val3"/> +</a:foo> diff --git a/src/lxml/tests/c14n-20/inNsSuperfluous.xml b/src/lxml/tests/c14n-20/inNsSuperfluous.xml new file mode 100644 index 00000000..f77720f7 --- /dev/null +++ b/src/lxml/tests/c14n-20/inNsSuperfluous.xml @@ -0,0 +1,4 @@ +<foo xmlns:a="http://z0" xmlns:b="http://z0" a:att1="val1" b:att2="val2" xmlns="http://z0"> + <c:bar xmlns:a="http://z0" xmlns:c="http://z0" c:att3="val3"/> + <d:bar xmlns:d="http://z0"/> +</foo> diff --git a/src/lxml/tests/c14n-20/inNsXml.xml b/src/lxml/tests/c14n-20/inNsXml.xml new file mode 100644 index 00000000..7520cf3f --- /dev/null +++ b/src/lxml/tests/c14n-20/inNsXml.xml @@ -0,0 +1,3 @@ +<foo xmlns="http://z0" xml:id="23"> + <bar xsi:type="xsd:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">data</bar> +</foo> diff --git a/src/lxml/tests/c14n-20/out_inC14N1_c14nComment.xml b/src/lxml/tests/c14n-20/out_inC14N1_c14nComment.xml new file mode 100644 index 00000000..d98d1684 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N1_c14nComment.xml @@ -0,0 +1,6 @@ +<?xml-stylesheet href="doc.xsl" + type="text/xsl" ?> +<doc>Hello, world!<!-- Comment 1 --></doc> +<?pi-without-data?> +<!-- Comment 2 --> +<!-- Comment 3 -->
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inC14N1_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N1_c14nDefault.xml new file mode 100644 index 00000000..af9a9770 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N1_c14nDefault.xml @@ -0,0 +1,4 @@ +<?xml-stylesheet href="doc.xsl" + type="text/xsl" ?> +<doc>Hello, world!</doc> +<?pi-without-data?>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inC14N2_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N2_c14nDefault.xml new file mode 100644 index 00000000..2afa15cc --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N2_c14nDefault.xml @@ -0,0 +1,11 @@ +<doc> + <clean> </clean> + <dirty> A B </dirty> + <mixed> + A + <clean> </clean> + B + <dirty> A B </dirty> + C + </mixed> +</doc>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inC14N2_c14nTrim.xml b/src/lxml/tests/c14n-20/out_inC14N2_c14nTrim.xml new file mode 100644 index 00000000..7a1dc329 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N2_c14nTrim.xml @@ -0,0 +1 @@ +<doc><clean></clean><dirty>A B</dirty><mixed>A<clean></clean>B<dirty>A B</dirty>C</mixed></doc>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inC14N3_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N3_c14nDefault.xml new file mode 100644 index 00000000..662e108a --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N3_c14nDefault.xml @@ -0,0 +1,14 @@ +<doc> + <e1></e1> + <e2></e2> + <e3 id="elem3" name="elem3"></e3> + <e4 id="elem4" name="elem4"></e4> + <e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5> + <e6> + <e7 xmlns="http://www.ietf.org"> + <e8 xmlns=""> + <e9 attr="default"></e9> + </e8> + </e7> + </e6> +</doc>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inC14N3_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inC14N3_c14nPrefix.xml new file mode 100644 index 00000000..041e1ec8 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N3_c14nPrefix.xml @@ -0,0 +1,14 @@ +<n0:doc xmlns:n0=""> + <n0:e1></n0:e1> + <n0:e2></n0:e2> + <n0:e3 id="elem3" name="elem3"></n0:e3> + <n0:e4 id="elem4" name="elem4"></n0:e4> + <n1:e5 xmlns:n1="http://example.org" xmlns:n2="http://www.ietf.org" xmlns:n3="http://www.w3.org" attr="I'm" attr2="all" n2:attr="sorted" n3:attr="out"></n1:e5> + <n0:e6> + <n2:e7 xmlns:n2="http://www.ietf.org"> + <n0:e8> + <n0:e9 attr="default"></n0:e9> + </n0:e8> + </n2:e7> + </n0:e6> +</n0:doc>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inC14N3_c14nTrim.xml b/src/lxml/tests/c14n-20/out_inC14N3_c14nTrim.xml new file mode 100644 index 00000000..4f35ad96 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N3_c14nTrim.xml @@ -0,0 +1 @@ +<doc><e1></e1><e2></e2><e3 id="elem3" name="elem3"></e3><e4 id="elem4" name="elem4"></e4><e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5><e6><e7 xmlns="http://www.ietf.org"><e8 xmlns=""><e9 attr="default"></e9></e8></e7></e6></doc>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inC14N4_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N4_c14nDefault.xml new file mode 100644 index 00000000..243d0e61 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N4_c14nDefault.xml @@ -0,0 +1,10 @@ +<doc> + <text>First line
 +Second line</text> + <value>2</value> + <compute>value>"0" && value<"10" ?"valid":"error"</compute> + <compute expr="value>"0" && value<"10" ?"valid":"error"">valid</compute> + <norm attr=" ' 
	 ' "></norm> + <normNames attr="A 
	 B"></normNames> + <normId id="' 
	 '"></normId> +</doc>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inC14N4_c14nTrim.xml b/src/lxml/tests/c14n-20/out_inC14N4_c14nTrim.xml new file mode 100644 index 00000000..24d83ba8 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N4_c14nTrim.xml @@ -0,0 +1,2 @@ +<doc><text>First line
 +Second line</text><value>2</value><compute>value>"0" && value<"10" ?"valid":"error"</compute><compute expr="value>"0" && value<"10" ?"valid":"error"">valid</compute><norm attr=" ' 
	 ' "></norm><normNames attr="A 
	 B"></normNames><normId id="' 
	 '"></normId></doc>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inC14N5_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N5_c14nDefault.xml new file mode 100644 index 00000000..c232e740 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N5_c14nDefault.xml @@ -0,0 +1,3 @@ +<doc attrExtEnt="entExt"> + Hello, world! +</doc>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inC14N5_c14nTrim.xml b/src/lxml/tests/c14n-20/out_inC14N5_c14nTrim.xml new file mode 100644 index 00000000..3fa84b1e --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N5_c14nTrim.xml @@ -0,0 +1 @@ +<doc attrExtEnt="entExt">Hello, world!</doc>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inC14N6_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inC14N6_c14nDefault.xml new file mode 100644 index 00000000..0be38f98 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inC14N6_c14nDefault.xml @@ -0,0 +1 @@ +<doc>©</doc>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsContent_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsContent_c14nDefault.xml new file mode 100644 index 00000000..62d7e004 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsContent_c14nDefault.xml @@ -0,0 +1,4 @@ +<a:foo xmlns:a="http://a"> + <a:bar>xsd:string</a:bar> + <dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath> +</a:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml b/src/lxml/tests/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml new file mode 100644 index 00000000..20e1c2e9 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml @@ -0,0 +1,4 @@ +<n0:foo xmlns:n0="http://a"> + <n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema">n1:string</n0:bar> + <n4:IncludedXPath xmlns:n2="http://b" xmlns:n3="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:n4="http://www.w3.org/2010/xmldsig2#">/n3:body/child::n2:foo[@att1 != "c:val" and @att2 != 'xsd:string']</n4:IncludedXPath> +</n0:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameElem.xml b/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameElem.xml new file mode 100644 index 00000000..db8680da --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameElem.xml @@ -0,0 +1,4 @@ +<a:foo xmlns:a="http://a"> + <a:bar xmlns:xsd="http://www.w3.org/2001/XMLSchema">xsd:string</a:bar> + <dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath> +</a:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameXpathElem.xml b/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameXpathElem.xml new file mode 100644 index 00000000..df3b2157 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsContent_c14nQnameXpathElem.xml @@ -0,0 +1,4 @@ +<a:foo xmlns:a="http://a"> + <a:bar xmlns:xsd="http://www.w3.org/2001/XMLSchema">xsd:string</a:bar> + <dsig2:IncludedXPath xmlns:b="http://b" xmlns:dsig2="http://www.w3.org/2010/xmldsig2#" xmlns:soap-env="http://schemas.xmlsoap.org/wsdl/soap/">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath> +</a:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsDefault_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsDefault_c14nDefault.xml new file mode 100644 index 00000000..674b076d --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsDefault_c14nDefault.xml @@ -0,0 +1,3 @@ +<foo> + <b:bar xmlns:b="http://b" att2="val" b:att1="val"></b:bar> +</foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsDefault_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsDefault_c14nPrefix.xml new file mode 100644 index 00000000..83edaae9 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsDefault_c14nPrefix.xml @@ -0,0 +1,3 @@ +<n0:foo xmlns:n0=""> + <n1:bar xmlns:n1="http://b" att2="val" n1:att1="val"></n1:bar> +</n0:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsPushdown_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsPushdown_c14nDefault.xml new file mode 100644 index 00000000..fa4f21b5 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsPushdown_c14nDefault.xml @@ -0,0 +1,6 @@ +<a:foo xmlns:a="http://a"> + <b:bar xmlns:b="http://b"></b:bar> + <b:bar xmlns:b="http://b"></b:bar> + <b:bar xmlns:b="http://b"></b:bar> + <a:bar xmlns:b="http://b" b:att1="val"></a:bar> +</a:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsPushdown_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsPushdown_c14nPrefix.xml new file mode 100644 index 00000000..6d579200 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsPushdown_c14nPrefix.xml @@ -0,0 +1,6 @@ +<n0:foo xmlns:n0="http://a"> + <n1:bar xmlns:n1="http://b"></n1:bar> + <n1:bar xmlns:n1="http://b"></n1:bar> + <n1:bar xmlns:n1="http://b"></n1:bar> + <n0:bar xmlns:n1="http://b" n1:att1="val"></n0:bar> +</n0:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsRedecl_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsRedecl_c14nDefault.xml new file mode 100644 index 00000000..ba37f925 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsRedecl_c14nDefault.xml @@ -0,0 +1,3 @@ +<foo xmlns:a="http://z3" xmlns:b="http://z2" b:att2="val2" a:att1="val1"> + <bar xmlns="http://z0" xmlns:a="http://z2" xmlns:b="http://z3" a:att1="val1" b:att2="val2"></bar> +</foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsRedecl_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsRedecl_c14nPrefix.xml new file mode 100644 index 00000000..af3bb2d6 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsRedecl_c14nPrefix.xml @@ -0,0 +1,3 @@ +<n0:foo xmlns:n0="" xmlns:n1="http://z2" xmlns:n2="http://z3" n1:att2="val2" n2:att1="val1"> + <n3:bar xmlns:n3="http://z0" n1:att1="val1" n2:att2="val2"></n3:bar> +</n0:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsSort_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsSort_c14nDefault.xml new file mode 100644 index 00000000..8a92c5c6 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsSort_c14nDefault.xml @@ -0,0 +1,4 @@ +<a:foo xmlns:a="http://z3" xmlns:b="http://z2" xmlns:c="http://z1" c:att3="val3" b:att1="val1" b:att2="val2"> + <c:bar></c:bar> + <c:bar xmlns:d="http://z0" d:att3="val3"></c:bar> +</a:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsSort_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsSort_c14nPrefix.xml new file mode 100644 index 00000000..8d44c84f --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsSort_c14nPrefix.xml @@ -0,0 +1,4 @@ +<n2:foo xmlns:n0="http://z1" xmlns:n1="http://z2" xmlns:n2="http://z3" n0:att3="val3" n1:att1="val1" n1:att2="val2"> + <n0:bar></n0:bar> + <n0:bar xmlns:n3="http://z0" n3:att3="val3"></n0:bar> +</n2:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nDefault.xml new file mode 100644 index 00000000..6bb862d7 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nDefault.xml @@ -0,0 +1,4 @@ +<foo xmlns="http://z0" xmlns:a="http://z0" xmlns:b="http://z0" a:att1="val1" b:att2="val2"> + <c:bar xmlns:c="http://z0" c:att3="val3"></c:bar> + <d:bar xmlns:d="http://z0"></d:bar> +</foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nPrefix.xml new file mode 100644 index 00000000..700a16d4 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsSuperfluous_c14nPrefix.xml @@ -0,0 +1,4 @@ +<n0:foo xmlns:n0="http://z0" n0:att1="val1" n0:att2="val2"> + <n0:bar n0:att3="val3"></n0:bar> + <n0:bar></n0:bar> +</n0:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsXml_c14nDefault.xml b/src/lxml/tests/c14n-20/out_inNsXml_c14nDefault.xml new file mode 100644 index 00000000..1689f3bf --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsXml_c14nDefault.xml @@ -0,0 +1,3 @@ +<foo xmlns="http://z0" xml:id="23"> + <bar xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="xsd:string">data</bar> +</foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefix.xml b/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefix.xml new file mode 100644 index 00000000..38508a47 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefix.xml @@ -0,0 +1,3 @@ +<n0:foo xmlns:n0="http://z0" xml:id="23"> + <n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema-instance" n1:type="xsd:string">data</n0:bar> +</n0:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefixQname.xml b/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefixQname.xml new file mode 100644 index 00000000..867980f8 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsXml_c14nPrefixQname.xml @@ -0,0 +1,3 @@ +<n0:foo xmlns:n0="http://z0" xml:id="23"> + <n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema" xmlns:n2="http://www.w3.org/2001/XMLSchema-instance" n2:type="n1:string">data</n0:bar> +</n0:foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/out_inNsXml_c14nQname.xml b/src/lxml/tests/c14n-20/out_inNsXml_c14nQname.xml new file mode 100644 index 00000000..0300f9d5 --- /dev/null +++ b/src/lxml/tests/c14n-20/out_inNsXml_c14nQname.xml @@ -0,0 +1,3 @@ +<foo xmlns="http://z0" xml:id="23"> + <bar xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="xsd:string">data</bar> +</foo>
\ No newline at end of file diff --git a/src/lxml/tests/c14n-20/world.txt b/src/lxml/tests/c14n-20/world.txt new file mode 100644 index 00000000..04fea064 --- /dev/null +++ b/src/lxml/tests/c14n-20/world.txt @@ -0,0 +1 @@ +world
\ No newline at end of file diff --git a/src/lxml/tests/test_elementtree.py b/src/lxml/tests/test_elementtree.py index 78701a71..3de74639 100644 --- a/src/lxml/tests/test_elementtree.py +++ b/src/lxml/tests/test_elementtree.py @@ -9,13 +9,15 @@ for IO related test cases. """ import copy +import io import operator import os import re import sys import textwrap import unittest -from functools import wraps +from contextlib import contextmanager +from functools import wraps, partial from itertools import islice this_dir = os.path.dirname(__file__) @@ -4637,6 +4639,171 @@ class _XMLPullParserTest(unittest.TestCase): self.etree.XMLPullParser(events=('start', 'end', 'bogus')) +class _C14NTest(unittest.TestCase): + etree = None + maxDiff = None + + if not hasattr(unittest.TestCase, 'subTest'): + @contextmanager + def subTest(self, name): + try: + yield + except Exception as e: + print("Subtest {} failed: {}".format(name, e)) + raise + + # + # simple roundtrip tests (from c14n.py) + + def c14n_roundtrip(self, xml, **options): + f = io.StringIO() + self.etree.canonicalize(f.write, xml, **options) + return f.getvalue() + + def test_simple_roundtrip(self): + c14n_roundtrip = self.c14n_roundtrip + # Basics + self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>') + self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME + '<doc xmlns="uri"></doc>') + self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"), + '<prefix:doc xmlns:prefix="uri"></prefix:doc>') + self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"), + '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>') + self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"), + '<elem></elem>') + + # C14N spec + self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"), + '<doc>Hello, world!</doc>') + self.assertEqual(c14n_roundtrip("<value>2</value>"), + '<value>2</value>') + self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'), + '<compute>value>"0" && value<"10" ?"valid":"error"</compute>') + self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" && value<"10" ?"valid":"error"'>valid</compute>'''), + '<compute expr="value>"0" && value<"10" ?"valid":"error"">valid</compute>') + self.assertEqual(c14n_roundtrip("<norm attr=' '   
	 ' '/>"), + '<norm attr=" \' 
	 \' "></norm>') + self.assertEqual(c14n_roundtrip("<normNames attr=' A   
	 B '/>"), + '<normNames attr=" A 
	 B "></normNames>') + self.assertEqual(c14n_roundtrip("<normId id=' '   
	 ' '/>"), + '<normId id=" \' 
	 \' "></normId>') + + # fragments from PJ's tests + #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"), + #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>') + + # + # basic method=c14n tests from the c14n 2.0 specification. uses + # test files under xmltestdata/c14n-20. + + # note that this uses generated C14N versions of the standard ET.write + # output, not roundtripped C14N (see above). + + def test_xml_c14n2(self): + datadir = os.path.join(os.path.dirname(__file__), "c14n-20") + full_path = partial(os.path.join, datadir) + + files = [filename[:-4] for filename in sorted(os.listdir(datadir)) + if filename.endswith('.xml')] + input_files = [ + filename for filename in files + if filename.startswith('in') + ] + configs = { + filename: { + # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite> + option.tag.split('}')[-1]: ((option.text or '').strip(), option) + for option in self.etree.parse(full_path(filename) + ".xml").getroot() + } + for filename in files + if filename.startswith('c14n') + } + + tests = { + input_file: [ + (filename, configs[filename.rsplit('_', 1)[-1]]) + for filename in files + if filename.startswith('out_%s_' % input_file) + and filename.rsplit('_', 1)[-1] in configs + ] + for input_file in input_files + } + + # Make sure we found all test cases. + self.assertEqual(30, len([ + output_file for output_files in tests.values() + for output_file in output_files])) + + def get_option(config, option_name, default=None): + return config.get(option_name, (default, ()))[0] + + for input_file, output_files in tests.items(): + for output_file, config in output_files: + keep_comments = get_option( + config, 'IgnoreComments') == 'true' # no, it's right :) + strip_text = get_option( + config, 'TrimTextNodes') == 'true' + rewrite_prefixes = get_option( + config, 'PrefixRewrite') == 'sequential' + if 'QNameAware' in config: + qattrs = [ + "{%s}%s" % (el.get('NS'), el.get('Name')) + for el in config['QNameAware'][1].findall( + '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr') + ] + qtags = [ + "{%s}%s" % (el.get('NS'), el.get('Name')) + for el in config['QNameAware'][1].findall( + '{http://www.w3.org/2010/xml-c14n2}Element') + ] + else: + qtags = qattrs = None + + # Build subtest description from config. + config_descr = ','.join( + "%s=%s" % (name, value or ','.join(c.tag.split('}')[-1] for c in children)) + for name, (value, children) in sorted(config.items()) + ) + + with self.subTest("{}({})".format(output_file, config_descr)): + if input_file == 'inNsRedecl' and not rewrite_prefixes: + self.skipTest( + "Redeclared namespace handling is not supported in {}".format( + output_file)) + if input_file == 'inNsSuperfluous' and not rewrite_prefixes: + self.skipTest( + "Redeclared namespace handling is not supported in {}".format( + output_file)) + if 'QNameAware' in config and config['QNameAware'][1].find( + '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None: + self.skipTest( + "QName rewriting in XPath text is not supported in {}".format( + output_file)) + + out = io.StringIO() + with io.open(full_path(input_file + ".xml"), 'rb') as f: + if input_file == 'inC14N5': + # Hack: avoid setting up external entity resolution in the parser. + with open(full_path('world.txt'), 'rb') as entity_file: + f = io.BytesIO(f.read().replace(b'&ent2;', entity_file.read().strip())) + + self.etree.canonicalize( + out.write, file=f, + comments=keep_comments, + strip_text=strip_text, + rewrite_prefixes=rewrite_prefixes, + qname_aware_tags=qtags, qname_aware_attrs=qattrs) + text = out.getvalue() + with io.open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f: + expected = f.read() + if input_file == 'inC14N3' and self.etree is not etree: + # FIXME: cET resolves default attributes but ET does not! + expected = expected.replace(' attr="default"', '') + text = text.replace(' attr="default"', '') + self.assertEqual(expected, text) + + if etree: class ETreeTestCase(_ETreeTestCaseBase): etree = etree @@ -4647,6 +4814,9 @@ if etree: class ETreeElementSlicingTest(_ElementSlicingTest): etree = etree + class ETreeC14NTest(_C14NTest): + etree = etree + if ElementTree: class ElementTreeTestCase(_ETreeTestCaseBase): @@ -4671,6 +4841,12 @@ if ElementTree: else: ElementTreePullTestCase = None + if hasattr(ElementTree, 'canonicalize'): + class ElementTreeC14NTest(_C14NTest): + etree = ElementTree + else: + ElementTreeC14NTest = None + class ElementTreeElementSlicingTest(_ElementSlicingTest): etree = ElementTree @@ -4693,10 +4869,13 @@ def test_suite(): suite.addTests([unittest.makeSuite(ETreeTestCase)]) suite.addTests([unittest.makeSuite(ETreePullTestCase)]) suite.addTests([unittest.makeSuite(ETreeElementSlicingTest)]) + suite.addTests([unittest.makeSuite(ETreeC14NTest)]) if ElementTree: suite.addTests([unittest.makeSuite(ElementTreeTestCase)]) if ElementTreePullTestCase: suite.addTests([unittest.makeSuite(ElementTreePullTestCase)]) + if ElementTreeC14NTest: + suite.addTests([unittest.makeSuite(ElementTreeC14NTest)]) suite.addTests([unittest.makeSuite(ElementTreeElementSlicingTest)]) if cElementTree: suite.addTests([unittest.makeSuite(CElementTreeTestCase)]) |
