summary | refs | log | tree | commit | diff
path: root/src/lxml/html
diff options
context:
space:
mode:
author: scoder <none@none> 2008-05-24 10:09:04 +0200
committer: scoder <none@none> 2008-05-24 10:09:04 +0200
commit: b778258d36846cce7e69b67ef51bf2b4eead4e44 (patch)
tree: beb812d981d7d1ceab1c2fbe662257e84ad5478d /src/lxml/html
parent: 7064acd7a7b7f2647470dfb96e3b6b22fe21a228 (diff)
download: python-lxml-b778258d36846cce7e69b67ef51bf2b4eead4e44.tar.gz
[svn r3734] r4329@delle: sbehnel | 2008-05-24 08:55:30 +0200
Py3 fixes --HG-- branch : trunk
Diffstat (limited to 'src/lxml/html')
-rw-r--r-- src/lxml/html/__init__.py 2
-rw-r--r-- src/lxml/html/tests/test_autolink.txt 40
-rw-r--r-- src/lxml/html/tests/test_basic.txt 20
3 files changed, 31 insertions, 31 deletions
diff --git a/src/lxml/html/__init__.py b/src/lxml/html/__init__.py
index 65ff4c0d..81e7d7b2 100644
--- a/src/lxml/html/__init__.py
+++ b/src/lxml/html/__init__.py
@@ -144,7 +144,7 @@ class HtmlMixin(object):
>>> h = fragment_fromstring('<div>Hello <b>World!</b></div>')
>>> h.find('.//b').drop_tag()
- >>> print tostring(h)
+ >>> print(tostring(h))
<div>Hello World!</div>
"""
parent = self.getparent()
diff --git a/src/lxml/html/tests/test_autolink.txt b/src/lxml/html/tests/test_autolink.txt
index f4660296..79075b97 100644
--- a/src/lxml/html/tests/test_autolink.txt
+++ b/src/lxml/html/tests/test_autolink.txt
@@ -2,43 +2,43 @@ This tests autolink::
>>> from lxml.html import usedoctest
>>> from lxml.html.clean import autolink_html
- >>> print autolink_html('''
+ >>> print(autolink_html('''
... <div>Link here: http://test.com/foo.html.</div>
- ... ''')
+ ... '''))
<div>Link here: <a href="http://test.com/foo.html">http://test.com/foo.html</a>.</div>
- >>> print autolink_html('''
+ >>> print(autolink_html('''
... <div>Mail me at mailto:ianb@test.com or http://myhome.com</div>
- ... ''')
+ ... '''))
<div>Mail me at <a href="mailto:ianb@test.com">ianb@test.com</a>
or <a href="http://myhome.com">http://myhome.com</a></div>
- >>> print autolink_html('''
+ >>> print(autolink_html('''
... <div>The <b>great</b> thing is the http://link.com links <i>and</i>
- ... the http://foobar.com links.</div>''')
+ ... the http://foobar.com links.</div>'''))
<div>The <b>great</b> thing is the <a href="http://link.com">http://link.com</a> links <i>and</i>
the <a href="http://foobar.com">http://foobar.com</a> links.</div>
- >>> print autolink_html('''
- ... <div>Link: &lt;http://foobar.com&gt;</div>''')
+ >>> print(autolink_html('''
+ ... <div>Link: &lt;http://foobar.com&gt;</div>'''))
<div>Link: &lt;<a href="http://foobar.com">http://foobar.com</a>&gt;</div>
- >>> print autolink_html('''
- ... <div>Link: (http://foobar.com)</div>''')
+ >>> print(autolink_html('''
+ ... <div>Link: (http://foobar.com)</div>'''))
<div>Link: (<a href="http://foobar.com">http://foobar.com</a>)</div>
Some cases that won't be caught (on purpose)::
- >>> print autolink_html('''
+ >>> print(autolink_html('''
... <div>A link to http://localhost/foo/bar won't, but a link to
- ... http://test.com will</div>''')
+ ... http://test.com will</div>'''))
<div>A link to http://localhost/foo/bar won't, but a link to
<a href="http://test.com">http://test.com</a> will</div>
- >>> print autolink_html('''
- ... <div>A link in <textarea>http://test.com</textarea></div>''')
+ >>> print(autolink_html('''
+ ... <div>A link in <textarea>http://test.com</textarea></div>'''))
<div>A link in <textarea>http://test.com</textarea></div>
- >>> print autolink_html('''
- ... <div>A link in <a href="http://foo.com">http://bar.com</a></div>''')
+ >>> print(autolink_html('''
+ ... <div>A link in <a href="http://foo.com">http://bar.com</a></div>'''))
<div>A link in <a href="http://foo.com">http://bar.com</a></div>
- >>> print autolink_html('''
+ >>> print(autolink_html('''
... <div>A link in <code>http://foo.com</code> or
- ... <span class="nolink">http://bar.com</span></div>''')
+ ... <span class="nolink">http://bar.com</span></div>'''))
<div>A link in <code>http://foo.com</code> or
<span class="nolink">http://bar.com</span></div>
@@ -47,7 +47,7 @@ after autolink::
>>> from lxml.html.clean import word_break_html
>>> def pascii(s):
- ... print s.decode('utf8').encode('ascii', 'xmlcharrefreplace')
+ ... print(s.decode('utf8').encode('ascii', 'xmlcharrefreplace').decode('ascii'))
>>> pascii(word_break_html('''
... <div>Hey you
... 12345678901234567890123456789012345678901234567890</div>'''))
@@ -65,4 +65,4 @@ Not everything is broken:
... <a href="12345678901234567890123456789012345678901234567890">text</a>'''))
<a href="12345678901234567890123456789012345678901234567890">text</a>
- \ No newline at end of file
+
diff --git a/src/lxml/html/tests/test_basic.txt b/src/lxml/html/tests/test_basic.txt
index 416cdead..c787a0e3 100644
--- a/src/lxml/html/tests/test_basic.txt
+++ b/src/lxml/html/tests/test_basic.txt
@@ -11,7 +11,7 @@ lxml.html adds a find_class method to elements::
... fn url" href="foobar">P1</a>
... <a class="not-fn vcard" href="baz">P2</a>
... </body></html>''')
- >>> print tostring(h)
+ >>> print(tostring(h))
<html>
<head></head>
<body>
@@ -20,9 +20,9 @@ lxml.html adds a find_class method to elements::
<a class="not-fn vcard" href="baz">P2</a>
</body>
</html>
- >>> print [e.text for e in h.find_class('fn')]
+ >>> print([e.text for e in h.find_class('fn')])
['P1']
- >>> print [e.text for e in h.find_class('vcard')]
+ >>> print([e.text for e in h.find_class('vcard')])
['P1', 'P2']
Also added is a get_rel_links, which you can use to search for links
@@ -33,17 +33,17 @@ like ``<a rel="$something">``::
... <a href="2" rel="tag">item 2</a>
... <a href="3" rel="tagging">item 3</a>
... <a href="4" rel="TAG">item 4</a>''')
- >>> print [e.attrib['href'] for e in h.find_rel_links('tag')]
+ >>> print([e.attrib['href'] for e in h.find_rel_links('tag')])
['2', '4']
- >>> print [e.attrib['href'] for e in h.find_rel_links('nofollow')]
+ >>> print([e.attrib['href'] for e in h.find_rel_links('nofollow')])
[]
Another method is ``get_element_by_id`` that does what it says::
- >>> print tostring(fragment_fromstring('''
+ >>> print(tostring(fragment_fromstring('''
... <div>
... <span id="test">stuff</span>
- ... </div>''').get_element_by_id('test'))
+ ... </div>''').get_element_by_id('test')))
<span id="test">stuff</span>
Or to get the content of an element without the tags, use text_content()::
@@ -66,7 +66,7 @@ Or drop an element (leaving its content) or the entire tree, like::
... </body>
... </html>''')
>>> doc.get_element_by_id('link').drop_tag()
- >>> print tostring(doc)
+ >>> print(tostring(doc))
<html>
<body>
<div id="body">
@@ -77,7 +77,7 @@ Or drop an element (leaving its content) or the entire tree, like::
</body>
</html>
>>> doc.get_element_by_id('body').drop_tree()
- >>> print tostring(doc)
+ >>> print(tostring(doc))
<html>
<body>
<!-- a comment -->
@@ -90,7 +90,7 @@ drop the comment. Here, ``drop_tag()`` behaves exactly like ``drop_tree()``:
>>> for comment in doc.getiterator(Comment):
... comment.drop_tag()
- >>> print tostring(doc)
+ >>> print(tostring(doc))
<html>
<body>
<div>footer</div>