import unittest, sys from lxml.tests.common_imports import make_doctest, HelperTestCase try: import lxml.html.soupparser BS_INSTALLED = True except ImportError: if 'bs4' in sys.modules or 'BeautifulSoup' in sys.modules: raise # seems we managed to import BS but not soupparser BS_INSTALLED = False from lxml.html import tostring if BS_INSTALLED: class SoupParserTestCase(HelperTestCase): soupparser = lxml.html.soupparser def test_broken_attribute(self): html = """\
""" root = self.soupparser.fromstring(html) self.assertTrue(root.find('.//input').get('disabled') is not None) def test_empty(self): tree = self.soupparser.fromstring('') res = b'''''' self.assertEqual(tostring(tree), res) def test_text(self): tree = self.soupparser.fromstring('huhu') res = b'''huhu''' self.assertEqual(tostring(tree), res) def test_body(self): html = '''test
''' res = b'''test
''' tree = self.soupparser.fromstring(html) self.assertEqual(tostring(tree), res) def test_head_body(self): # HTML tag missing, parser should fix that html = 'test
' res = b'test
' tree = self.soupparser.fromstring(html) self.assertEqual(tostring(tree), res) def test_wrap_html(self): # outside , parser should fix that html = 'test
''' res = b'''test
''' tree = self.soupparser.fromstring(html).getroottree() self.assertEqual(tostring(tree, method='html'), res) def test_doctype1(self): # Test document type declaration, comments and PI's # outside the root html = \ '''Hello world!
''' res = \ b'''Hello world!
''' tree = self.soupparser.fromstring(html).getroottree() self.assertEqual(tree.docinfo.public_id, "-//W3C//DTD HTML 4.01//EN") self.assertEqual(tostring(tree), res) def test_doctype2(self): # Test document type declaration, comments and PI's # outside the root html = \ '''Hello world!
''' res = \ b'''Hello world!
''' tree = self.soupparser.fromstring(html).getroottree() self.assertEqual(tree.docinfo.public_id, "-//IETF//DTD HTML//EN") self.assertEqual(tostring(tree), res) def test_doctype_html5(self): # html 5 doctype declaration html = b'\n' tree = self.soupparser.fromstring(html).getroottree() self.assertTrue(tree.docinfo.public_id is None) self.assertEqual(tostring(tree), html) def test_suite(): suite = unittest.TestSuite() if BS_INSTALLED: suite.addTests([unittest.makeSuite(SoupParserTestCase)]) if sys.version_info[0] < 3: suite.addTests([make_doctest('../../../../doc/elementsoup.txt')]) return suite if __name__ == '__main__': unittest.main()' res = b'