From 4b92cc3f7924e455b7e41cf1a66034a44ede0cc0 Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Mon, 13 Feb 2012 16:10:44 +0200 Subject: #13960: HTMLParser is now able to handle broken comments. --- Lib/test/test_htmlparser.py | 58 +++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 21 deletions(-) (limited to 'Lib/test/test_htmlparser.py') diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 14ed80c5d0..29a721cf45 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -114,7 +114,7 @@ comment1b--> sample text “ - + """, [ ("data", "\n"), @@ -142,24 +142,6 @@ text ("data", " foo"), ]) - def test_doctype_decl(self): - inside = """\ -DOCTYPE html [ - - - - - - - %paramEntity; - -]""" - self._run_check("" % inside, [ - ("decl", inside), - ]) - def test_bad_nesting(self): # Strangely, this *is* supposed to test that overlapping # elements are allowed. HTMLParser is more geared toward @@ -182,7 +164,8 @@ DOCTYPE html [ ]) def test_illegal_declarations(self): - self._parse_error('') + self._run_check('', + [('comment', 'spacer type="block" height="25"')]) def test_starttag_end_boundary(self): self._run_check("""""", [("starttag", "a", [("b", "<")])]) @@ -233,7 +216,7 @@ DOCTYPE html [ self._parse_error("", [ @@ -449,6 +432,39 @@ class AttributesTestCase(TestCaseBase): [("href", "http://www.example.org/\">;")]), ("data", "spam"), ("endtag", "a")]) + def test_comments(self): + html = ("" + '' + '' + '' + '' + '' + '') + expected = [('comment', " I'm a valid comment "), + ('comment', 'me too!'), + ('comment', '--'), + ('comment', ''), + ('comment', '--I have many hyphens--'), + ('comment', ' I have a > in the middle '), + ('comment', ' and I have -- in the middle! ')] + self._run_check(html, expected) + + def test_broken_comments(self): + html = ('' + '' + '' + '' + '') + expected = [ + ('comment', ' not really a comment '), + ('comment', ' not a comment either --'), + ('comment', ' -- close enough --'), + ('comment', ''), + ('comment', '<-- this was an empty comment'), + ('comment', '!! another bogus comment !!!'), + ] + self._run_check(html, expected) + def test_condcoms(self): html = ('' '' -- cgit v1.2.1