diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-15 13:19:10 +0200 |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-15 13:19:10 +0200 |
commit | 65d36dab4d915eb9fada52b867301b546e840fae (patch) | |
tree | 62d6c6eaf8063e7c1bb263f7e2947c7cdfcbe3b4 /Lib/HTMLParser.py | |
parent | d2307cb48ab09baa846947c5c2c4001dce9b6e52 (diff) | |
download | cpython-git-65d36dab4d915eb9fada52b867301b546e840fae.tar.gz |
#13987: HTMLParser is now able to handle malformed start tags.
Diffstat (limited to 'Lib/HTMLParser.py')
-rw-r--r-- | Lib/HTMLParser.py | 10 |
1 file changed, 6 insertions, 4 deletions
```diff
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index d2268d02cd..5081a62562 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -315,8 +315,8 @@ class HTMLParser(markupbase.ParserBase):
                          - self.__starttag_text.rfind("\n")
             else:
                 offset = offset + len(self.__starttag_text)
-            self.error("junk characters in start tag: %r"
-                       % (rawdata[k:endpos][:20],))
+            self.handle_data(rawdata[i:endpos])
+            return endpos
         if end.endswith('/>'):
             # XHTML-style empty tag: <span attr="value" />
             self.handle_startendtag(tag, attrs)
@@ -353,8 +353,10 @@ class HTMLParser(markupbase.ParserBase):
                 # end of input in or before attribute value, or we have the
                 # '/' from a '/>' ending
                 return -1
-            self.updatepos(i, j)
-            self.error("malformed start tag")
+            if j > i:
+                return j
+            else:
+                return i + 1
         raise AssertionError("we should not get here!")
 
     # Internal -- parse endtag, return end or -1 if incomplete
```