diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-15 12:44:23 +0200 |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-15 12:44:23 +0200 |
commit | d2307cb48ab09baa846947c5c2c4001dce9b6e52 (patch) | |
tree | 5667706edf910500c90d351f4114e2b97d1c76df | |
parent | fd7e4964bbe8dcd750c46aa2a96aeaec97e7ef25 (diff) | |
download | cpython-git-d2307cb48ab09baa846947c5c2c4001dce9b6e52.tar.gz |
#13987: HTMLParser is now able to handle EOFs in the middle of a construct.
-rw-r--r-- | Lib/HTMLParser.py | 13 | ||||
-rw-r--r-- | Lib/test/test_htmlparser.py | 16 | ||||
-rw-r--r-- | Misc/NEWS | 3 |
3 files changed, 21 insertions, 11 deletions
```diff
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index f230c5f163..d2268d02cd 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -170,9 +170,16 @@ class HTMLParser(markupbase.ParserBase):
                 else:
                     break
                 if k < 0:
-                    if end:
-                        self.error("EOF in middle of construct")
-                    break
+                    if not end:
+                        break
+                    k = rawdata.find('>', i + 1)
+                    if k < 0:
+                        k = rawdata.find('<', i + 1)
+                        if k < 0:
+                            k = i + 1
+                    else:
+                        k += 1
+                    self.handle_data(rawdata[i:k])
                 i = self.updatepos(i, k)
             elif startswith("&#", i):
                 match = charref.match(rawdata, i)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 6667512785..ba775abdac 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -204,16 +204,16 @@ text
     def test_starttag_junk_chars(self):
         self._run_check("</>", [])
         self._run_check("</$>", [('comment', '$')])
-        self._parse_error("</")
-        self._parse_error("</a")
+        self._run_check("</", [('data', '</')])
+        self._run_check("</a", [('data', '</a')])
         self._parse_error("<a<a>")
         self._run_check("</a<a>", [('endtag', 'a<a')])
-        self._parse_error("<!")
-        self._parse_error("<a")
-        self._parse_error("<a foo='bar'")
-        self._parse_error("<a foo='bar")
-        self._parse_error("<a foo='>'")
-        self._parse_error("<a foo='>")
+        self._run_check("<!", [('data', '<!')])
+        self._run_check("<a", [('data', '<a')])
+        self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
+        self._run_check("<a foo='bar", [('data', "<a foo='bar")])
+        self._run_check("<a foo='>'", [('data', "<a foo='>'")])
+        self._run_check("<a foo='>", [('data', "<a foo='>")])
 
     def test_valid_doctypes(self):
         # from http://www.w3.org/QA/2002/04/valid-dtd-list.html
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -93,6 +93,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #13987: HTMLParser is now able to handle EOFs in the middle of a
+  construct.
+
 - Issue #13015: Fix a possible reference leak in defaultdict.__repr__.
   Patch by Suman Saha.
 
```