#13987: HTMLParser is now able to handle malformed start tags.

author: Ezio Melotti <ezio.melotti@gmail.com> 2012-02-15 13:19:10 +0200
committer: Ezio Melotti <ezio.melotti@gmail.com> 2012-02-15 13:19:10 +0200
commit: 65d36dab4d915eb9fada52b867301b546e840fae (patch)
tree: 62d6c6eaf8063e7c1bb263f7e2947c7cdfcbe3b4 /Lib/HTMLParser.py
parent: d2307cb48ab09baa846947c5c2c4001dce9b6e52 (diff)
download: cpython-git-65d36dab4d915eb9fada52b867301b546e840fae.tar.gz
1 file changed, 6 insertions, 4 deletions
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index d2268d02cd..5081a62562 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -315,8 +315,8 @@ class HTMLParser(markupbase.ParserBase):
                          - self.__starttag_text.rfind("\n")
             else:
                 offset = offset + len(self.__starttag_text)
-            self.error("junk characters in start tag: %r"
-                       % (rawdata[k:endpos][:20],))
+            self.handle_data(rawdata[i:endpos])
+            return endpos
         if end.endswith('/>'):
             # XHTML-style empty tag: <span attr="value" />
             self.handle_startendtag(tag, attrs)
@@ -353,8 +353,10 @@ class HTMLParser(markupbase.ParserBase):
                 # end of input in or before attribute value, or we have the
                 # '/' from a '/>' ending
                 return -1
-            self.updatepos(i, j)
-            self.error("malformed start tag")
+            if j > i:
+                return j
+            else:
+                return i + 1
         raise AssertionError("we should not get here!")
 
     # Internal -- parse endtag, return end or -1 if incomplete
author	Ezio Melotti <ezio.melotti@gmail.com>	2012-02-15 13:19:10 +0200
committer	Ezio Melotti <ezio.melotti@gmail.com>	2012-02-15 13:19:10 +0200
commit	65d36dab4d915eb9fada52b867301b546e840fae (patch)
tree	62d6c6eaf8063e7c1bb263f7e2947c7cdfcbe3b4 /Lib/HTMLParser.py
parent	d2307cb48ab09baa846947c5c2c4001dce9b6e52 (diff)
download	cpython-git-65d36dab4d915eb9fada52b867301b546e840fae.tar.gz