diff options
Diffstat (limited to 'Lib/HTMLParser.py')
-rw-r--r-- | Lib/HTMLParser.py | 20 |
1 files changed, 11 insertions, 9 deletions
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 94ebc7f8dc..cd353f8ca0 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -24,22 +24,23 @@ starttagopen = re.compile('<[a-zA-Z]')
 piclose = re.compile('>')
 commentclose = re.compile(r'--\s*>')
 tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
+
 attrfind = re.compile(
-    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
+    r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
+    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
 
 locatestarttagend = re.compile(r"""
   <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
   (?:\s+                             # whitespace before attribute name
-    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
-      (?:\s*=\s*                     # value indicator
+    (?:(?<=['"\s])[^\s/>][^\s/=>]*   # attribute name
+      (?:\s*=+\s*                    # value indicator
         (?:'[^']*'                   # LITA-enclosed value
-          |\"[^\"]*\"                # LIT-enclosed value
-          |[^'\">\s]+                # bare value
+          |"[^"]*"                   # LIT-enclosed value
+          |(?!['"])[^>\s]*           # bare value
          )
-       )?
-     )
-   )*
+       )?\s*
+     )*
+   )?
   \s*                                # trailing whitespace
 """, re.VERBOSE)
 endendtag = re.compile('>')
@@ -254,6 +255,7 @@ class HTMLParser(markupbase.ParserBase):
             elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                  attrvalue[:1] == '"' == attrvalue[-1:]:
                 attrvalue = attrvalue[1:-1]
+            if attrvalue:
                 attrvalue = self.unescape(attrvalue)
             attrs.append((attrname.lower(), attrvalue))
             k = m.end()