diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-21 09:22:16 +0200 |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-21 09:22:16 +0200 |
commit | 36b7361fe76733b3a4944ef92b49bcea4584b740 (patch) | |
tree | 37ae2ab2af68f8334f6de81980ebbdef535b2662 /Lib/HTMLParser.py | |
parent | 9be6c3ddf09ce4bc5768f708867e6b30a5bc78cb (diff) | |
download | cpython-git-36b7361fe76733b3a4944ef92b49bcea4584b740.tar.gz |
HTMLParser is now able to handle slashes in the start tag.
Diffstat (limited to 'Lib/HTMLParser.py')
-rw-r--r-- | Lib/HTMLParser.py | 10 |
1 file changed, 5 insertions, 5 deletions
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
index 5081a62562..d4e14d4387 100644
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -28,19 +28,19 @@ tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
 tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*')
 
 attrfind = re.compile(
-    r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
-    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
+    r'[\s/]*((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
+    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
 
 locatestarttagend = re.compile(r"""
   <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
-  (?:\s+                             # whitespace before attribute name
-    (?:(?<=['"\s])[^\s/>][^\s/=>]*   # attribute name
+  (?:[\s/]*                          # optional whitespace before attribute name
+    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
       (?:\s*=+\s*                    # value indicator
         (?:'[^']*'                   # LITA-enclosed value
           |"[^"]*"                   # LIT-enclosed value
           |(?!['"])[^>\s]*           # bare value
          )
-       )?\s*
+       )?(?:\s|/(?!>))*
      )*
    )?
   \s*                                # trailing whitespace