diff options
Diffstat (limited to 'Lib/urllib.py')
-rw-r--r-- | Lib/urllib.py | 32 |
1 files changed, 23 insertions, 9 deletions
diff --git a/Lib/urllib.py b/Lib/urllib.py
index 33641a5700..f9655f9e88 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -28,6 +28,7 @@ import os
 import time
 import sys
 import base64
+import re
 
 from urlparse import urljoin as basejoin
 
@@ -1198,22 +1199,35 @@ def splitvalue(attr):
 _hexdig = '0123456789ABCDEFabcdef'
 _hextochr = dict((a + b, chr(int(a + b, 16)))
                  for a in _hexdig for b in _hexdig)
+_asciire = re.compile('([\x00-\x7f]+)')
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
-    res = s.split('%')
+    if _is_unicode(s):
+        if '%' not in s:
+            return s
+        bits = _asciire.split(s)
+        res = [bits[0]]
+        append = res.append
+        for i in range(1, len(bits), 2):
+            append(unquote(str(bits[i])).decode('latin1'))
+            append(bits[i + 1])
+        return ''.join(res)
+
+    bits = s.split('%')
     # fastpath
-    if len(res) == 1:
+    if len(bits) == 1:
         return s
-    s = res[0]
-    for item in res[1:]:
+    res = [bits[0]]
+    append = res.append
+    for item in bits[1:]:
         try:
-            s += _hextochr[item[:2]] + item[2:]
+            append(_hextochr[item[:2]])
+            append(item[2:])
         except KeyError:
-            s += '%' + item
-        except UnicodeDecodeError:
-            s += unichr(int(item[:2], 16)) + item[2:]
-    return s
+            append('%')
+            append(item)
+    return ''.join(res)
 
 def unquote_plus(s):
     """unquote('%7e/abc+def') -> '~/abc def'"""