diff options
-rw-r--r-- | Lib/test/test_urllib.py | 26 | ||||
-rw-r--r-- | Lib/urllib.py | 4 | ||||
-rw-r--r-- | Lib/urlparse.py | 5 |
3 files changed, 31 insertions, 4 deletions
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index d05574cb99..402309ce3e 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -440,6 +440,32 @@ class UnquotingTests(unittest.TestCase):
                          "using unquote(): not all characters escaped: "
                          "%s" % result)
 
+    def test_unquoting_badpercent(self):
+        # Test unquoting on bad percent-escapes
+        given = '%xab'
+        expect = given
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+        given = '%x'
+        expect = given
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+        given = '%'
+        expect = given
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+
+    def test_unquoting_mixed_case(self):
+        # Test unquoting on mixed-case hex digits in the percent-escapes
+        given = '%Ab%eA'
+        expect = '\xab\xea'
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+
     def test_unquoting_parts(self):
         # Make sure unquoting works when have non-quoted characters
         # interspersed
diff --git a/Lib/urllib.py b/Lib/urllib.py
index 2fdf927f0a..e12d981ad0 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -1162,8 +1162,8 @@ def splitvalue(attr):
     if match: return match.group(1, 2)
     return attr, None
 
-_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
-_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+_hexdig = '0123456789ABCDEFabcdef'
+_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig)
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index c41e25eea1..15a394a0be 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -272,8 +272,9 @@ def urldefrag(url):
 
 # Cannot use directly from urllib as it would create circular reference.
 # urllib uses urlparse methods ( urljoin)
-_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
-_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+
+_hexdig = '0123456789ABCDEFabcdef'
+_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig)
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""