summaryrefslogtreecommitdiff
path: root/Lib/urlparse.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/urlparse.py')
-rw-r--r--Lib/urlparse.py86
1 files changed, 51 insertions, 35 deletions
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 294545c8c0..b42e0f4c62 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -5,6 +5,9 @@ urlparse module is based upon the following RFC specifications.
RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
and L. Masinter, January 2005.
+RFC 2732: "Format for Literal IPv6 Addresses in URL's" by R. Hinden,
+B. Carpenter and L. Masinter, December 1999.
+
RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.
Berners-Lee, R. Fielding, and L. Masinter, August 1998.
@@ -31,7 +34,8 @@ __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
# A classification of schemes ('' means apply by default)
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
'wais', 'file', 'https', 'shttp', 'mms',
- 'prospero', 'rtsp', 'rtspu', '', 'sftp']
+ 'prospero', 'rtsp', 'rtspu', '', 'sftp',
+ 'svn', 'svn+ssh']
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
'imap', 'wais', 'file', 'mms', 'https', 'shttp',
'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
@@ -85,22 +89,24 @@ class ResultMixin(object):
@property
def hostname(self):
- netloc = self.netloc
- if "@" in netloc:
- netloc = netloc.rsplit("@", 1)[1]
- if ":" in netloc:
- netloc = netloc.split(":", 1)[0]
- return netloc.lower() or None
+ netloc = self.netloc.split('@')[-1]
+ if '[' in netloc and ']' in netloc:
+ return netloc.split(']')[0][1:].lower()
+ elif ':' in netloc:
+ return netloc.split(':')[0].lower()
+ elif netloc == '':
+ return None
+ else:
+ return netloc.lower()
@property
def port(self):
- netloc = self.netloc
- if "@" in netloc:
- netloc = netloc.rsplit("@", 1)[1]
- if ":" in netloc:
- port = netloc.split(":", 1)[1]
+ netloc = self.netloc.split('@')[-1].split(']')[-1]
+ if ':' in netloc:
+ port = netloc.split(':')[1]
return int(port, 10)
- return None
+ else:
+ return None
from collections import namedtuple
@@ -172,6 +178,9 @@ def urlsplit(url, scheme='', allow_fragments=True):
url = url[i+1:]
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
+ if (('[' in netloc and ']' not in netloc) or
+ (']' in netloc and '[' not in netloc)):
+ raise ValueError("Invalid IPv6 URL")
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
@@ -183,10 +192,18 @@ def urlsplit(url, scheme='', allow_fragments=True):
if c not in scheme_chars:
break
else:
- scheme, url = url[:i].lower(), url[i+1:]
+ try:
+ # make sure "url" is not actually a port number (in which case
+ # "scheme" is really part of the path)
+ _testportnum = int(url[i+1:])
+ except ValueError:
+ scheme, url = url[:i].lower(), url[i+1:]
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
+ if (('[' in netloc and ']' not in netloc) or
+ (']' in netloc and '[' not in netloc)):
+ raise ValueError("Invalid IPv6 URL")
if allow_fragments and scheme in uses_fragment and '#' in url:
url, fragment = url.split('#', 1)
if scheme in uses_query and '?' in url:
@@ -244,14 +261,9 @@ def urljoin(base, url, allow_fragments=True):
if path[:1] == '/':
return urlunparse((scheme, netloc, path,
params, query, fragment))
- if not path:
+ if not path and not params:
path = bpath
- if not params:
- params = bparams
- else:
- path = path[:-1]
- return urlunparse((scheme, netloc, path,
- params, query, fragment))
+ params = bparams
if not query:
query = bquery
return urlunparse((scheme, netloc, path,
@@ -295,35 +307,39 @@ def urldefrag(url):
return url, ''
# unquote method for parse_qs and parse_qsl
-# Cannot use directly from urllib as it would create circular reference.
-# urllib uses urlparse methods ( urljoin)
-
+# Cannot use directly from urllib as it would create a circular reference
+# because urllib uses urlparse methods (urljoin). If you update this function,
+# update it also in urllib. This code duplication does not exist in Python 3.
_hexdig = '0123456789ABCDEFabcdef'
-_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig)
+_hextochr = dict((a+b, chr(int(a+b,16)))
+ for a in _hexdig for b in _hexdig)
def unquote(s):
"""unquote('abc%20def') -> 'abc def'."""
res = s.split('%')
- for i in xrange(1, len(res)):
- item = res[i]
+ # fastpath
+ if len(res) == 1:
+ return s
+ s = res[0]
+ for item in res[1:]:
try:
- res[i] = _hextochr[item[:2]] + item[2:]
+ s += _hextochr[item[:2]] + item[2:]
except KeyError:
- res[i] = '%' + item
+ s += '%' + item
except UnicodeDecodeError:
- res[i] = unichr(int(item[:2], 16)) + item[2:]
- return "".join(res)
+ s += unichr(int(item[:2], 16)) + item[2:]
+ return s
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
"""Parse a query given as a string argument.
Arguments:
- qs: URL-encoded query string to be parsed
+ qs: percent-encoded query string to be parsed
keep_blank_values: flag indicating whether blank values in
- URL encoded queries should be treated as blank strings.
+ percent-encoded queries should be treated as blank strings.
A true value indicates that blanks should be retained as
blank strings. The default false value indicates that
blank values are to be ignored and treated as if they were
@@ -346,10 +362,10 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
Arguments:
- qs: URL-encoded query string to be parsed
+ qs: percent-encoded query string to be parsed
keep_blank_values: flag indicating whether blank values in
- URL encoded queries should be treated as blank strings. A
+ percent-encoded queries should be treated as blank strings. A
true value indicates that blanks should be retained as blank
strings. The default false value indicates that blank values
are to be ignored and treated as if they were not included.