diff options
Diffstat (limited to 'Lib/urllib/request.py')
-rw-r--r-- | Lib/urllib/request.py | 165 |
1 files changed, 73 insertions, 92 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index c789ffce63..3776536d38 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1,6 +1,3 @@ -# Issues in merging urllib and urllib2: -# 1. They both define a function named urlopen() - """An extensible library for opening URLs using a variety of protocols The simplest way to use this module is to call the urlopen function, @@ -83,6 +80,7 @@ f = urllib.request.urlopen('http://www.python.org/') # abstract factory for opener import base64 +import bisect import email import hashlib import http.client @@ -94,7 +92,6 @@ import re import socket import sys import time -import bisect from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( @@ -149,7 +146,7 @@ def request_host(request): comparison. """ - url = request.get_full_url() + url = request.full_url host = urlparse(url)[1] if host == "": host = request.get_header("Host", "") @@ -163,11 +160,7 @@ class Request: def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False): # unwrap('<URL:type://host/path>') --> 'type://host/path' - self.__original = unwrap(url) - self.type = None - # self.__r_type is what's left after doing the splittype - self.host = None - self.port = None + self.full_url = unwrap(url) self.data = data self.headers = {} for key, value in headers.items(): @@ -177,26 +170,23 @@ class Request: origin_req_host = request_host(self) self.origin_req_host = origin_req_host self.unverifiable = unverifiable + self._parse() - def __getattr__(self, attr): - # XXX this is a fallback mechanism to guard against these - # methods getting called in a non-standard order. this may be - # too complicated and/or unnecessary. - # XXX should the __r_XXX attributes be public? - if attr[:12] == '_Request__r_': - name = attr[12:] - if hasattr(Request, 'get_' + name): - getattr(self, 'get_' + name)() - return getattr(self, attr) - raise AttributeError(attr) + def _parse(self): + self.type, rest = splittype(self.full_url) + if self.type is None: + raise ValueError("unknown url type: %s" % self.full_url) + self.host, self.selector = splithost(rest) + if self.host: + self.host = unquote(self.host) def get_method(self): - if self.has_data(): + if self.data is not None: return "POST" else: return "GET" - # XXX these helper methods are lame + # Begin deprecated methods def add_data(self, data): self.data = data @@ -208,37 +198,31 @@ class Request: return self.data def get_full_url(self): - return self.__original + return self.full_url def get_type(self): - if self.type is None: - self.type, self.__r_type = splittype(self.__original) - if self.type is None: - raise ValueError("unknown url type: %s" % self.__original) return self.type def get_host(self): - if self.host is None: - self.host, self.__r_host = splithost(self.__r_type) - if self.host: - self.host = unquote(self.host) return self.host def get_selector(self): - return self.__r_host + return self.selector - def set_proxy(self, host, type): - self.host, self.type = host, type - self.__r_host = self.__original - - def has_proxy(self): - return self.__r_host == self.__original + def is_unverifiable(self): + return self.unverifiable def get_origin_req_host(self): return self.origin_req_host - def is_unverifiable(self): - return self.unverifiable + # End deprecated methods + + def set_proxy(self, host, type): + self.host, self.type = host, type + self.selector = self.full_url + + def has_proxy(self): + return self.selector == self.full_url def add_header(self, key, val): # useful for something like authentication @@ -344,10 +328,10 @@ class OpenerDirector: else: req = fullurl if data is not None: - req.add_data(data) + req.data = data req.timeout = timeout - protocol = req.get_type() + protocol = req.type # pre-process request meth_name = protocol+"_request" @@ -371,7 +355,7 @@ class OpenerDirector: if result: return result - protocol = req.get_type() + protocol = req.type result = self._call_chain(self.handle_open, protocol, protocol + '_open', req) if result: @@ -481,7 +465,7 @@ class HTTPErrorProcessor(BaseHandler): class HTTPDefaultErrorHandler(BaseHandler): def http_error_default(self, req, fp, code, msg, hdrs): - raise HTTPError(req.get_full_url(), code, msg, hdrs, fp) + raise HTTPError(req.full_url, code, msg, hdrs, fp) class HTTPRedirectHandler(BaseHandler): # maximum number of redirections to any single URL @@ -504,7 +488,7 @@ class HTTPRedirectHandler(BaseHandler): m = req.get_method() if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD") or code in (301, 302, 303) and m == "POST")): - raise HTTPError(req.get_full_url(), code, msg, headers, fp) + raise HTTPError(req.full_url, code, msg, headers, fp) # Strictly (according to RFC 2616), 301 or 302 in response to # a POST MUST NOT cause a redirection without confirmation @@ -518,7 +502,7 @@ class HTTPRedirectHandler(BaseHandler): if k.lower() not in CONTENT_HEADERS) return Request(newurl, headers=newheaders, - origin_req_host=req.get_origin_req_host(), + origin_req_host=req.origin_req_host, unverifiable=True) # Implementation note: To avoid the server sending us into an @@ -542,7 +526,7 @@ class HTTPRedirectHandler(BaseHandler): urlparts[2] = "/" newurl = urlunparse(urlparts) - newurl = urljoin(req.get_full_url(), newurl) + newurl = urljoin(req.full_url, newurl) # XXX Probably want to forget about the state of the current # request, although that might interact poorly with other @@ -557,7 +541,7 @@ class HTTPRedirectHandler(BaseHandler): visited = new.redirect_dict = req.redirect_dict if (visited.get(newurl, 0) >= self.max_repeats or len(visited) >= self.max_redirections): - raise HTTPError(req.get_full_url(), code, + raise HTTPError(req.full_url, code, self.inf_msg + msg, headers, fp) else: visited = new.redirect_dict = req.redirect_dict = {} @@ -664,7 +648,7 @@ class ProxyHandler(BaseHandler): meth(r, proxy, type)) def proxy_open(self, req, proxy, type): - orig_type = req.get_type() + orig_type = req.type proxy_type, user, password, hostport = _parse_proxy(proxy) if proxy_type is None: proxy_type = orig_type @@ -811,7 +795,7 @@ class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): auth_header = 'Authorization' def http_error_401(self, req, fp, code, msg, headers): - url = req.get_full_url() + url = req.full_url return self.http_error_auth_reqed('www-authenticate', url, req, headers) @@ -825,7 +809,7 @@ class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): # authority. Assume there isn't one, since urllib.request does not (and # should not, RFC 3986 s. 3.2.1) support requests for URLs containing # userinfo. - authority = req.get_host() + authority = req.host return self.http_error_auth_reqed('proxy-authenticate', authority, req, headers) @@ -864,7 +848,7 @@ class AbstractDigestAuthHandler: # prompting for the information. Crap. This isn't great # but it's better than the current 'repeat until recursion # depth exceeded' approach <wink> - raise HTTPError(req.get_full_url(), 401, "digest auth failed", + raise HTTPError(req.full_url, 401, "digest auth failed", headers, None) else: self.retried += 1 @@ -912,20 +896,20 @@ class AbstractDigestAuthHandler: if H is None: return None - user, pw = self.passwd.find_user_password(realm, req.get_full_url()) + user, pw = self.passwd.find_user_password(realm, req.full_url) if user is None: return None # XXX not implemented yet - if req.has_data(): - entdig = self.get_entity_digest(req.get_data(), chal) + if req.data is not None: + entdig = self.get_entity_digest(req.data, chal) else: entdig = None A1 = "%s:%s:%s" % (user, realm, pw) A2 = "%s:%s" % (req.get_method(), # XXX selector: what about proxies and full urls - req.get_selector()) + req.selector) if qop == 'auth': self.nonce_count += 1 ncvalue = '%08x' % self.nonce_count @@ -941,7 +925,7 @@ class AbstractDigestAuthHandler: # XXX should the partial digests be encoded too? base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ - 'response="%s"' % (user, realm, nonce, req.get_selector(), + 'response="%s"' % (user, realm, nonce, req.selector, respdig) if opaque: base += ', opaque="%s"' % opaque @@ -978,7 +962,7 @@ class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): handler_order = 490 # before Basic auth def http_error_401(self, req, fp, code, msg, headers): - host = urlparse(req.get_full_url())[1] + host = urlparse(req.full_url)[1] retry = self.http_error_auth_reqed('www-authenticate', host, req, headers) self.reset_retry_count() @@ -991,7 +975,7 @@ class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): handler_order = 490 # before Basic auth def http_error_407(self, req, fp, code, msg, headers): - host = req.get_host() + host = req.host retry = self.http_error_auth_reqed('proxy-authenticate', host, req, headers) self.reset_retry_count() @@ -1006,12 +990,12 @@ class AbstractHTTPHandler(BaseHandler): self._debuglevel = level def do_request_(self, request): - host = request.get_host() + host = request.host if not host: raise URLError('no host given') - if request.has_data(): # POST - data = request.get_data() + if request.data is not None: # POST + data = request.data if not request.has_header('Content-type'): request.add_unredirected_header( 'Content-type', @@ -1022,7 +1006,7 @@ class AbstractHTTPHandler(BaseHandler): sel_host = host if request.has_proxy(): - scheme, sel = splittype(request.get_selector()) + scheme, sel = splittype(request.selector) sel_host, sel_path = splithost(sel) if not request.has_header('Host'): request.add_unredirected_header('Host', sel_host) @@ -1034,16 +1018,11 @@ class AbstractHTTPHandler(BaseHandler): return request def do_open(self, http_class, req): - """Return an addinfourl object for the request, using http_class. + """Return an HTTPResponse object for the request, using http_class. http_class must implement the HTTPConnection API from http.client. - The addinfourl return value is a file-like object. It also - has methods and attributes including: - - info(): return a email Message object for the headers - - geturl(): return the original request URL - - code: HTTP status code """ - host = req.get_host() + host = req.host if not host: raise URLError('no host given') @@ -1061,19 +1040,21 @@ class AbstractHTTPHandler(BaseHandler): # So make sure the connection gets closed after the (only) # request. headers["Connection"] = "close" - headers = dict( - (name.title(), val) for name, val in headers.items()) + headers = dict((name.title(), val) for name, val in headers.items()) try: - h.request(req.get_method(), req.get_selector(), req.data, headers) - r = h.getresponse() - except socket.error as err: # XXX what error? + h.request(req.get_method(), req.selector, req.data, headers) + r = h.getresponse() # an HTTPResponse instance + except socket.error as err: raise URLError(err) -## resp = addinfourl(r.fp, r.msg, req.get_full_url()) - resp = addinfourl(r, r.msg, req.get_full_url()) - resp.code = r.status - resp.msg = r.reason - return resp + r.url = req.full_url + # This line replaces the .msg attribute of the HTTPResponse + # with .headers, because urllib clients expect the response to + # have the reason in .msg. It would be good to mark this + # attribute is deprecated and get then to use info() or + # .headers. + r.msg = r.reason + return r class HTTPHandler(AbstractHTTPHandler): @@ -1111,7 +1092,7 @@ class HTTPCookieProcessor(BaseHandler): class UnknownHandler(BaseHandler): def unknown_open(self, req): - type = req.get_type() + type = req.type raise URLError('unknown url type: %s' % type) def parse_keqv_list(l): @@ -1170,7 +1151,7 @@ def parse_http_list(s): class FileHandler(BaseHandler): # Use local file or FTP depending on form of URL def file_open(self, req): - url = req.get_selector() + url = req.selector if url[:2] == '//' and url[2:3] != '/': req.type = 'ftp' return self.parent.open(req) @@ -1192,8 +1173,8 @@ class FileHandler(BaseHandler): def open_local_file(self, req): import email.utils import mimetypes - host = req.get_host() - file = req.get_selector() + host = req.host + file = req.selector localfile = url2pathname(file) try: stats = os.stat(localfile) @@ -1223,7 +1204,7 @@ class FTPHandler(BaseHandler): def ftp_open(self, req): import ftplib import mimetypes - host = req.get_host() + host = req.host if not host: raise URLError('ftp error: no host given') host, port = splitport(host) @@ -1246,7 +1227,7 @@ class FTPHandler(BaseHandler): host = socket.gethostbyname(host) except socket.error as msg: raise URLError(msg) - path, attrs = splitattr(req.get_selector()) + path, attrs = splitattr(req.selector) dirs = path.split('/') dirs = list(map(unquote, dirs)) dirs, file = dirs[:-1], dirs[-1] @@ -1262,13 +1243,13 @@ class FTPHandler(BaseHandler): type = value.upper() fp, retrlen = fw.retrfile(file, type) headers = "" - mtype = mimetypes.guess_type(req.get_full_url())[0] + mtype = mimetypes.guess_type(req.full_url)[0] if mtype: headers += "Content-type: %s\n" % mtype if retrlen is not None and retrlen >= 0: headers += "Content-length: %d\n" % retrlen headers = email.message_from_string(headers) - return addinfourl(fp, headers, req.get_full_url()) + return addinfourl(fp, headers, req.full_url) except ftplib.all_errors as msg: exc = URLError('ftp error: %s' % msg) raise exc.with_traceback(sys.exc_info()[2]) @@ -1581,9 +1562,9 @@ class URLopener: else: auth = None http_conn = connection_factory(host) - # XXX We should fix urllib so that it works with HTTP/1.1. - http_conn._http_vsn = 10 - http_conn._http_vsn_str = "HTTP/1.0" +## # XXX We should fix urllib so that it works with HTTP/1.1. +## http_conn._http_vsn = 10 +## http_conn._http_vsn_str = "HTTP/1.0" headers = {} if proxy_auth: |