diff options
Diffstat (limited to 'urllib3')
| -rw-r--r-- | urllib3/__init__.py | 5 | ||||
| -rw-r--r-- | urllib3/_collections.py | 3 | ||||
| -rw-r--r-- | urllib3/connectionpool.py | 32 | ||||
| -rw-r--r-- | urllib3/poolmanager.py | 46 | ||||
| -rw-r--r-- | urllib3/request.py | 63 | ||||
| -rw-r--r-- | urllib3/response.py | 7 | ||||
| -rw-r--r-- | urllib3/sessionmanager.py | 86 | ||||
| -rw-r--r-- | urllib3/util/retry.py | 18 | ||||
| -rw-r--r-- | urllib3/util/sessioncontext.py | 74 |
9 files changed, 276 insertions, 58 deletions
diff --git a/urllib3/__init__.py b/urllib3/__init__.py index 49b9dc66..b0a4da46 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -18,7 +18,8 @@ from .util.request import make_headers from .util.url import get_host from .util.timeout import Timeout from .util.retry import Retry - +from .util.sessioncontext import SessionContext +from .sessionmanager import SessionManager # Set default logging handler to avoid "No handler found" warnings. import logging @@ -38,6 +39,7 @@ __all__ = ( 'HTTPSConnectionPool', 'PoolManager', 'ProxyManager', + 'SessionManager', 'HTTPResponse', 'Retry', 'Timeout', @@ -48,6 +50,7 @@ __all__ = ( 'get_host', 'make_headers', 'proxy_from_url', + 'SessionContext' ) logging.getLogger(__name__).addHandler(NullHandler()) diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 77cee017..9edce3c9 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -272,6 +272,9 @@ class HTTPHeaderDict(MutableMapping): getallmatchingheaders = getlist iget = getlist + def get_all(self, key, failobj=None): + return self.getlist(key) or failobj + def __repr__(self): return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index d7362d5d..e8c02c89 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -453,7 +453,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ Get a connection from the pool and perform an HTTP request. This is the lowest level call for making a request, so you'll need to specify all - the raw details. + the raw details. To make HTTP calls, use :func:`urllib3.request.RequestMethods.request` + instead. .. note:: @@ -671,28 +672,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): release_conn=release_conn, **response_kw) # Handle redirect? - redirect_location = redirect and response.get_redirect_location() - if redirect_location: - if response.status == 303: - method = 'GET' - - try: - retries = retries.increment(method, url, response=response, _pool=self) - except MaxRetryError: - if retries.raise_on_redirect: - # Release the connection for this response, since we're not - # returning it to be released manually. - response.release_conn() - raise - return response - - log.info("Redirecting %s -> %s", url, redirect_location) - return self.urlopen( - method, redirect_location, body, headers, - retries=retries, redirect=redirect, - assert_same_host=assert_same_host, - timeout=timeout, pool_timeout=pool_timeout, - release_conn=release_conn, **response_kw) + if redirect and response.get_redirect_location(): + return self.redirect( + response=response, method=method, retries=retries, + url=url, headers=headers, body=body, + assert_same_host=assert_same_host, timeout=timeout, + pool_timeout=pool_timeout, release_conn=release_conn, + redirect=redirect, **response_kw) # Check if we should retry the HTTP response. if retries.is_forced_retry(method, status_code=response.status): diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 276b54dd..992a281f 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -6,8 +6,7 @@ import logging from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme -from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown -from .packages.six.moves.urllib.parse import urljoin +from .exceptions import LocationValueError, ProxySchemeUnknown from .request import RequestMethods from .util.url import parse_url from .util.retry import Retry @@ -221,7 +220,7 @@ class PoolManager(RequestMethods): u = parse_url(url) return self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - def urlopen(self, method, url, redirect=True, **kw): + def urlopen(self, method, url, redirect=True, retries=None, **kw): """ Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` with custom cross-host redirect logic and only sends the request-uri @@ -229,47 +228,30 @@ class PoolManager(RequestMethods): The given ``url`` parameter must be absolute, such that an appropriate :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. + + This is a low-level method; use :func:`urllib3.request.RequestMethods.request` + instead. """ u = parse_url(url) conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - kw['assert_same_host'] = False kw['redirect'] = False if 'headers' not in kw: kw['headers'] = self.headers - if self.proxy is not None and u.scheme == "http": - response = conn.urlopen(method, url, **kw) - else: - response = conn.urlopen(method, u.request_uri, **kw) - - redirect_location = redirect and response.get_redirect_location() - if not redirect_location: - return response - - # Support relative URLs for redirecting. - redirect_location = urljoin(url, redirect_location) - - # RFC 7231, Section 6.4.4 - if response.status == 303: - method = 'GET' - - retries = kw.get('retries') if not isinstance(retries, Retry): retries = Retry.from_int(retries, redirect=redirect) - try: - retries = retries.increment(method, url, response=response, _pool=conn) - except MaxRetryError: - if retries.raise_on_redirect: - raise - return response - - kw['retries'] = retries - kw['redirect'] = redirect + if self.proxy is not None and u.scheme == "http": + response = conn.urlopen(method, url, retries=retries, **kw) + else: + response = conn.urlopen(method, u.request_uri, retries=retries, **kw) - log.info("Redirecting %s -> %s", url, redirect_location) - return self.urlopen(method, redirect_location, **kw) + if redirect and response.get_redirect_location(): + kw['redirect'] = redirect + return self.redirect(response=response, method=method, retries=retries, + url=url, pool=conn, **kw) + return response class ProxyManager(PoolManager): diff --git a/urllib3/request.py b/urllib3/request.py index c0fddff0..f1171498 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -1,10 +1,50 @@ from __future__ import absolute_import +import logging from .filepost import encode_multipart_formdata +from .exceptions import MaxRetryError + +from .packages.six.moves.urllib.request import Request as _Request from .packages.six.moves.urllib.parse import urlencode +from .packages.six.moves.urllib.parse import urljoin +__all__ = ['RequestMethods', 'Request'] -__all__ = ['RequestMethods'] +log = logging.getLogger(__name__) + + +class Request(_Request): + """ + Currently used as a shim to allow us to work with the stdlib cookie + handling, which expects a `urllib.request.Request`-like object. + """ + def __init__(self, *args, **kwargs): + del kwargs['method'] + # Request is an old-style class in Python 2 + _Request.__init__(self, *args, **kwargs) + self._cookies = [] + # If there's an existing Cookie header, let's split it up + # so we can handle it similarly to those we get from a jar. + if self.has_header('Cookie'): + self._cookies = self.get_header('Cookie').split('; ') + + def add_cookies(self, *cookies): + """ + We keep track of individual cookies so that we can keep them from + duplicating, and re-render the Cookie header when we get new ones. + """ + for each in cookies: + if each not in self._cookies: + self._cookies.append(each) + self.add_header('Cookie', '; '.join(self._cookies)) + + def get_all_headers(self): + """ + Returns a complete set of all headers + """ + headers = self.unredirected_hdrs.copy() + headers.update(self.headers) + return headers class RequestMethods(object): @@ -146,3 +186,24 @@ class RequestMethods(object): extra_kw.update(urlopen_kw) return self.urlopen(method, url, **extra_kw) + + def redirect(self, response, method, retries, **kwargs): + """ + Abstracts the redirect process to be used from any :class:`RequestMethods` object + """ + url = kwargs.pop('url', '') + redirect_location = urljoin(url, response.get_redirect_location()) + method = retries.redirect_method(method, response.status) + pool = kwargs.pop('pool', self) + try: + retries = retries.increment(method, url, response=response, _pool=pool) + except MaxRetryError: + if retries.raise_on_redirect: + # Release the connection for this response, since we're not + # returning it to be released manually. + response.release_conn() + raise + return response + + log.info("Redirecting %s -> %s", url, redirect_location) + return self.urlopen(method=method, url=redirect_location, retries=retries, **kwargs) diff --git a/urllib3/response.py b/urllib3/response.py index be2accda..5b8d28f6 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -187,6 +187,13 @@ class HTTPResponse(io.IOBase): def connection(self): return self._connection + def info(self): + """ + This is a compatibility method that's only used by urllib3's cookie + handlers; don't use it in your own code. + """ + return self.headers + def tell(self): """ Obtain the number of bytes pulled over the wire so far. May differ from diff --git a/urllib3/sessionmanager.py b/urllib3/sessionmanager.py new file mode 100644 index 00000000..3df71e6d --- /dev/null +++ b/urllib3/sessionmanager.py @@ -0,0 +1,86 @@ +from .util.retry import Retry +from .poolmanager import PoolManager, ProxyManager +from .request import RequestMethods, Request +from .util.sessioncontext import SessionContext + + +class SessionManager(RequestMethods): + """ + Allows arbitrary requests while maintaining session context across + those requests. Currently, that context consists of automatic + cookie storage and retrieval. + + :param manager: + An appropriate :class:`urllib3.poolmanager.PoolManager` or + :class:`urllib3.poolmanager.ProxyManager` object + to handle HTTP requests for the SessionManager + + :param context: + A predefined :class:`urllib3.util.context.SessionContext` object to use in the session; + if not provided, a new one will be created. + + :param headers: + Headers to include with all requests, unless other + headers are given explicitly. + + Example:: + + >>> manager = SessionManager(PoolManager()) + >>> manager.context.cookie_jar + <CookieJar[]> + >>> len(manager.context.cookie_jar) + 0 + >>> manager.request('GET', 'http://google.com') + >>> manager.request('GET', 'http://yahoo.com') + >>> len(manager.context.cookie_jar) + 2 + + """ + + manager_class = PoolManager + + def __init__(self, context=None, headers=None, manager=None, **manager_kw): + super(SessionManager, self).__init__(headers=headers) + self.manager = self.manager_class(**manager_kw) if manager is None else manager + self.context = context or SessionContext() + + def urlopen(self, method, url, redirect=True, retries=None, **kw): + """ + Same as :meth:`urllib3.poolmanager.PoolManager.urlopen` with added + request-context-managing special sauce. The received ``url`` param + must be an absolute path. + + This is a low-level method; use :func:`urllib3.request.RequestMethods.request` + instead. + """ + headers = kw.pop('headers', self.headers) + + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + + # Build a mock Request object to work with + request_object = Request(url=url, method=method, headers=headers) + self.context.apply_to(request_object) + modified_headers = request_object.get_all_headers() + + # Ensure that redirects happen at this level only + kw['redirect'] = False + kw['headers'] = modified_headers + + response = self.manager.urlopen(method, url, retries=retries, **kw) + + # Retrieve any context from the response + self.context.extract_from(response, request_object) + + # Redirect as necessary, and return. + if redirect and response.get_redirect_location(): + kw['redirect'] = redirect + kw['headers'] = headers + return self.redirect(response=response, method=method, + retries=retries, url=url, **kw) + return response + + +class ProxySessionManager(SessionManager): + + manager_class = ProxyManager diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py index f8f21810..edbfc873 100644 --- a/urllib3/util/retry.py +++ b/urllib3/util/retry.py @@ -13,7 +13,6 @@ from ..exceptions import ( ) from ..packages import six - log = logging.getLogger(__name__) # Data structure for representing the metadata of requests that result in a retry. @@ -21,6 +20,9 @@ RequestHistory = namedtuple('RequestHistory', ["method", "url", "error", "status", "redirect_location"]) +_POST_REDIRECT_DOWNGRADE_STATUSES = set([301, 302, 303]) + + class Retry(object): """ Retry configuration. @@ -311,6 +313,20 @@ class Retry(object): 'read={self.read}, redirect={self.redirect})').format( cls=type(self), self=self) + def redirect_method(self, method, status): + """ + Assuming we're doing a redirect, should we change HTTP methods? + """ + if method == 'GET': + return method + if method == 'HEAD': + return method + if status == 303: + return 'GET' + if method == 'POST' and status in _POST_REDIRECT_DOWNGRADE_STATUSES: + return 'GET' + return method + # For backwards compatibility (equivalent to pre-v1.9): Retry.DEFAULT = Retry(3) diff --git a/urllib3/util/sessioncontext.py b/urllib3/util/sessioncontext.py new file mode 100644 index 00000000..3b323dca --- /dev/null +++ b/urllib3/util/sessioncontext.py @@ -0,0 +1,74 @@ +import time + +from ..packages.six.moves.http_cookiejar import ( + DefaultCookiePolicy as PythonCookiePolicy, + CookieJar as PythonCookieJar +) + + +class DefaultCookiePolicy(PythonCookiePolicy): + """ + The default urllib3 cookie policy - similar to the Python default, + but :param:`strict_ns_domain` is set to `DomainStrict` for security. + """ + def __init__(self, *args, **kwargs): + policy = PythonCookiePolicy.DomainStrict + kwargs.setdefault('strict_ns_domain', policy) + # Old-style class on Python 2 + PythonCookiePolicy.__init__(self, *args, **kwargs) + + +class CookieJar(PythonCookieJar): + + def __init__(self, policy=None): + if policy is None: + policy = DefaultCookiePolicy() + # Old-style class on Python 2 + PythonCookieJar.__init__(self, policy=policy) + + def add_cookie_header(self, request): + """ + Add correct Cookie: header to Request object. + This is copied from and slightly modified from the stdlib version. + """ + with self._cookies_lock: + self._policy._now = self._now = int(time.time()) + cookies = self._cookies_for_request(request) + attrs = self._cookie_attrs(cookies) + # This is a modification; stdlib sets the entire cookie header + # and only if it's not there already. We're less picky. + if attrs: + request.add_cookies(*attrs) + + self.clear_expired_cookies() + + +class SessionContext(object): + """ + Extensible class encapsulated by :class:`.SessionManager`; currently + used to manage cookies. + + :param cookie_jar: + Used to pass a prebuilt :class:`CookieJar` into the + context to be used instead of an empty jar. + """ + + def __init__(self, cookie_jar=None): + # We unfortunately have to do it this way; empty cookie jars + # evaluate as falsey. + if cookie_jar is not None: + self.cookie_jar = cookie_jar + else: + self.cookie_jar = CookieJar() + + def apply_to(self, request): + """ + Applies changes from the context to the supplied :class:`.request.Request`. + """ + self.cookie_jar.add_cookie_header(request) + + def extract_from(self, response, request): + """ + Extracts context modifications (new cookies, etc) from the response and stores them. + """ + self.cookie_jar.extract_cookies(response, request) |
