summaryrefslogtreecommitdiff
path: root/urllib3
diff options
context:
space:
mode:
Diffstat (limited to 'urllib3')
-rw-r--r--urllib3/__init__.py5
-rw-r--r--urllib3/_collections.py3
-rw-r--r--urllib3/connectionpool.py32
-rw-r--r--urllib3/poolmanager.py46
-rw-r--r--urllib3/request.py63
-rw-r--r--urllib3/response.py7
-rw-r--r--urllib3/sessionmanager.py86
-rw-r--r--urllib3/util/retry.py18
-rw-r--r--urllib3/util/sessioncontext.py74
9 files changed, 276 insertions, 58 deletions
diff --git a/urllib3/__init__.py b/urllib3/__init__.py
index 49b9dc66..b0a4da46 100644
--- a/urllib3/__init__.py
+++ b/urllib3/__init__.py
@@ -18,7 +18,8 @@ from .util.request import make_headers
from .util.url import get_host
from .util.timeout import Timeout
from .util.retry import Retry
-
+from .util.sessioncontext import SessionContext
+from .sessionmanager import SessionManager
# Set default logging handler to avoid "No handler found" warnings.
import logging
@@ -38,6 +39,7 @@ __all__ = (
'HTTPSConnectionPool',
'PoolManager',
'ProxyManager',
+ 'SessionManager',
'HTTPResponse',
'Retry',
'Timeout',
@@ -48,6 +50,7 @@ __all__ = (
'get_host',
'make_headers',
'proxy_from_url',
+ 'SessionContext'
)
logging.getLogger(__name__).addHandler(NullHandler())
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index 77cee017..9edce3c9 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -272,6 +272,9 @@ class HTTPHeaderDict(MutableMapping):
getallmatchingheaders = getlist
iget = getlist
+    def get_all(self, key, failobj=None):
+        """
+        Return whatever :meth:`getlist` yields for ``key``, or ``failobj``
+        when that result is empty.
+        """
+        values = self.getlist(key)
+        if values:
+            return values
+        return failobj
+
def __repr__(self):
return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index d7362d5d..e8c02c89 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -453,7 +453,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
Get a connection from the pool and perform an HTTP request. This is the
lowest level call for making a request, so you'll need to specify all
- the raw details.
+ the raw details. To make HTTP calls, use :func:`urllib3.request.RequestMethods.request`
+ instead.
.. note::
@@ -671,28 +672,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
release_conn=release_conn, **response_kw)
# Handle redirect?
- redirect_location = redirect and response.get_redirect_location()
- if redirect_location:
- if response.status == 303:
- method = 'GET'
-
- try:
- retries = retries.increment(method, url, response=response, _pool=self)
- except MaxRetryError:
- if retries.raise_on_redirect:
- # Release the connection for this response, since we're not
- # returning it to be released manually.
- response.release_conn()
- raise
- return response
-
- log.info("Redirecting %s -> %s", url, redirect_location)
- return self.urlopen(
- method, redirect_location, body, headers,
- retries=retries, redirect=redirect,
- assert_same_host=assert_same_host,
- timeout=timeout, pool_timeout=pool_timeout,
- release_conn=release_conn, **response_kw)
+ if redirect and response.get_redirect_location():
+ return self.redirect(
+ response=response, method=method, retries=retries,
+ url=url, headers=headers, body=body,
+ assert_same_host=assert_same_host, timeout=timeout,
+ pool_timeout=pool_timeout, release_conn=release_conn,
+ redirect=redirect, **response_kw)
# Check if we should retry the HTTP response.
if retries.is_forced_retry(method, status_code=response.status):
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index 276b54dd..992a281f 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -6,8 +6,7 @@ import logging
from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
-from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
-from .packages.six.moves.urllib.parse import urljoin
+from .exceptions import LocationValueError, ProxySchemeUnknown
from .request import RequestMethods
from .util.url import parse_url
from .util.retry import Retry
@@ -221,7 +220,7 @@ class PoolManager(RequestMethods):
u = parse_url(url)
return self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
- def urlopen(self, method, url, redirect=True, **kw):
+ def urlopen(self, method, url, redirect=True, retries=None, **kw):
"""
Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
with custom cross-host redirect logic and only sends the request-uri
@@ -229,47 +228,30 @@ class PoolManager(RequestMethods):
The given ``url`` parameter must be absolute, such that an appropriate
:class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
+
+ This is a low-level method; use :func:`urllib3.request.RequestMethods.request`
+ instead.
"""
u = parse_url(url)
conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
-
kw['assert_same_host'] = False
kw['redirect'] = False
if 'headers' not in kw:
kw['headers'] = self.headers
- if self.proxy is not None and u.scheme == "http":
- response = conn.urlopen(method, url, **kw)
- else:
- response = conn.urlopen(method, u.request_uri, **kw)
-
- redirect_location = redirect and response.get_redirect_location()
- if not redirect_location:
- return response
-
- # Support relative URLs for redirecting.
- redirect_location = urljoin(url, redirect_location)
-
- # RFC 7231, Section 6.4.4
- if response.status == 303:
- method = 'GET'
-
- retries = kw.get('retries')
if not isinstance(retries, Retry):
retries = Retry.from_int(retries, redirect=redirect)
- try:
- retries = retries.increment(method, url, response=response, _pool=conn)
- except MaxRetryError:
- if retries.raise_on_redirect:
- raise
- return response
-
- kw['retries'] = retries
- kw['redirect'] = redirect
+ if self.proxy is not None and u.scheme == "http":
+ response = conn.urlopen(method, url, retries=retries, **kw)
+ else:
+ response = conn.urlopen(method, u.request_uri, retries=retries, **kw)
- log.info("Redirecting %s -> %s", url, redirect_location)
- return self.urlopen(method, redirect_location, **kw)
+ if redirect and response.get_redirect_location():
+ kw['redirect'] = redirect
+ return self.redirect(response=response, method=method, retries=retries,
+ url=url, pool=conn, **kw)
+ return response
class ProxyManager(PoolManager):
diff --git a/urllib3/request.py b/urllib3/request.py
index c0fddff0..f1171498 100644
--- a/urllib3/request.py
+++ b/urllib3/request.py
@@ -1,10 +1,50 @@
from __future__ import absolute_import
+import logging
from .filepost import encode_multipart_formdata
+from .exceptions import MaxRetryError
+
+from .packages.six.moves.urllib.request import Request as _Request
from .packages.six.moves.urllib.parse import urlencode
+from .packages.six.moves.urllib.parse import urljoin
+__all__ = ['RequestMethods', 'Request']
-__all__ = ['RequestMethods']
+log = logging.getLogger(__name__)
+
+
+class Request(_Request):
+    """
+    Currently used as a shim to allow us to work with the stdlib cookie
+    handling, which expects a `urllib.request.Request`-like object.
+
+    Accepts the same arguments as the base ``Request`` class; a ``method``
+    keyword argument is accepted but discarded before delegating.
+    """
+    def __init__(self, *args, **kwargs):
+        # Drop ``method`` before delegating to the base class. Use pop() with
+        # a default so that callers who never passed ``method`` do not hit a
+        # KeyError.
+        kwargs.pop('method', None)
+        # Request is an old-style class in Python 2
+        _Request.__init__(self, *args, **kwargs)
+        self._cookies = []
+        # If there's an existing Cookie header, let's split it up
+        # so we can handle it similarly to those we get from a jar.
+        if self.has_header('Cookie'):
+            self._cookies = self.get_header('Cookie').split('; ')
+
+    def add_cookies(self, *cookies):
+        """
+        We keep track of individual cookies so that we can keep them from
+        duplicating, and re-render the Cookie header when we get new ones.
+
+        :param cookies:
+            Individual cookie strings; any that are already tracked on
+            this request are skipped.
+        """
+        for each in cookies:
+            if each not in self._cookies:
+                self._cookies.append(each)
+        # Re-render the whole header from the tracked list.
+        self.add_header('Cookie', '; '.join(self._cookies))
+
+    def get_all_headers(self):
+        """
+        Returns a complete set of all headers: a copy of the unredirected
+        headers updated with the regular headers, so a regular header wins
+        when both define the same key.
+        """
+        headers = self.unredirected_hdrs.copy()
+        headers.update(self.headers)
+        return headers
class RequestMethods(object):
@@ -146,3 +186,24 @@ class RequestMethods(object):
extra_kw.update(urlopen_kw)
return self.urlopen(method, url, **extra_kw)
+
+    def redirect(self, response, method, retries, **kwargs):
+        """
+        Follow the redirect announced by ``response`` by re-issuing the
+        request through :meth:`urlopen`; usable from any
+        :class:`RequestMethods` object.
+
+        ``url`` (default ``''``) and ``pool`` (default ``self``) are taken
+        out of ``kwargs``; everything else in ``kwargs`` is forwarded to
+        :meth:`urlopen`. If the retry budget is exhausted, the error is
+        re-raised when ``retries.raise_on_redirect`` is set; otherwise the
+        redirect response itself is returned.
+        """
+        origin = kwargs.pop('url', '')
+        # Resolve the redirect location against the originating URL.
+        target = urljoin(origin, response.get_redirect_location())
+        method = retries.redirect_method(method, response.status)
+        pool = kwargs.pop('pool', self)
+
+        try:
+            retries = retries.increment(method, origin, response=response, _pool=pool)
+        except MaxRetryError:
+            if not retries.raise_on_redirect:
+                return response
+            # Release the connection for this response, since we're not
+            # returning it to be released manually.
+            response.release_conn()
+            raise
+
+        log.info("Redirecting %s -> %s", origin, target)
+        return self.urlopen(method=method, url=target, retries=retries, **kwargs)
diff --git a/urllib3/response.py b/urllib3/response.py
index be2accda..5b8d28f6 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -187,6 +187,13 @@ class HTTPResponse(io.IOBase):
def connection(self):
return self._connection
+    def info(self):
+        """
+        Return this response's ``headers`` attribute.
+
+        Compatibility hook that exists only for urllib3's cookie handlers;
+        don't call it from your own code.
+        """
+        headers = self.headers
+        return headers
+
def tell(self):
"""
Obtain the number of bytes pulled over the wire so far. May differ from
diff --git a/urllib3/sessionmanager.py b/urllib3/sessionmanager.py
new file mode 100644
index 00000000..3df71e6d
--- /dev/null
+++ b/urllib3/sessionmanager.py
@@ -0,0 +1,86 @@
+from .util.retry import Retry
+from .poolmanager import PoolManager, ProxyManager
+from .request import RequestMethods, Request
+from .util.sessioncontext import SessionContext
+
+
+class SessionManager(RequestMethods):
+    """
+    Allows arbitrary requests while maintaining session context across
+    those requests. Currently, that context consists of automatic
+    cookie storage and retrieval.
+
+    :param context:
+        A predefined :class:`urllib3.util.sessioncontext.SessionContext`
+        object to use in the session; if not provided, a new one will be
+        created.
+
+    :param headers:
+        Headers to include with all requests, unless other
+        headers are given explicitly.
+
+    :param manager:
+        An appropriate :class:`urllib3.poolmanager.PoolManager` or
+        :class:`urllib3.poolmanager.ProxyManager` object
+        to handle HTTP requests for the SessionManager. If not provided,
+        one is built from ``manager_class`` using ``**manager_kw``.
+
+    Example::
+
+        >>> manager = SessionManager(manager=PoolManager())
+        >>> manager.context.cookie_jar
+        <CookieJar[]>
+        >>> len(manager.context.cookie_jar)
+        0
+        >>> manager.request('GET', 'http://google.com')
+        >>> manager.request('GET', 'http://yahoo.com')
+        >>> len(manager.context.cookie_jar)
+        2
+
+    """
+
+    # Class used to build the underlying manager when none is supplied.
+    manager_class = PoolManager
+
+    def __init__(self, context=None, headers=None, manager=None, **manager_kw):
+        super(SessionManager, self).__init__(headers=headers)
+        self.manager = self.manager_class(**manager_kw) if manager is None else manager
+        self.context = context or SessionContext()
+
+    def urlopen(self, method, url, redirect=True, retries=None, **kw):
+        """
+        Same as :meth:`urllib3.poolmanager.PoolManager.urlopen` with added
+        request-context-managing special sauce. The received ``url`` param
+        must be an absolute URL.
+
+        This is a low-level method; use :func:`urllib3.request.RequestMethods.request`
+        instead.
+        """
+        # Fall back to the session-wide headers both when the caller omitted
+        # ``headers`` and when it was passed explicitly as None.
+        headers = kw.pop('headers', None)
+        if headers is None:
+            headers = self.headers
+
+        if not isinstance(retries, Retry):
+            retries = Retry.from_int(retries, redirect=redirect)
+
+        # Build a mock Request object to work with
+        request_object = Request(url=url, method=method, headers=headers)
+        self.context.apply_to(request_object)
+        modified_headers = request_object.get_all_headers()
+
+        # Ensure that redirects happen at this level only
+        kw['redirect'] = False
+        kw['headers'] = modified_headers
+
+        response = self.manager.urlopen(method, url, retries=retries, **kw)
+
+        # Retrieve any context from the response
+        self.context.extract_from(response, request_object)
+
+        # Redirect as necessary, and return.
+        if redirect and response.get_redirect_location():
+            kw['redirect'] = redirect
+            # Hand the caller's original headers to the redirected request so
+            # the context is applied afresh for the new URL.
+            kw['headers'] = headers
+            return self.redirect(response=response, method=method,
+                                 retries=retries, url=url, **kw)
+        return response
+
+
+class ProxySessionManager(SessionManager):
+    """
+    A :class:`SessionManager` whose default underlying manager class is
+    :class:`ProxyManager` rather than :class:`PoolManager`.
+    """
+
+    manager_class = ProxyManager
diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py
index f8f21810..edbfc873 100644
--- a/urllib3/util/retry.py
+++ b/urllib3/util/retry.py
@@ -13,7 +13,6 @@ from ..exceptions import (
)
from ..packages import six
-
log = logging.getLogger(__name__)
# Data structure for representing the metadata of requests that result in a retry.
@@ -21,6 +20,9 @@ RequestHistory = namedtuple('RequestHistory', ["method", "url", "error",
"status", "redirect_location"])
+_POST_REDIRECT_DOWNGRADE_STATUSES = set([301, 302, 303])
+
+
class Retry(object):
""" Retry configuration.
@@ -311,6 +313,20 @@ class Retry(object):
'read={self.read}, redirect={self.redirect})').format(
cls=type(self), self=self)
+    def redirect_method(self, method, status):
+        """
+        Pick the HTTP method to use when following a redirect.
+
+        ``GET`` and ``HEAD`` are always kept. Any other method becomes
+        ``GET`` on a 303, and ``POST`` also becomes ``GET`` on the statuses
+        listed in ``_POST_REDIRECT_DOWNGRADE_STATUSES``. Everything else is
+        returned unchanged.
+        """
+        if method in ('GET', 'HEAD'):
+            return method
+        downgrade = status == 303 or (
+            method == 'POST' and status in _POST_REDIRECT_DOWNGRADE_STATUSES)
+        return 'GET' if downgrade else method
+
# For backwards compatibility (equivalent to pre-v1.9):
Retry.DEFAULT = Retry(3)
diff --git a/urllib3/util/sessioncontext.py b/urllib3/util/sessioncontext.py
new file mode 100644
index 00000000..3b323dca
--- /dev/null
+++ b/urllib3/util/sessioncontext.py
@@ -0,0 +1,74 @@
+import time
+
+from ..packages.six.moves.http_cookiejar import (
+ DefaultCookiePolicy as PythonCookiePolicy,
+ CookieJar as PythonCookieJar
+)
+
+
+class DefaultCookiePolicy(PythonCookiePolicy):
+    """
+    urllib3's default cookie policy: the Python default policy, except
+    that ``strict_ns_domain`` defaults to ``DomainStrict`` for security.
+    """
+    def __init__(self, *args, **kwargs):
+        # Only supply the stricter default when the caller made no choice.
+        if 'strict_ns_domain' not in kwargs:
+            kwargs['strict_ns_domain'] = PythonCookiePolicy.DomainStrict
+        # Old-style class on Python 2
+        PythonCookiePolicy.__init__(self, *args, **kwargs)
+
+
+class CookieJar(PythonCookieJar):
+    """
+    Cookie jar that defaults to :class:`DefaultCookiePolicy` and hands
+    cookies to the request through its ``add_cookies`` method.
+    """
+
+    def __init__(self, policy=None):
+        chosen_policy = DefaultCookiePolicy() if policy is None else policy
+        # Old-style class on Python 2
+        PythonCookieJar.__init__(self, policy=chosen_policy)
+
+    def add_cookie_header(self, request):
+        """
+        Add correct Cookie: header to Request object.
+        This is copied from and slightly modified from the stdlib version.
+        """
+        self._cookies_lock.acquire()
+        try:
+            now = int(time.time())
+            self._policy._now = now
+            self._now = now
+            rendered = self._cookie_attrs(self._cookies_for_request(request))
+            # This is a modification; stdlib sets the entire cookie header
+            # and only if it's not there already. We're less picky.
+            if rendered:
+                request.add_cookies(*rendered)
+        finally:
+            self._cookies_lock.release()
+
+        self.clear_expired_cookies()
+
+
+class SessionContext(object):
+    """
+    Extensible class encapsulated by :class:`.SessionManager`; currently
+    used to manage cookies.
+
+    :param cookie_jar:
+        A prebuilt :class:`CookieJar` for the context to use; when
+        omitted, the context starts with a new empty jar.
+    """
+
+    def __init__(self, cookie_jar=None):
+        # Compare against None explicitly: an empty cookie jar is falsey,
+        # so a plain truthiness test would discard a supplied empty jar.
+        self.cookie_jar = CookieJar() if cookie_jar is None else cookie_jar
+
+    def apply_to(self, request):
+        """
+        Applies the context to the supplied :class:`.request.Request` by
+        having the cookie jar add its cookie header.
+        """
+        jar = self.cookie_jar
+        jar.add_cookie_header(request)
+
+    def extract_from(self, response, request):
+        """
+        Stores context modifications (new cookies, etc) found in the
+        response, using the request that produced it.
+        """
+        jar = self.cookie_jar
+        jar.extract_cookies(response, request)