-rw-r--r--  httpcache/README.rst                                               103
-rw-r--r--  httpcache/conftest.py                                               41
-rw-r--r--  httpcache/httpcache/__init__.py                                     229
-rw-r--r--  httpcache/httpcache/cache.py                                        155
-rw-r--r--  httpcache/httpcache/tests/__init__.py                               0
-rw-r--r--  httpcache/httpcache/tests/integration/test_caching_conditions.py    44
-rw-r--r--  httpcache/httpcache/tests/server.py                                 36
-rw-r--r--  httpcache/httpcache/tests/test_cache.py                             76
-rw-r--r--  httpcache/httpcache/tests/test_cache_control.py                     167
-rw-r--r--  httpcache/httpcache/tests/test_cache_invalidation.py                39
-rw-r--r--  httpcache/httpcache/tests/test_max_age.py                           50
-rw-r--r--  httpcache/setup.py                                                  18
m---------  py-reqcache                                                         0
-rw-r--r--  requests/cache.py                                                   150
-rw-r--r--  requests/sessions.py                                                8
-rw-r--r--  t.py                                                                26
16 files changed, 1133 insertions(+), 9 deletions(-)
diff --git a/httpcache/README.rst b/httpcache/README.rst
new file mode 100644
index 00000000..f64cd3d8
--- /dev/null
+++ b/httpcache/README.rst
@@ -0,0 +1,103 @@
+===========
+ httpcache
+===========
+
+Httpcache is a port of the caching algorithms in httplib2_ for use
+with the requests_ session object.
+
+It was written because httplib2's strong caching support is often
+offset by its lack of thread safety. Requests has the opposite
+problem: it is thread safe but does no caching.
+
+
+Usage
+=====
+
+NOTE: Eventually, my hope is that this module can be integrated directly
+into requests. That said, I've had minimal exposure to requests, so I
+expect the initial implementation to be rather un-requests-like in
+terms of its API. Suggestions and patches welcome!
+
+Here is the basic usage: ::
+
+ import requests
+
+ from httpcache import CacheControl
+
+
+ sess = requests.session()
+ cached_sess = CacheControl(sess)
+
+ response = cached_sess.get('http://google.com')
+
+If the response contains any caching headers, the result will be
+cached in a simple dictionary.
+
+Below is the implementation of the DictCache, the default cache
+backend. It is extremely simple and shows how you might implement
+another cache backend: ::
+
+ from httpcache.cache import BaseCache
+
+
+ class DictCache(BaseCache):
+
+ def __init__(self, init_dict=None):
+ self.data = init_dict or {}
+
+ def get(self, key):
+ return self.data.get(key, None)
+
+ def set(self, key, value):
+ self.data.update({key: value})
+
+    def delete(self, key):
+        if key in self.data:
+            self.data.pop(key)
+
+
+
+See? Really simple.
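+
+To persist entries on disk instead, you could pass the FileCache
+(also provided in httpcache.cache) in place of the default DictCache.
+A sketch, using a hypothetical '.webcache' directory: ::
+
+    import requests
+
+    from httpcache import CacheControl
+    from httpcache.cache import FileCache
+
+
+    sess = CacheControl(requests.session(), FileCache('.webcache'))
+    response = sess.get('http://google.com')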
+
+
+Design
+======
+
+The CacheControl object's main task is to wrap the GET call of the
+session object. The caching takes place by examining the request to
+see if it should try to use the cache. For example, if the request
+includes a 'no-cache' or 'max-age=0' Cache-Control header, it will
+not try to use the cache. If there is a cached value and it has been
+deemed fresh, then it will return the cached response.
+
+If a cached response cannot be used, the actual request is performed.
+At this point we analyze the response to see if we should add it to
+the cache. For example, if the response contains a 'max-age=3600' in
+the 'Cache-Control' header, it will be cached before being returned
+to the caller.
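+
+The freshness check at the heart of this, in miniature (a sketch with
+hypothetical numbers, not the code verbatim): ::
+
+    import time
+
+    # the response carried 'Cache-Control: max-age=3600' and a 'date'
+    # header from 100 seconds ago
+    date = time.time() - 100
+    current_age = max(0, time.time() - date)   # ~100 seconds
+    freshness_lifetime = 3600                  # parsed from max-age
+
+    fresh = freshness_lifetime > current_age   # True -> use the cache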
+
+
+Tests
+=====
+
+The tests are all in httpcache/tests and are runnable with py.test.
+
+
+TODO
+====
+
+- [ ] Better integration with requests
+- [ ] ETags / if-* header support
+- [ ] Tests that run a server from the stdlib
+
+
+Disclaimers
+===========
+
+Httpcache is brand new and may well be broken. It has some tests and
+is a fairly direct port of httplib2's caching, which I've found to be
+very reliable. That said, it hasn't been used in a production
+environment just yet. If you check it out and find bugs, let me know.
+
+
+.. _httplib2: http://code.google.com/p/httplib2/
+.. _requests: http://docs.python-requests.org/
diff --git a/httpcache/conftest.py b/httpcache/conftest.py
new file mode 100644
index 00000000..2fe30f8a
--- /dev/null
+++ b/httpcache/conftest.py
@@ -0,0 +1,41 @@
+import os
+import py.test
+import subprocess
+import requests
+import time
+
+
+class TestServer(object):
+
+ def start(self):
+ base = os.path.abspath(os.path.dirname(__file__))
+ server_file = os.path.join(base, 'httpcache', 'tests', 'server.py')
+ cmd = ['python', server_file]
+
+ kw = {}
+ if not os.environ.get('TEST_SERVER_OUTPUT'):
+ kw = {'stdout': subprocess.PIPE,
+ 'stderr': subprocess.STDOUT}
+ self.proc = subprocess.Popen(cmd, **kw)
+        url = 'http://localhost:8080'
+        # poll until the server answers, giving up after ~30 seconds
+        for _ in range(30):
+            try:
+                requests.get(url)
+                break
+            except requests.ConnectionError:
+                time.sleep(1)
+        else:
+            raise RuntimeError('test server did not start')
+
+ def stop(self):
+ self.proc.terminate()
+
+
+def pytest_namespace():
+ return dict(server=TestServer())
+
+
+def pytest_configure(config):
+ py.test.server.start()
+
+
+def pytest_unconfigure(config):
+ py.test.server.stop()
diff --git a/httpcache/httpcache/__init__.py b/httpcache/httpcache/__init__.py
new file mode 100644
index 00000000..7d3503fb
--- /dev/null
+++ b/httpcache/httpcache/__init__.py
@@ -0,0 +1,229 @@
+"""
+A caching wrapper for the requests session.
+"""
+import re
+import email
+import calendar
+import time
+
+import requests
+from cache import DictCache
+
+
+URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
+
+
+def parse_uri(uri):
+ """Parses a URI using the regex given in Appendix B of RFC 3986.
+
+ (scheme, authority, path, query, fragment) = parse_uri(uri)
+ """
+ groups = URI.match(uri).groups()
+ return (groups[1], groups[3], groups[4], groups[6], groups[8])
+
+
+def _parse_cache_control(headers):
+ """
+ Parse the cache control headers returning a dictionary with values
+ for the different directives.
+ """
+ retval = {}
+ if 'cache-control' in headers:
+ parts = headers['cache-control'].split(',')
+ parts_with_args = [
+ tuple([x.strip().lower() for x in part.split("=", 1)])
+ for part in parts if -1 != part.find("=")]
+ parts_wo_args = [(name.strip().lower(), 1)
+ for name in parts if -1 == name.find("=")]
+ retval = dict(parts_with_args + parts_wo_args)
+ return retval
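+
+# A quick illustration: directive values parse as strings and bare
+# directives map to 1, e.g.
+#
+#   _parse_cache_control({'cache-control': 'max-age=3600, no-store'})
+#   => {'max-age': '3600', 'no-store': 1}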
+
+
+def urlnorm(uri):
+ (scheme, authority, path, query, fragment) = parse_uri(uri)
+ if not scheme or not authority:
+ raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
+ authority = authority.lower()
+ scheme = scheme.lower()
+ if not path:
+ path = "/"
+ # Could do syntax based normalization of the URI before
+ # computing the digest. See Section 6.2.2 of Std 66.
+ request_uri = query and "?".join([path, query]) or path
+ scheme = scheme.lower()
+ defrag_uri = scheme + "://" + authority + request_uri
+ return scheme, authority, request_uri, defrag_uri
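+
+# For example (a hypothetical URL):
+#   urlnorm('HTTP://Example.COM/a?b=1#frag')
+#   => ('http', 'example.com', '/a?b=1', 'http://example.com/a?b=1')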
+
+
+class CacheControl(object):
+
+ def __init__(self, session, cache=None):
+ self.session = session
+ self.cache = cache or DictCache()
+
+ def __getattr__(self, key):
+ if hasattr(self.session, key):
+ return getattr(self.session, key)
+ raise AttributeError('%s not found' % key)
+
+ def cache_url(self, url):
+ scheme, authority, request_uri, defrag_uri = urlnorm(url)
+ return defrag_uri
+
+ def cached_request(self, *args, **kw):
+ """
+ See if we should use a cached response. We are looking for
+ client conditions such as no-cache and testing our cached
+ value to see if we should use it or not.
+
+ This is taken almost directly from httplib2._entry_disposition
+ """
+ req = requests.Request(*args, **kw)
+ cache_url = self.cache_url(req.full_url)
+
+ cc = _parse_cache_control(req.headers)
+
+        # non-caching states
+        no_cache = False
+        if 'no-cache' in cc:
+            no_cache = True
+        # NOTE: directive values parse as strings, so compare to '0'
+        if cc.get('max-age') == '0':
+            no_cache = True
+
+        # see if it is in the cache anyway
+        resp = self.cache.get(cache_url)
+        if no_cache or not resp:
+            return False
+
+        # It is in the cache, so let's see if it is going to be
+        # fresh enough
+ now = time.time()
+ date = calendar.timegm(
+ email.Utils.parsedate_tz(resp.headers['date']))
+ current_age = max(0, now - date)
+
+ resp_cc = _parse_cache_control(resp.headers)
+
+ # determine freshness
+ freshness_lifetime = 0
+ if 'max-age' in resp_cc:
+ try:
+ freshness_lifetime = int(resp_cc['max-age'])
+ except ValueError:
+ pass
+ elif 'expires' in resp.headers:
+ expires = email.Utils.parsedate_tz(resp.headers['expires'])
+            if expires is not None:
+ expire_time = calendar.timegm(expires) - date
+ freshness_lifetime = max(0, expire_time)
+
+ # determine if we are setting freshness limit in the req
+ if 'max-age' in cc:
+ try:
+ freshness_lifetime = int(cc['max-age'])
+ except ValueError:
+ freshness_lifetime = 0
+
+ if 'min-fresh' in cc:
+ try:
+ min_fresh = int(cc['min-fresh'])
+ except ValueError:
+ min_fresh = 0
+ # adjust our current age by our min fresh
+ current_age += min_fresh
+
+ # see how fresh we actually are
+ fresh = (freshness_lifetime > current_age)
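+        # e.g. (hypothetical numbers) a response dated 100s ago with
+        # max-age=3600 and a request min-fresh=60 yields an effective
+        # age of 160s, well under 3600s, so it is still fresh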
+
+ if fresh:
+ # make sure we set the from_cache to true
+ resp.from_cache = True
+ return resp
+
+ # we're not fresh, clean out the junk
+ self.cache.delete(cache_url)
+
+ # return the original handler
+ return False
+
+ def cache_response(self, resp):
+ """
+ Algorithm for caching requests
+ """
+
+ # From httplib2: Don't cache 206's since we aren't going to
+ # handle byte range requests
+ if resp.status_code not in [200, 203]:
+ return
+
+ cc_req = _parse_cache_control(resp.request.headers)
+ cc = _parse_cache_control(resp.headers)
+
+ cache_url = self.cache_url(resp.request.full_url)
+
+ # Delete it from the cache if we happen to have it stored there
+ no_store = cc.get('no-store') or cc_req.get('no-store')
+ if no_store and self.cache.get(cache_url):
+ self.cache.delete(cache_url)
+
+ # Add to the cache if the response headers demand it. If there
+ # is no date header then we can't do anything about expiring
+ # the cache.
+        if 'date' in resp.headers:
+
+            # cache when there is a max-age > 0
+            if cc and cc.get('max-age'):
+                try:
+                    if int(cc['max-age']) > 0:
+                        self.cache.set(cache_url, resp)
+                except ValueError:
+                    pass
+
+            # If the response can expire, it means we should cache it
+            # in the meantime. NOTE: 'expires' is an HTTP date, not an
+            # integer, so just check that it is present.
+            elif resp.headers.get('expires'):
+                self.cache.set(cache_url, resp)
+
+ def from_cache(f):
+ """
+ A decorator that allows using a cached response.
+ """
+ def cached_handler(self, *args, **kw):
+ # If we have a cached response use it
+ cached_response = self.cached_request(*args, **kw)
+ if cached_response:
+ return cached_response
+
+ # Else return original function's response
+ return f(self, *args, **kw)
+ return cached_handler
+
+ def invalidates_cache(f):
+ """
+ A decorator for marking methods that can invalidate the cache.
+ """
+
+ def invalidating_handler(self, *args, **kw):
+ resp = f(self, *args, **kw)
+ if resp.ok:
+ cache_url = self.cache_url(resp.request.full_url)
+ self.cache.delete(cache_url)
+ return resp
+ return invalidating_handler
+
+ @from_cache
+ def get(self, url, headers=None, *args, **kw):
+ resp = self.session.get(url, headers=headers, *args, **kw)
+ # We set this primarily for testing
+ resp.from_cache = False
+
+ # See if we need to cache the response
+ self.cache_response(resp)
+
+        # actually return the response
+ return resp
+
+ @invalidates_cache
+ def put(self, *args, **kw):
+ return self.session.put(*args, **kw)
+
+ @invalidates_cache
+ def delete(self, *args, **kw):
+ return self.session.delete(*args, **kw)
diff --git a/httpcache/httpcache/cache.py b/httpcache/httpcache/cache.py
new file mode 100644
index 00000000..276cb0bd
--- /dev/null
+++ b/httpcache/httpcache/cache.py
@@ -0,0 +1,155 @@
+"""
+The cache object API for implementing caches. The default is just a
+dictionary, which in turn means it is not threadsafe for writing.
+"""
+import os
+import re
+import time
+
+from contextlib import contextmanager
+from threading import Lock
+from cPickle import dump, load
+from hashlib import md5
+
+
+class BaseCache(object):
+
+    def get(self, key):
+        raise NotImplementedError()
+
+    def set(self, key, value):
+        raise NotImplementedError()
+
+    def delete(self, key):
+        raise NotImplementedError()
+
+
+class DictCache(BaseCache):
+
+ def __init__(self, init_dict=None):
+ self.data = init_dict or {}
+
+ def get(self, key):
+ return self.data.get(key, None)
+
+ def set(self, key, value):
+ self.data.update({key: value})
+
+ def delete(self, key):
+ if key in self.data:
+ self.data.pop(key)
+
+
+# Cache filename construction (original borrowed from Venus
+# http://intertwingly.net/code/venus/)
+re_url_scheme = re.compile(r'^\w+://')
+re_slash = re.compile(r'[?/:|]+')
+
+
+def safename(filename):
+ """Return a filename suitable for the cache.
+
+ Strips dangerous and common characters to create a filename we
+ can use to store the cache in.
+ """
+
+ try:
+ if re_url_scheme.match(filename):
+ if isinstance(filename, str):
+ filename = filename.decode('utf-8')
+ filename = filename.encode('idna')
+ else:
+ filename = filename.encode('idna')
+ except UnicodeError:
+ pass
+ if isinstance(filename, unicode):
+ filename = filename.encode('utf-8')
+ filemd5 = md5(filename).hexdigest()
+ filename = re_url_scheme.sub("", filename)
+ filename = re_slash.sub(",", filename)
+
+ # limit length of filename
+ if len(filename) > 200:
+ filename = filename[:200]
+ return ",".join((filename, filemd5))
+
+
+@contextmanager
+def filelock(key, mode, timeout=None, interval=.1):
+ """
+ A simple context manager that creates a temporary file for
+ locking a specific cache entry.
+
+ This was inspired pretty directly by:
+ http://amix.dk/blog/post/19531
+ """
+    lockfile = '%s.lock' % key
+    if timeout:
+        iterations = int(timeout / interval)
+    while os.path.exists(lockfile):
+        error = FileCacheLockedException(lockfile, key)
+        if timeout:
+            # it is already locked, but we have a timeout if we
+            # want to try and block until it is available
+            time.sleep(interval)
+            iterations -= 1
+            if not iterations:
+                raise error
+        else:
+            # it is locked and we don't have a timeout, so raise
+            # the error immediately
+            raise error
+
+    with open(lockfile, 'w+') as file_lock:
+        file_lock.write('1')
+    try:
+        with open(key, mode) as opened_file:
+            yield opened_file
+    finally:
+        # always release the lock, even if the caller raised
+        os.remove(lockfile)
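+
+# Usage sketch (mirrors FileCache.set below): block for up to a
+# second waiting on "<key>.lock", then write while holding the lock.
+#
+#   with filelock(cache_full_path, 'wb', timeout=1) as f:
+#       dump(value, f)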
+
+
+class FileCacheLockedException(Exception):
+
+ def __init__(self, lockfile, fname):
+ self.lockfile, self.fname = lockfile, fname
+ msg = '"%s" is locked. Try removing "%s"' % (lockfile, fname)
+ super(FileCacheLockedException, self).__init__(msg)
+
+
+class FileCache(BaseCache):
+ """
+ A simple threadsafe file based cache.
+
+ The file cache is a port of httplib2's directory cache. The only
+ difference is that it uses a lock when writing the file and
+ pickles the value. The pickling is used b/c we are using
+ requests.Response objects.
+ """
+
+ def __init__(self, cache, worker=None):
+ self.cache = cache
+ self.safe = safename
+ if not os.path.exists(cache):
+ os.makedirs(self.cache)
+ self.lock = Lock()
+
+ def get(self, key):
+ retval = None
+ cache_full_path = os.path.join(self.cache, safename(key))
+ try:
+ with open(cache_full_path, "rb") as f:
+ retval = load(f)
+ except (IOError, EOFError):
+ pass
+ return retval
+
+ def set(self, key, value):
+ cache_full_path = os.path.join(self.cache, safename(key))
+ with filelock(cache_full_path, "wb", timeout=1) as f:
+ dump(value, f)
+
+ def delete(self, key):
+ cache_full_path = os.path.join(self.cache, safename(key))
+ if os.path.exists(cache_full_path):
+ os.remove(cache_full_path)
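+
+
+# A quick sanity check of the cache API (assumes creating a
+# 'webcache' directory in the working directory is acceptable):
+#
+#   fc = FileCache('webcache')
+#   fc.set('spam', 'eggs')
+#   assert fc.get('spam') == 'eggs'
+#   fc.delete('spam')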
diff --git a/httpcache/httpcache/tests/__init__.py b/httpcache/httpcache/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/httpcache/httpcache/tests/__init__.py
diff --git a/httpcache/httpcache/tests/integration/test_caching_conditions.py b/httpcache/httpcache/tests/integration/test_caching_conditions.py
new file mode 100644
index 00000000..59d35567
--- /dev/null
+++ b/httpcache/httpcache/tests/integration/test_caching_conditions.py
@@ -0,0 +1,44 @@
+from httpcache import CacheControl
+import requests
+
+
+class TestCachingConditions(object):
+
+ def test_no_caching_directives(self):
+ url = 'http://localhost:8080/'
+ s = requests.Session()
+ c = CacheControl(s, {})
+ r = c.get(url)
+
+ assert r
+ assert r.content == 'foo'
+ assert not c.cache.get(url)
+
+ def test_cache_max_age(self):
+ url = 'http://localhost:8080/max_age/'
+ s = requests.Session()
+ c = CacheControl(s, {})
+ r = c.get(url)
+ assert c.cache.get(url)
+ assert c.cache.get(url) == r
+
+ def test_cache_no_cache(self):
+ url = 'http://localhost:8080/no_cache/'
+ s = requests.Session()
+ c = CacheControl(s, {})
+ c.get(url)
+ assert not c.cache.get(url)
+
+ def test_cache_must_revalidate(self):
+ url = 'http://localhost:8080/must_revalidate/'
+ s = requests.Session()
+ c = CacheControl(s, {})
+ c.get(url)
+ assert not c.cache.get(url)
+
+ def test_cache_no_store(self):
+ url = 'http://localhost:8080/no_store/'
+ s = requests.Session()
+ c = CacheControl(s, {})
+ c.get(url)
+ assert not c.cache.get(url)
diff --git a/httpcache/httpcache/tests/server.py b/httpcache/httpcache/tests/server.py
new file mode 100644
index 00000000..140c3f1d
--- /dev/null
+++ b/httpcache/httpcache/tests/server.py
@@ -0,0 +1,36 @@
+from __future__ import print_function
+import cherrypy
+
+
+class CacheTestingServer(object):
+
+ def index(self):
+ return 'foo'
+ index.exposed = True
+
+ def max_age(self, value=None):
+ age = 'max-age=%s' % (value or 300)
+ cherrypy.response.headers['Cache-Control'] = age
+ return 'max age'
+ max_age.exposed = True
+
+ def no_cache(self):
+ cherrypy.response.headers['Cache-Control'] = 'no-cache'
+ return 'no cache'
+ no_cache.exposed = True
+
+ def must_revalidate(self):
+ cherrypy.response.headers['Cache-Control'] = 'must-revalidate'
+ return 'must revalidate'
+ must_revalidate.exposed = True
+
+ def no_store(self):
+ cherrypy.response.headers['Cache-Control'] = 'no-store'
+ return 'no store'
+ no_store.exposed = True
+
+
+if __name__ == '__main__':
+ cherrypy.tree.mount(CacheTestingServer(), '/')
+ cherrypy.engine.start()
+ cherrypy.engine.block()
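+
+# NOTE: CherryPy serves on its default port, 8080, which is the
+# address conftest.py and the integration tests poll.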
diff --git a/httpcache/httpcache/tests/test_cache.py b/httpcache/httpcache/tests/test_cache.py
new file mode 100644
index 00000000..4efbc5c4
--- /dev/null
+++ b/httpcache/httpcache/tests/test_cache.py
@@ -0,0 +1,76 @@
+"""
+Tests for our caches
+"""
+import os
+
+from threading import Thread
+# from multiprocessing import Process
+from httpcache.cache import FileCache, safename, filelock
+from httpcache.cache import FileCacheLockedException
+
+
+class RandomWriter(Thread):
+ def __init__(self, cache):
+ super(RandomWriter, self).__init__()
+ self.key = 'threading_test'
+ self.cache = cache
+
+ def run(self):
+ for x in range(0, 5):
+ value = x * 1000
+ self.cache.set(self.key, value)
+ assert self.cache.get(self.key) == value
+
+
+class TestFileCache(object):
+
+ cache_dir = 'test_file_cache'
+
+ def test_set_get_delete(self):
+ fc = FileCache(self.cache_dir)
+ fc.set('foo', 'bar')
+ assert fc.get('foo') == 'bar'
+ fc.delete('foo')
+        assert fc.get('foo') is None
+
+ def test_setting_with_multiple_threads(self):
+ fc = FileCache(self.cache_dir)
+ w1 = RandomWriter(fc)
+ w2 = RandomWriter(fc)
+ w1.start()
+ w2.start()
+ w1.join()
+ w2.join()
+ assert fc.get(w1.key) == 4000
+
+ def test_locked_raises_exception(self):
+ key, value = 'locked', {'foo': 'bar'}
+ fc = FileCache(self.cache_dir)
+ lockfile = os.path.join(self.cache_dir,
+ '%s.lock' % safename(key))
+ with open(lockfile, 'w+') as lock:
+ lock.write('1')
+ assert os.path.exists(lockfile)
+ try:
+ fc.set(key, value)
+ assert False
+ except FileCacheLockedException:
+ assert True
+ os.remove(lockfile)
+ fc.set(key, value)
+ assert fc.get(key) == value
+ assert not os.path.exists(lockfile)
+
+ def test_filelock_timeout(self):
+ fc = FileCache(self.cache_dir)
+ fname = safename('locked')
+ lockfile = '%s.lock' % fname
+ with open(lockfile, 'w+') as lock:
+ lock.write('1')
+ try:
+ with filelock(fname, 'w+', .5):
+ assert False
+ except FileCacheLockedException:
+ assert True
+ os.remove(lockfile)
+ assert not os.path.exists(lockfile)
diff --git a/httpcache/httpcache/tests/test_cache_control.py b/httpcache/httpcache/tests/test_cache_control.py
new file mode 100644
index 00000000..c06fabbb
--- /dev/null
+++ b/httpcache/httpcache/tests/test_cache_control.py
@@ -0,0 +1,167 @@
+"""
+Unit tests that verify our caching methods work correctly.
+"""
+import mock
+import datetime
+import time
+
+from httpcache import CacheControl
+from httpcache.cache import DictCache
+
+
+TIME_FMT = "%a, %d %b %Y %H:%M:%S"
+
+
+class TestCacheControlResponse(object):
+ url = 'http://url.com/'
+
+ def req(self, headers=None):
+ headers = headers or {}
+ return mock.Mock(full_url=self.url, headers=headers)
+
+ def resp(self, headers=None):
+ headers = headers or {}
+ return mock.Mock(status_code=200,
+ headers=headers,
+ request=self.req())
+
+ def cache(self):
+ return CacheControl(mock.Mock(), mock.MagicMock())
+
+ def test_no_cache_non_20x_response(self):
+ c = self.cache()
+
+ # No caching without some extra headers, so we add them
+ now = datetime.datetime.utcnow().strftime(TIME_FMT)
+ resp = self.resp({'cache-control': 'max-age=3600',
+ 'date': now})
+
+ no_cache_codes = [201, 300, 400, 500]
+ for code in no_cache_codes:
+ resp.status_code = code
+ c.cache_response(resp)
+ assert not c.cache.set.called
+
+ # this should work b/c the resp is 20x
+ resp.status_code = 203
+ c.cache_response(resp)
+ assert c.cache.set.called
+
+ def test_no_cache_with_no_date(self):
+ c = self.cache()
+
+ # No date header which makes our max-age pointless
+ resp = self.resp({'cache-control': 'max-age=3600'})
+ c.cache_response(resp)
+
+ assert not c.cache.set.called
+
+ def test_cache_response_no_cache_control(self):
+ c = self.cache()
+ resp = self.resp()
+ c.cache_response(resp)
+
+ assert not c.cache.set.called
+
+ def test_cache_response_cache_max_age(self):
+ c = self.cache()
+
+ now = datetime.datetime.utcnow().strftime(TIME_FMT)
+ resp = self.resp({'cache-control': 'max-age=3600',
+ 'date': now})
+ c.cache_response(resp)
+ c.cache.set.assert_called_with(self.url, resp)
+
+    def test_cache_response_no_store(self):
+        resp = mock.Mock()
+        cache = DictCache({self.url: resp})
+        c = CacheControl(mock.Mock(), cache)
+
+ cache_url = c.cache_url(self.url)
+
+ resp = self.resp({'cache-control': 'no-store'})
+ assert c.cache.get(cache_url)
+
+ c.cache_response(resp)
+ assert not c.cache.get(cache_url)
+
+
+class TestCacheControlRequest(object):
+
+ url = 'http://foo.com'
+
+ def test_cache_request_no_cache(self):
+ c = CacheControl(mock.Mock())
+ hdrs = {'cache-control': 'no-cache'}
+ resp = c.cached_request(self.url, headers=hdrs)
+ assert not resp
+
+ def test_cache_request_pragma_no_cache(self):
+ c = CacheControl(mock.Mock())
+ hdrs = {'pragma': 'no-cache'}
+ resp = c.cached_request(self.url, headers=hdrs)
+ assert not resp
+
+ def test_cache_request_no_store(self):
+ c = CacheControl(mock.Mock())
+ hdrs = {'cache-control': 'no-store'}
+ resp = c.cached_request(self.url, headers=hdrs)
+ assert not resp
+
+ def test_cache_request_max_age_0(self):
+ c = CacheControl(mock.Mock())
+ hdrs = {'cache-control': 'max-age=0'}
+ resp = c.cached_request(self.url, headers=hdrs)
+ assert not resp
+
+ def test_cache_request_not_in_cache(self):
+ c = CacheControl(mock.Mock())
+ resp = c.cached_request(self.url)
+ assert not resp
+
+ def test_cache_request_fresh_max_age(self):
+ now = datetime.datetime.utcnow().strftime(TIME_FMT)
+ resp = mock.Mock(headers={'cache-control': 'max-age=3600',
+ 'date': now})
+
+ # NOTE: httplib2 uses its own algorithm for finding the
+ # "defrag_uri" in order to use it for creating a cache key. It
+ # seems to append the trailing slash, which I'm pretty sure is
+ # b/c of the auto directory rules. I'm trusting it is correct.
+ cache = DictCache({self.url + '/': resp})
+ c = CacheControl(mock.Mock(), cache)
+ r = c.cached_request(self.url)
+ assert r == resp
+
+ def test_cache_request_unfresh_max_age(self):
+ earlier = time.time() - 3700
+ now = datetime.datetime.fromtimestamp(earlier).strftime(TIME_FMT)
+
+ resp = mock.Mock(headers={'cache-control': 'max-age=3600',
+ 'date': now})
+ cache = DictCache({self.url: resp})
+ c = CacheControl(mock.Mock(), cache)
+ r = c.cached_request(self.url)
+ assert not r
+
+ def test_cache_request_fresh_expires(self):
+ later = datetime.timedelta(days=1)
+ expires = (datetime.datetime.utcnow() + later).strftime(TIME_FMT)
+ now = datetime.datetime.utcnow().strftime(TIME_FMT)
+ resp = mock.Mock(headers={'expires': expires,
+ 'date': now})
+ cache = DictCache({self.url + '/': resp})
+        c = CacheControl(mock.Mock(), cache)
+ r = c.cached_request(self.url)
+ assert r == resp
+
+ def test_cache_request_unfresh_expires(self):
+ later = datetime.timedelta(days=-1)
+ expires = (datetime.datetime.utcnow() + later).strftime(TIME_FMT)
+ now = datetime.datetime.utcnow().strftime(TIME_FMT)
+ resp = mock.Mock(headers={'expires': expires,
+ 'date': now})
+ cache = DictCache({self.url: resp})
+        c = CacheControl(mock.Mock(), cache)
+ r = c.cached_request(self.url)
+ assert not r
diff --git a/httpcache/httpcache/tests/test_cache_invalidation.py b/httpcache/httpcache/tests/test_cache_invalidation.py
new file mode 100644
index 00000000..017a3d12
--- /dev/null
+++ b/httpcache/httpcache/tests/test_cache_invalidation.py
@@ -0,0 +1,39 @@
+"""
+When resources are known to be updated via HTTP (ie PUT, DELETE), we
+should invalidate them in our cache.
+"""
+import mock
+
+from httpcache import CacheControl
+from httpcache.cache import DictCache
+
+
+class TestInvalidations(object):
+
+ url = 'http://foo.com/bar/'
+
+ def resp(self):
+ req = mock.Mock(full_url=self.url)
+ return mock.Mock(request=req)
+
+ def test_put_invalidates_cache(self):
+ # Prep our cache
+ resp = self.resp()
+ cache = DictCache({self.url: resp})
+ session = mock.Mock(put=mock.Mock(return_value=resp))
+ c = CacheControl(session, cache)
+
+ c.put(self.url)
+
+ assert not c.cache.get(self.url)
+
+ def test_delete_invalidates_cache(self):
+ # Prep our cache
+ resp = self.resp()
+ cache = DictCache({self.url: resp})
+ session = mock.Mock(delete=mock.Mock(return_value=resp))
+ c = CacheControl(session, cache)
+
+ c.delete(self.url)
+
+ assert not c.cache.get(self.url)
diff --git a/httpcache/httpcache/tests/test_max_age.py b/httpcache/httpcache/tests/test_max_age.py
new file mode 100644
index 00000000..f33f501b
--- /dev/null
+++ b/httpcache/httpcache/tests/test_max_age.py
@@ -0,0 +1,50 @@
+from __future__ import print_function
+from .. import CacheControl
+import requests
+
+
+class TestMaxAge(object):
+
+ def test_client_max_age_0(self):
+ """
+ Making sure when the client uses max-age=0 we don't get a
+ cached copy even though we're still fresh.
+ """
+ url = 'http://localhost:8080/max_age/'
+ s = requests.Session()
+ c = CacheControl(s, {})
+ print('first request')
+ r = c.get(url)
+ cache_url = c.cache_url(url)
+ assert c.cache.get(cache_url) == r
+
+ print('second request')
+ r = c.get(url, headers={'Cache-Control': 'max-age=0'})
+
+ # don't remove from the cache
+ assert c.cache.get(cache_url)
+        assert r.from_cache is False
+
+ def test_client_max_age_3600(self):
+ """
+ Verify we get a cached value when the client has a
+ reasonable max-age value.
+ """
+ # prep our cache
+ url = 'http://localhost:8080/max_age/'
+ s = requests.Session()
+ c = CacheControl(s, {})
+ r = c.get(url)
+ cache_url = c.cache_url(url)
+ assert c.cache.get(cache_url) == r
+
+ # request that we don't want a new one unless
+ r = c.get(url, headers={'Cache-Control': 'max-age=3600'})
+        assert r.from_cache is True
+
+ # now lets grab one that forces a new request b/c the cache
+ # has expired. To do that we'll inject a new time value.
+ resp = c.cache.get(cache_url)
+ resp.headers['date'] = 'Tue, 15 Nov 1994 08:12:31 GMT'
+ r = c.get(url)
+        assert r.from_cache is False
diff --git a/httpcache/setup.py b/httpcache/setup.py
new file mode 100644
index 00000000..8d188634
--- /dev/null
+++ b/httpcache/setup.py
@@ -0,0 +1,18 @@
+import setuptools
+
+setup_params = dict(
+ name='HTTPCache',
+ version='0.5',
+ author="Eric Larson",
+ author_email="eric@ionrock.org",
+ url="https://bitbucket.org/elarson/httpcache",
+ packages=setuptools.find_packages(),
+    tests_require=[
+        'pytest',
+        'cherrypy',
+    ],
+)
+
+
+if __name__ == '__main__':
+ setuptools.setup(**setup_params)
diff --git a/py-reqcache b/py-reqcache
new file mode 160000
+Subproject 5b4f3e5217c2d1b32818bf4d8121a0a243e67db
diff --git a/requests/cache.py b/requests/cache.py
index 5be1dfbe..373e53c3 100644
--- a/requests/cache.py
+++ b/requests/cache.py
@@ -7,7 +7,11 @@ requests.cache
Requests caching layer.
"""
+
+import time
import hashlib
+import calendar
+import email
from .packages.cachecore import SimpleCache
DEFAULT_CACHE = SimpleCache
@@ -25,20 +29,16 @@ def expand_cache(c):
if c is False:
return Cache(backend=False)
-def request_hash(r, type=''):
- """Returns a SHA256(type-method-url) for cache keys."""
-
- s = '{0}-{1}-{2}'.format(type, r.request.method, r.request.full_url)
- return hashlib.sha256(s).hexdigest()
class Cache(object):
"""A Cache session."""
- def __init__(self, backend=None, conditional=None, content=True):
+ def __init__(self, backend=None, conditional=None, content=True, handler=None):
self.conditional = None
self.content = None
+ self.handler = handler
# Default to backend if True.
if not backend is None:
@@ -54,3 +54,141 @@ class Cache(object):
if content is not True:
self.content = content
+ if handler is None:
+ self.handler = CacheHandler()
+
+
+class CacheHandler(object):
+
+ @staticmethod
+ def request_hash(r, type=''):
+ """Returns a SHA256(type-method-url) for cache keys."""
+
+ s = '{0}-{1}-{2}'.format(type, r.request.method, r.request.full_url)
+ return hashlib.sha256(s).hexdigest()
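+
+    # e.g. request_hash(r, type='resp') hashes the string
+    # 'resp-GET-http://example.com/' (URL hypothetical) with SHA256;
+    # the optional type prefix partitions the cache key space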
+
+
+ @staticmethod
+ def _parse_cache_control(headers):
+ """
+ Parse the cache control headers returning a dictionary with values
+ for the different directives.
+ """
+ retval = {}
+ if 'cache-control' in headers:
+ parts = headers['cache-control'].split(',')
+ parts_with_args = [
+ tuple([x.strip().lower() for x in part.split("=", 1)])
+ for part in parts if -1 != part.find("=")]
+ parts_wo_args = [(name.strip().lower(), 1)
+ for name in parts if -1 == name.find("=")]
+ retval = dict(parts_with_args + parts_wo_args)
+ return retval
+
+ def cache_request(self, r, cache):
+ """See if we should use a cached response."""
+
+        cache_hash = self.request_hash(r)
+ cc = self._parse_cache_control(r.headers)
+
+ # non-caching states
+ no_cache = False
+ if 'no-cache' in cc:
+ no_cache = True
+        # NOTE: directive values parse as strings, so compare to '0'
+        if cc.get('max-age') == '0':
+            no_cache = True
+
+        # see if it is in the cache anyway
+        resp = cache.get(cache_hash)
+        if no_cache or not resp:
+            return False
+
+        # It is in the cache, so let's see if it is going to be
+        # fresh enough
+ now = time.time()
+ date = calendar.timegm(
+ email.Utils.parsedate_tz(resp.headers['date']))
+ current_age = max(0, now - date)
+
+ resp_cc = self._parse_cache_control(resp.headers)
+
+ # determine freshness
+ freshness_lifetime = 0
+ if 'max-age' in resp_cc:
+ try:
+ freshness_lifetime = int(resp_cc['max-age'])
+ except ValueError:
+ pass
+ elif 'expires' in resp.headers:
+ expires = email.Utils.parsedate_tz(resp.headers['expires'])
+            if expires is not None:
+ expire_time = calendar.timegm(expires) - date
+ freshness_lifetime = max(0, expire_time)
+
+ # determine if we are setting freshness limit in the req
+ if 'max-age' in cc:
+ try:
+ freshness_lifetime = int(cc['max-age'])
+ except ValueError:
+ freshness_lifetime = 0
+
+ if 'min-fresh' in cc:
+ try:
+ min_fresh = int(cc['min-fresh'])
+ except ValueError:
+ min_fresh = 0
+ # adjust our current age by our min fresh
+ current_age += min_fresh
+
+ # see how fresh we actually are
+ fresh = (freshness_lifetime > current_age)
+
+ if fresh:
+ # make sure we set the from_cache to true
+ resp.from_cache = True
+ return resp
+
+ # we're not fresh, clean out the junk
+ cache.delete(cache_hash)
+
+ # return the original handler
+ return False
+
+
+ def cache_response(self, r, cache):
+ """Algorithm for caching requests."""
+
+ if r.status_code not in [200, 203]:
+ return
+
+ cc_req = self._parse_cache_control(r.request.headers)
+ cc = self._parse_cache_control(r.headers)
+
+ # cache_url = self.cache_url(r.request.full_url)
+ cache_hash = self.request_hash(r)
+
+ # Delete it from the cache if we happen to have it stored there
+ no_store = cc.get('no-store') or cc_req.get('no-store')
+ if no_store and cache.get(cache_hash):
+ cache.delete(cache_hash)
+
+ # Add to the cache if the response headers demand it. If there
+ # is no date header then we can't do anything about expiring
+ # the cache.
+        if 'date' in r.headers:
+
+            # cache when there is a max-age > 0
+            if cc and cc.get('max-age'):
+                try:
+                    if int(cc['max-age']) > 0:
+                        cache.set(cache_hash, r)
+                except ValueError:
+                    pass
+
+            # If the response can expire, it means we should cache it
+            # in the meantime. NOTE: 'expires' is an HTTP date, not an
+            # integer, so just check that it is present.
+            elif r.headers.get('expires'):
+                cache.set(cache_hash, r)
diff --git a/requests/sessions.py b/requests/sessions.py
index 5e827543..f6416777 100644
--- a/requests/sessions.py
+++ b/requests/sessions.py
@@ -11,6 +11,7 @@ requests (cookies, auth, proxies).
from copy import deepcopy
from .compat import cookielib
+from .cache import expand_cache
from .cookies import cookiejar_from_dict, remove_cookie_by_name
from .defaults import defaults
from .models import Request
@@ -81,10 +82,11 @@ class Session(object):
self.prefetch = prefetch
self.verify = verify
self.cert = cert
- self.cache = cache
+ self.cache = expand_cache(cache)
- from .cache import ReqCache
- self.cache = ReqCache("test", "memory")
+
+ # from .cache import ReqCache
+ # self.cache = ReqCache("test", "memory")
for (k, v) in list(defaults.items()):
self.config.setdefault(k, deepcopy(v))
diff --git a/t.py b/t.py
new file mode 100644
index 00000000..8e6921c8
--- /dev/null
+++ b/t.py
@@ -0,0 +1,26 @@
+# mycache = ReqCache("test", "memory")
+import requests
+
+s = requests.session()
+
+# issue the same request repeatedly; once the first response has been
+# cached, the later ones should report from_cache as True
+for _ in range(6):
+    r = s.get('http://github.com')
+    print r.__dict__.get('from_cache')
+
+# r = requests.get('http://github.com', hooks=mycache.hooks)
+# explain_cache_result(r)