brain dumpcache

author: Kenneth Reitz <me@kennethreitz.com> 2012-10-16 13:39:00 -0700
committer: Kenneth Reitz <me@kennethreitz.com> 2012-10-16 13:39:00 -0700
commit: e3c51e4182c54ffc494a7327e69004342844b0a4 (patch)
tree: 6f8a68415b5adc4109cd63b56d134e48535288b3
parent: 706f88fedd7c526bc17ec9341a75ace1486d0a60 (diff)
download: python-requests-cache.tar.gz
16 files changed, 1133 insertions, 9 deletions
diff --git a/httpcache/README.rst b/httpcache/README.rst
new file mode 100644
index 00000000..f64cd3d8
--- /dev/null
+++ b/httpcache/README.rst
@@ -0,0 +1,103 @@
+===========
+ httpcache
+===========
+
+Httpcache is a port of the caching algorithms in httplib2_ for use with
+requests_ session object. 
+
+It was written because httplib2's better support for caching is often
+mitigated by its lack of threadsafety. The same is true of requests in
+terms of caching.
+
+
+Usage
+=====
+
+NOTE: Eventually, my hope is that this module can be integrated directly
+into requests. That said, I've had minimal exposure to requests, so I
+expect the initial implementation to be rather un-requests-like in
+terms of its API. Suggestions and patches welcome!
+
+Here is the basic usage: ::
+
+  import requests
+
+  from httpcache import CacheControl
+
+
+  sess = requests.session()
+  cached_sess = CacheControl(sess)
+
+  response = cached_sess.get('http://google.com')
+
+If the URL contains any caching based headers, it will cache the
+result in a simple dictionary. 
+
+Below is the implementation of the DictCache, the default cache
+backend. It is extremely simple and shows how you would implement some
+other cache backend: ::
+
+  from httpcache.cache import BaseCache
+
+
+  class DictCache(BaseCache):
+   
+      def __init__(self, init_dict=None):
+          self.data = init_dict or {}
+   
+      def get(self, key):
+          return self.data.get(key, None)
+   
+      def set(self, key, value):
+          self.data.update({key: value})
+   
+      def delete(self, key):
+          self.data.pop(key)
+
+  
+
+See? Really simple.
+
+
+Design
+======
+
+The CacheControl object's main task is to wrap the GET call of the
+session object. The caching takes place by examining the request to
+see if it should try to use the cache. For example, if the request
+includes a 'no-cache' or 'max-age=0' Cache-Control header, it will not
+try to cache the request. If there is a cached value and its value
+has been deemed fresh, then it will return the cached response.
+
+If the request cannot be cached, the actual request is performed. At
+this point we then analyze the response and see if we should add it to
+the cache. For example, if the response contains a 'max-age=3600' in
+the 'Cache-Control' header, it will cache the response before
+returning it to the caller. 
+
+
+Tests
+=====
+
+The tests are all in httpcache/tests and are runnable with py.test. 
+
+
+TODO
+====
+
+ [ ]- Better integration with requests
+ [ ]- ETags / if-* header support
+ [ ]- Tests that run a server from the stdlib
+
+
+Disclaimers
+===========
+
+Httpcache is brand new and may be totally broken. I have some tests and
+it is a pretty direct port of httplib2 caching, which I've found to be
+very reliable. With that in mind, it hasn't been used in a production
+environment just yet. If you check it out and find bugs, let me know.
+
+
+.. _httplib2: http://code.google.com/p/httplib2/
+.. _requests: http://docs.python-requests.org/ 
diff --git a/httpcache/conftest.py b/httpcache/conftest.py
new file mode 100644
index 00000000..2fe30f8a
--- /dev/null
+++ b/httpcache/conftest.py
@@ -0,0 +1,41 @@
+import os
+import py.test
+import subprocess
+import requests
+import time
+
+
+class TestServer(object):
+
+    def start(self):
+        base = os.path.abspath(os.path.dirname(__file__))
+        server_file = os.path.join(base, 'httpcache', 'tests', 'server.py')
+        cmd = ['python', server_file]
+
+        kw = {}
+        if not os.environ.get('TEST_SERVER_OUTPUT'):
+            kw = {'stdout': subprocess.PIPE,
+                  'stderr': subprocess.STDOUT}
+        self.proc = subprocess.Popen(cmd, **kw)
+        url = 'http://localhost:8080'
+        up = None
+        while not up:
+            try:
+                up = requests.get(url)
+            except requests.ConnectionError:
+                time.sleep(1)
+
+    def stop(self):
+        self.proc.terminate()
+
+
+def pytest_namespace():
+    return dict(server=TestServer())
+
+
+def pytest_configure(config):
+    py.test.server.start()
+
+
+def pytest_unconfigure(config):
+    py.test.server.stop()
diff --git a/httpcache/httpcache/__init__.py b/httpcache/httpcache/__init__.py
new file mode 100644
index 00000000..7d3503fb
--- /dev/null
+++ b/httpcache/httpcache/__init__.py
@@ -0,0 +1,229 @@
+"""
+A caching wrapper for the requests session.
+"""
+import re
+import email
+import calendar
+import time
+
+import requests
+from cache import DictCache
+
+
+URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
+
+
+def parse_uri(uri):
+    """Parses a URI using the regex given in Appendix B of RFC 3986.
+
+        (scheme, authority, path, query, fragment) = parse_uri(uri)
+    """
+    groups = URI.match(uri).groups()
+    return (groups[1], groups[3], groups[4], groups[6], groups[8])
+
+
+def _parse_cache_control(headers):
+    """
+    Parse the cache control headers returning a dictionary with values
+    for the different directives.
+    """
+    retval = {}
+    if 'cache-control' in headers:
+        parts = headers['cache-control'].split(',')
+        parts_with_args = [
+            tuple([x.strip().lower() for x in part.split("=", 1)])
+            for part in parts if -1 != part.find("=")]
+        parts_wo_args = [(name.strip().lower(), 1)
+                         for name in parts if -1 == name.find("=")]
+        retval = dict(parts_with_args + parts_wo_args)
+    return retval
+
+
+def urlnorm(uri):
+    (scheme, authority, path, query, fragment) = parse_uri(uri)
+    if not scheme or not authority:
+        raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
+    authority = authority.lower()
+    scheme = scheme.lower()
+    if not path:
+        path = "/"
+    # Could do syntax based normalization of the URI before
+    # computing the digest. See Section 6.2.2 of Std 66.
+    request_uri = query and "?".join([path, query]) or path
+    scheme = scheme.lower()
+    defrag_uri = scheme + "://" + authority + request_uri
+    return scheme, authority, request_uri, defrag_uri
+
+
+class CacheControl(object):
+
+    def __init__(self, session, cache=None):
+        self.session = session
+        self.cache = cache or DictCache()
+
+    def __getattr__(self, key):
+        if hasattr(self.session, key):
+            return getattr(self.session, key)
+        raise AttributeError('%s not found' % key)
+
+    def cache_url(self, url):
+        scheme, authority, request_uri, defrag_uri = urlnorm(url)
+        return defrag_uri
+
+    def cached_request(self, *args, **kw):
+        """
+        See if we should use a cached response. We are looking for
+        client conditions such as no-cache and testing our cached
+        value to see if we should use it or not.
+
+        This is taken almost directly from httplib2._entry_disposition
+        """
+        req = requests.Request(*args, **kw)
+        cache_url = self.cache_url(req.full_url)
+
+        cc = _parse_cache_control(req.headers)
+
+        # non-caching states
+        no_cache = False
+        if 'no-cache' in cc: no_cache = True
+        if 'max-age' in cc and cc['max-age'] == '0': no_cache = True
+
+        # see if it is in the cache anyways
+        in_cache = self.cache.get(cache_url)
+        if no_cache or not in_cache:
+            return False
+
+        # It is in the cache, so lets see if it is going to be
+        # fresh enough
+        resp = self.cache.get(cache_url)
+        now = time.time()
+        date = calendar.timegm(
+            email.Utils.parsedate_tz(resp.headers['date']))
+        current_age = max(0, now - date)
+
+        resp_cc = _parse_cache_control(resp.headers)
+
+        # determine freshness
+        freshness_lifetime = 0
+        if 'max-age' in resp_cc:
+            try:
+                freshness_lifetime = int(resp_cc['max-age'])
+            except ValueError:
+                pass
+        elif 'expires' in resp.headers:
+            expires = email.Utils.parsedate_tz(resp.headers['expires'])
+            if expires != None:
+                expire_time = calendar.timegm(expires) - date
+                freshness_lifetime = max(0, expire_time)
+
+        # determine if we are setting freshness limit in the req
+        if 'max-age' in cc:
+            try:
+                freshness_lifetime = int(cc['max-age'])
+            except ValueError:
+                freshness_lifetime = 0
+
+        if 'min-fresh' in cc:
+            try:
+                min_fresh = int(cc['min-fresh'])
+            except ValueError:
+                min_fresh = 0
+            # adjust our current age by our min fresh
+            current_age += min_fresh
+
+        # see how fresh we actually are
+        fresh = (freshness_lifetime > current_age)
+
+        if fresh:
+            # make sure we set the from_cache to true
+            resp.from_cache = True
+            return resp
+
+        # we're not fresh, clean out the junk
+        self.cache.delete(cache_url)
+
+        # return the original handler
+        return False
+
+    def cache_response(self, resp):
+        """
+        Algorithm for caching requests
+        """
+
+        # From httplib2: Don't cache 206's since we aren't going to
+        # handle byte range requests
+        if resp.status_code not in [200, 203]:
+            return
+
+        cc_req = _parse_cache_control(resp.request.headers)
+        cc = _parse_cache_control(resp.headers)
+
+        cache_url = self.cache_url(resp.request.full_url)
+
+        # Delete it from the cache if we happen to have it stored there
+        no_store = cc.get('no-store') or cc_req.get('no-store')
+        if no_store and self.cache.get(cache_url):
+            self.cache.delete(cache_url)
+
+        # Add to the cache if the response headers demand it. If there
+        # is no date header then we can't do anything about expiring
+        # the cache.
+        if 'date' in resp.headers:
+
+            # cache when there is a max-age > 0
+            if cc and cc.get('max-age'):
+                if int(cc['max-age']) > 0:
+                    self.cache.set(cache_url, resp)
+
+            # If the response carries a parseable Expires date, cache
+            # it; freshness is evaluated later in cached_request.
+            elif 'expires' in resp.headers:
+                if email.Utils.parsedate_tz(resp.headers['expires']):
+                    self.cache.set(cache_url, resp)
+
+    def from_cache(f):
+        """
+        A decorator that allows using a cached response.
+        """
+        def cached_handler(self, *args, **kw):
+            # If we have a cached response use it
+            cached_response = self.cached_request(*args, **kw)
+            if cached_response:
+                return cached_response
+
+            # Else return original function's response
+            return f(self, *args, **kw)
+        return cached_handler
+
+    def invalidates_cache(f):
+        """
+        A decorator for marking methods that can invalidate the cache.
+        """
+
+        def invalidating_handler(self, *args, **kw):
+            resp = f(self, *args, **kw)
+            if resp.ok:
+                cache_url = self.cache_url(resp.request.full_url)
+                self.cache.delete(cache_url)
+            return resp
+        return invalidating_handler
+
+    @from_cache
+    def get(self, url, headers=None, *args, **kw):
+        resp = self.session.get(url, headers=headers, *args, **kw)
+        # We set this primarily for testing
+        resp.from_cache = False
+
+        # See if we need to cache the response
+        self.cache_response(resp)
+
+        # actually return the repsonse
+        return resp
+
+    @invalidates_cache
+    def put(self, *args, **kw):
+        return self.session.put(*args, **kw)
+
+    @invalidates_cache
+    def delete(self, *args, **kw):
+        return self.session.delete(*args, **kw)
diff --git a/httpcache/httpcache/cache.py b/httpcache/httpcache/cache.py
new file mode 100644
index 00000000..276cb0bd
--- /dev/null
+++ b/httpcache/httpcache/cache.py
@@ -0,0 +1,155 @@
+"""
+The cache object API for implementing caches. The default is just a
+dictionary, which in turn means it is not threadsafe for writing.
+"""
+import os
+import re
+import time
+
+from contextlib import contextmanager
+from threading import Lock
+from cPickle import dump, load
+from hashlib import md5
+
+
+class BaseCache(object):
+
+    def get(self, key):
+        raise NotImplementedError()
+
+    def set(self, key, value):
+        raise NotImplementedError()
+
+    def delete(self, key):
+        raise NotImplementedError()
+
+
+class DictCache(BaseCache):
+
+    def __init__(self, init_dict=None):
+        self.data = init_dict or {}
+
+    def get(self, key):
+        return self.data.get(key, None)
+
+    def set(self, key, value):
+        self.data.update({key: value})
+
+    def delete(self, key):
+        if key in self.data:
+            self.data.pop(key)
+
+
+# Cache filename construction (original borrowed from Venus
+# http://intertwingly.net/code/venus/)
+re_url_scheme = re.compile(r'^\w+://')
+re_slash = re.compile(r'[?/:|]+')
+
+
+def safename(filename):
+    """Return a filename suitable for the cache.
+
+    Strips dangerous and common characters to create a filename we
+    can use to store the cache in.
+    """
+
+    try:
+        if re_url_scheme.match(filename):
+            if isinstance(filename, str):
+                filename = filename.decode('utf-8')
+                filename = filename.encode('idna')
+            else:
+                filename = filename.encode('idna')
+    except UnicodeError:
+        pass
+    if isinstance(filename, unicode):
+        filename = filename.encode('utf-8')
+    filemd5 = md5(filename).hexdigest()
+    filename = re_url_scheme.sub("", filename)
+    filename = re_slash.sub(",", filename)
+
+    # limit length of filename
+    if len(filename) > 200:
+        filename = filename[:200]
+    return ",".join((filename, filemd5))
+
+
+@contextmanager
+def filelock(key, mode, timeout=None, interval=.1):
+    """
+    A simple context manager that creates a temporary file for
+    locking a specific cache entry.
+
+    This was inspired pretty directly by:
+      http://amix.dk/blog/post/19531
+    """
+    lockfile = '%s.lock' % key
+    if timeout:
+        iterations = int(timeout / interval)
+    locked = os.path.exists(lockfile)
+    while locked:
+        error = FileCacheLockedException(lockfile, key)
+        if timeout:
+            # it is already locked, but we have a timeout if we
+            # want to try and block until it is available
+            time.sleep(interval)
+            iterations -= 1
+            locked = os.path.exists(lockfile)
+            if locked and iterations <= 0:
+                raise error
+        else:
+            # it is locked and we don't have a timeout, so raise
+            # the error instead of spinning forever
+            raise error
+
+    with open(lockfile, 'w+') as file_lock:
+        file_lock.write('1')
+        with open(key, mode) as opened_file:
+            yield opened_file
+    os.remove(lockfile)
+
+
+class FileCacheLockedException(Exception):
+
+    def __init__(self, lockfile, fname):
+        self.lockfile, self.fname = lockfile, fname
+        msg = '"%s" is locked. Try removing "%s"' % (fname, lockfile)
+        super(FileCacheLockedException, self).__init__(msg)
+
+
+class FileCache(BaseCache):
+    """
+    A simple threadsafe file based cache.
+
+    The file cache is a port of httplib2's directory cache. The only
+    difference is that it uses a lock when writing the file and
+    pickles the value. The pickling is used b/c we are using
+    requests.Response objects.
+    """
+
+    def __init__(self, cache, worker=None):
+        self.cache = cache
+        self.safe = safename
+        if not os.path.exists(cache):
+            os.makedirs(self.cache)
+        self.lock = Lock()
+
+    def get(self, key):
+        retval = None
+        cache_full_path = os.path.join(self.cache, safename(key))
+        try:
+            with open(cache_full_path, "rb") as f:
+                retval = load(f)
+        except (IOError, EOFError):
+            pass
+        return retval
+
+    def set(self, key, value):
+        cache_full_path = os.path.join(self.cache, safename(key))
+        with filelock(cache_full_path, "wb", timeout=1) as f:
+            dump(value, f)
+
+    def delete(self, key):
+        cache_full_path = os.path.join(self.cache, safename(key))
+        if os.path.exists(cache_full_path):
+            os.remove(cache_full_path)
diff --git a/httpcache/httpcache/tests/__init__.py b/httpcache/httpcache/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/httpcache/httpcache/tests/__init__.py
diff --git a/httpcache/httpcache/tests/integration/test_caching_conditions.py b/httpcache/httpcache/tests/integration/test_caching_conditions.py
new file mode 100644
index 00000000..59d35567
--- /dev/null
+++ b/httpcache/httpcache/tests/integration/test_caching_conditions.py
@@ -0,0 +1,44 @@
+from httpcache import CacheControl
+import requests
+
+
+class TestCachingConditions(object):
+
+    def test_no_caching_directives(self):
+        url = 'http://localhost:8080/'
+        s = requests.Session()
+        c = CacheControl(s, {})
+        r = c.get(url)
+
+        assert r
+        assert r.content == 'foo'
+        assert not c.cache.get(url)
+
+    def test_cache_max_age(self):
+        url = 'http://localhost:8080/max_age/'
+        s = requests.Session()
+        c = CacheControl(s, {})
+        r = c.get(url)
+        assert c.cache.get(url)
+        assert c.cache.get(url) == r
+
+    def test_cache_no_cache(self):
+        url = 'http://localhost:8080/no_cache/'
+        s = requests.Session()
+        c = CacheControl(s, {})
+        c.get(url)
+        assert not c.cache.get(url)
+
+    def test_cache_must_revalidate(self):
+        url = 'http://localhost:8080/must_revalidate/'
+        s = requests.Session()
+        c = CacheControl(s, {})
+        c.get(url)
+        assert not c.cache.get(url)
+
+    def test_cache_no_store(self):
+        url = 'http://localhost:8080/no_store/'
+        s = requests.Session()
+        c = CacheControl(s, {})
+        c.get(url)
+        assert not c.cache.get(url)
diff --git a/httpcache/httpcache/tests/server.py b/httpcache/httpcache/tests/server.py
new file mode 100644
index 00000000..140c3f1d
--- /dev/null
+++ b/httpcache/httpcache/tests/server.py
@@ -0,0 +1,36 @@
+from __future__ import print_function
+import cherrypy
+
+
+class CacheTestingServer(object):
+
+    def index(self):
+        return 'foo'
+    index.exposed = True
+
+    def max_age(self, value=None):
+        age = 'max-age=%s' % (value or 300)
+        cherrypy.response.headers['Cache-Control'] = age
+        return 'max age'
+    max_age.exposed = True
+
+    def no_cache(self):
+        cherrypy.response.headers['Cache-Control'] = 'no-cache'
+        return 'no cache'
+    no_cache.exposed = True
+
+    def must_revalidate(self):
+        cherrypy.response.headers['Cache-Control'] = 'must-revalidate'
+        return 'must revalidate'
+    must_revalidate.exposed = True
+
+    def no_store(self):
+        cherrypy.response.headers['Cache-Control'] = 'no-store'
+        return 'no store'
+    no_store.exposed = True
+
+
+if __name__ == '__main__':
+    cherrypy.tree.mount(CacheTestingServer(), '/')
+    cherrypy.engine.start()
+    cherrypy.engine.block()
diff --git a/httpcache/httpcache/tests/test_cache.py b/httpcache/httpcache/tests/test_cache.py
new file mode 100644
index 00000000..4efbc5c4
--- /dev/null
+++ b/httpcache/httpcache/tests/test_cache.py
@@ -0,0 +1,76 @@
+"""
+Tests for our caches
+"""
+import os
+
+from threading import Thread
+# from multiprocessing import Process
+from httpcache.cache import FileCache, safename, filelock
+from httpcache.cache import FileCacheLockedException
+
+
+class RandomWriter(Thread):
+    def __init__(self, cache):
+        super(RandomWriter, self).__init__()
+        self.key = 'threading_test'
+        self.cache = cache
+
+    def run(self):
+        for x in range(0, 5):
+            value = x * 1000
+            self.cache.set(self.key, value)
+            assert self.cache.get(self.key) == value
+
+
+class TestFileCache(object):
+
+    cache_dir = 'test_file_cache'
+
+    def test_set_get_delete(self):
+        fc = FileCache(self.cache_dir)
+        fc.set('foo', 'bar')
+        assert fc.get('foo') == 'bar'
+        fc.delete('foo')
+        assert fc.get('foo') == None
+
+    def test_setting_with_multiple_threads(self):
+        fc = FileCache(self.cache_dir)
+        w1 = RandomWriter(fc)
+        w2 = RandomWriter(fc)
+        w1.start()
+        w2.start()
+        w1.join()
+        w2.join()
+        assert fc.get(w1.key) == 4000
+
+    def test_locked_raises_exception(self):
+        key, value = 'locked', {'foo': 'bar'}
+        fc = FileCache(self.cache_dir)
+        lockfile = os.path.join(self.cache_dir,
+                                '%s.lock' % safename(key))
+        with open(lockfile, 'w+') as lock:
+            lock.write('1')
+        assert os.path.exists(lockfile)
+        try:
+            fc.set(key, value)
+            assert False
+        except FileCacheLockedException:
+            assert True
+        os.remove(lockfile)
+        fc.set(key, value)
+        assert fc.get(key) == value
+        assert not os.path.exists(lockfile)
+
+    def test_filelock_timeout(self):
+        fc = FileCache(self.cache_dir)
+        fname = safename('locked')
+        lockfile = '%s.lock' % fname
+        with open(lockfile, 'w+') as lock:
+            lock.write('1')
+            try:
+                with filelock(fname, 'w+', .5):
+                    assert False
+            except FileCacheLockedException:
+                assert True
+        os.remove(lockfile)
+        assert not os.path.exists(lockfile)
diff --git a/httpcache/httpcache/tests/test_cache_control.py b/httpcache/httpcache/tests/test_cache_control.py
new file mode 100644
index 00000000..c06fabbb
--- /dev/null
+++ b/httpcache/httpcache/tests/test_cache_control.py
@@ -0,0 +1,167 @@
+"""
+Unit tests that verify our caching methods work correctly.
+"""
+import mock
+import datetime
+import time
+
+from httpcache import CacheControl
+from httpcache.cache import DictCache
+
+
+TIME_FMT = "%a, %d %b %Y %H:%M:%S"
+
+
+class TestCacheControlResponse(object):
+    url = 'http://url.com/'
+
+    def req(self, headers=None):
+        headers = headers or {}
+        return mock.Mock(full_url=self.url, headers=headers)
+
+    def resp(self, headers=None):
+        headers = headers or {}
+        return mock.Mock(status_code=200,
+                         headers=headers,
+                         request=self.req())
+
+    def cache(self):
+        return CacheControl(mock.Mock(), mock.MagicMock())
+
+    def test_no_cache_non_20x_response(self):
+        c = self.cache()
+
+        # No caching without some extra headers, so we add them
+        now = datetime.datetime.utcnow().strftime(TIME_FMT)
+        resp = self.resp({'cache-control': 'max-age=3600',
+                          'date': now})
+
+        no_cache_codes = [201, 300, 400, 500]
+        for code in no_cache_codes:
+            resp.status_code = code
+            c.cache_response(resp)
+            assert not c.cache.set.called
+
+        # this should work b/c the resp is 20x
+        resp.status_code = 203
+        c.cache_response(resp)
+        assert c.cache.set.called
+
+    def test_no_cache_with_no_date(self):
+        c = self.cache()
+
+        # No date header which makes our max-age pointless
+        resp = self.resp({'cache-control': 'max-age=3600'})
+        c.cache_response(resp)
+
+        assert not c.cache.set.called
+
+    def test_cache_response_no_cache_control(self):
+        c = self.cache()
+        resp = self.resp()
+        c.cache_response(resp)
+
+        assert not c.cache.set.called
+
+    def test_cache_response_cache_max_age(self):
+        c = self.cache()
+
+        now = datetime.datetime.utcnow().strftime(TIME_FMT)
+        resp = self.resp({'cache-control': 'max-age=3600',
+                          'date': now})
+        c.cache_response(resp)
+        c.cache.set.assert_called_with(self.url, resp)
+
+    def test_cache_repsonse_no_store(self):
+        resp = mock.Mock()
+        cache = DictCache({self.url: resp})
+        c = CacheControl(resp, cache)
+
+        cache_url = c.cache_url(self.url)
+
+        resp = self.resp({'cache-control': 'no-store'})
+        assert c.cache.get(cache_url)
+
+        c.cache_response(resp)
+        assert not c.cache.get(cache_url)
+
+
+class TestCacheControlRequest(object):
+
+    url = 'http://foo.com'
+
+    def test_cache_request_no_cache(self):
+        c = CacheControl(mock.Mock())
+        hdrs = {'cache-control': 'no-cache'}
+        resp = c.cached_request(self.url, headers=hdrs)
+        assert not resp
+
+    def test_cache_request_pragma_no_cache(self):
+        c = CacheControl(mock.Mock())
+        hdrs = {'pragma': 'no-cache'}
+        resp = c.cached_request(self.url, headers=hdrs)
+        assert not resp
+
+    def test_cache_request_no_store(self):
+        c = CacheControl(mock.Mock())
+        hdrs = {'cache-control': 'no-store'}
+        resp = c.cached_request(self.url, headers=hdrs)
+        assert not resp
+
+    def test_cache_request_max_age_0(self):
+        c = CacheControl(mock.Mock())
+        hdrs = {'cache-control': 'max-age=0'}
+        resp = c.cached_request(self.url, headers=hdrs)
+        assert not resp
+
+    def test_cache_request_not_in_cache(self):
+        c = CacheControl(mock.Mock())
+        resp = c.cached_request(self.url)
+        assert not resp
+
+    def test_cache_request_fresh_max_age(self):
+        now = datetime.datetime.utcnow().strftime(TIME_FMT)
+        resp = mock.Mock(headers={'cache-control': 'max-age=3600',
+                                  'date': now})
+
+        # NOTE: httplib2 uses its own algorithm for finding the
+        # "defrag_uri" in order to use it for creating a cache key. It
+        # seems to append the trailing slash, which I'm pretty sure is
+        # b/c of the auto directory rules. I'm trusting it is correct.
+        cache = DictCache({self.url + '/': resp})
+        c = CacheControl(mock.Mock(), cache)
+        r = c.cached_request(self.url)
+        assert r == resp
+
+    def test_cache_request_unfresh_max_age(self):
+        earlier = time.time() - 3700
+        now = datetime.datetime.fromtimestamp(earlier).strftime(TIME_FMT)
+
+        resp = mock.Mock(headers={'cache-control': 'max-age=3600',
+                                  'date': now})
+        cache = DictCache({self.url: resp})
+        c = CacheControl(mock.Mock(), cache)
+        r = c.cached_request(self.url)
+        assert not r
+
+    def test_cache_request_fresh_expires(self):
+        later = datetime.timedelta(days=1)
+        expires = (datetime.datetime.utcnow() + later).strftime(TIME_FMT)
+        now = datetime.datetime.utcnow().strftime(TIME_FMT)
+        resp = mock.Mock(headers={'expires': expires,
+                                  'date': now})
+        cache = DictCache({self.url + '/': resp})
+        c = CacheControl(mock.Mock, cache)
+        r = c.cached_request(self.url)
+        assert r == resp
+
+    def test_cache_request_unfresh_expires(self):
+        later = datetime.timedelta(days=-1)
+        expires = (datetime.datetime.utcnow() + later).strftime(TIME_FMT)
+        now = datetime.datetime.utcnow().strftime(TIME_FMT)
+        resp = mock.Mock(headers={'expires': expires,
+                                  'date': now})
+        cache = DictCache({self.url: resp})
+        c = CacheControl(mock.Mock, cache)
+        r = c.cached_request(self.url)
+        assert not r
diff --git a/httpcache/httpcache/tests/test_cache_invalidation.py b/httpcache/httpcache/tests/test_cache_invalidation.py
new file mode 100644
index 00000000..017a3d12
--- /dev/null
+++ b/httpcache/httpcache/tests/test_cache_invalidation.py
@@ -0,0 +1,39 @@
+"""
+When resources are known to be updated via HTTP (ie PUT, DELETE), we
+should invalidate them in our cache.
+"""
+import mock
+
+from httpcache import CacheControl
+from httpcache.cache import DictCache
+
+
+class TestInvalidations(object):
+
+    url = 'http://foo.com/bar/'
+
+    def resp(self):
+        req = mock.Mock(full_url=self.url)
+        return mock.Mock(request=req)
+
+    def test_put_invalidates_cache(self):
+        # Prep our cache
+        resp = self.resp()
+        cache = DictCache({self.url: resp})
+        session = mock.Mock(put=mock.Mock(return_value=resp))
+        c = CacheControl(session, cache)
+
+        c.put(self.url)
+
+        assert not c.cache.get(self.url)
+
+    def test_delete_invalidates_cache(self):
+        # Prep our cache
+        resp = self.resp()
+        cache = DictCache({self.url: resp})
+        session = mock.Mock(delete=mock.Mock(return_value=resp))
+        c = CacheControl(session, cache)
+
+        c.delete(self.url)
+
+        assert not c.cache.get(self.url)
diff --git a/httpcache/httpcache/tests/test_max_age.py b/httpcache/httpcache/tests/test_max_age.py
new file mode 100644
index 00000000..f33f501b
--- /dev/null
+++ b/httpcache/httpcache/tests/test_max_age.py
@@ -0,0 +1,50 @@
+from __future__ import print_function
+from .. import CacheControl
+import requests
+
+
+class TestMaxAge(object):
+
+    def test_client_max_age_0(self):
+        """
+        Making sure when the client uses max-age=0 we don't get a
+        cached copy even though we're still fresh.
+        """
+        url = 'http://localhost:8080/max_age/'
+        s = requests.Session()
+        c = CacheControl(s, {})
+        print('first request')
+        r = c.get(url)
+        cache_url = c.cache_url(url)
+        assert c.cache.get(cache_url) == r
+
+        print('second request')
+        r = c.get(url, headers={'Cache-Control': 'max-age=0'})
+
+        # don't remove from the cache
+        assert c.cache.get(cache_url)
+        assert r.from_cache == False
+
+    def test_client_max_age_3600(self):
+        """
+        Verify we get a cached value when the client has a
+        reasonable max-age value.
+        """
+        # prep our cache
+        url = 'http://localhost:8080/max_age/'
+        s = requests.Session()
+        c = CacheControl(s, {})
+        r = c.get(url)
+        cache_url = c.cache_url(url)
+        assert c.cache.get(cache_url) == r
+
+        # request that we don't want a new one unless
+        r = c.get(url, headers={'Cache-Control': 'max-age=3600'})
+        assert r.from_cache == True
+
+        # now lets grab one that forces a new request b/c the cache
+        # has expired. To do that we'll inject a new time value.
+        resp = c.cache.get(cache_url)
+        resp.headers['date'] = 'Tue, 15 Nov 1994 08:12:31 GMT'
+        r = c.get(url)
+        assert r.from_cache == False
diff --git a/httpcache/setup.py b/httpcache/setup.py
new file mode 100644
index 00000000..8d188634
--- /dev/null
+++ b/httpcache/setup.py
@@ -0,0 +1,18 @@
+import setuptools
+
+setup_params = dict(
+    name='HTTPCache',
+    version='0.5',
+    author="Eric Larson",
+    author_email="eric@ionrock.org",
+    url="https://bitbucket.org/elarson/httpcache",
+    packages=setuptools.find_packages(),
+    tests_require=[  # setuptools spells this keyword without a trailing "s"
+        'pytest',  # distribution name is "pytest"; "py.test" is its command
+        'cherrypy',
+    ],
+)
+
+
+if __name__ == '__main__':
+    setuptools.setup(**setup_params)
diff --git a/py-reqcache b/py-reqcache
new file mode 160000
+Subproject 5b4f3e5217c2d1b32818bf4d8121a0a243e67db
diff --git a/requests/cache.py b/requests/cache.py
index 5be1dfbe..373e53c3 100644
--- a/requests/cache.py
+++ b/requests/cache.py
@@ -7,7 +7,11 @@ requests.cache
 
 Requests caching layer.
 """
+
+import time
 import hashlib
+import calendar
+import email
 from .packages.cachecore import SimpleCache
 
 DEFAULT_CACHE = SimpleCache
@@ -25,20 +29,16 @@ def expand_cache(c):
     if c is False:
         return Cache(backend=False)
 
-def request_hash(r, type=''):
-    """Returns a SHA256(type-method-url) for cache keys."""
-
-    s = '{0}-{1}-{2}'.format(type, r.request.method, r.request.full_url)
 
-    return hashlib.sha256(s).hexdigest()
 
 
 class Cache(object):
     """A Cache session."""
-    def __init__(self, backend=None, conditional=None, content=True):
+    def __init__(self, backend=None, conditional=None, content=True, handler=None):
 
         self.conditional = None
         self.content = None
+        self.handler = handler
 
         # Default to backend if True.
         if not backend is None:
@@ -54,3 +54,141 @@ class Cache(object):
         if content is not True:
             self.content = content
 
+        if handler is None:
+            self.handler = CacheHandler()
+
+
+
+class CacheHandler(object):
+    """Builds cache keys and decides what to store in / reuse from a cache."""
+
+    @staticmethod
+    def request_hash(r, type=''):
+        """Returns a SHA256(type-method-url) for cache keys."""
+        s = '{0}-{1}-{2}'.format(type, r.request.method, r.request.full_url)
+        if not isinstance(s, bytes):  # hashlib only accepts bytes
+            s = s.encode('utf-8')
+        return hashlib.sha256(s).hexdigest()
+
+    @staticmethod
+    def _parse_cache_control(headers):
+        """
+        Parse the cache control headers returning a dictionary with values
+        for the different directives (empty segments are ignored).
+        """
+        retval = {}
+        if 'cache-control' in headers:
+            parts = [p.strip() for p in headers['cache-control'].split(',')]
+            parts_with_args = [
+                tuple(x.strip().lower() for x in part.split("=", 1))
+                for part in parts if "=" in part]
+            parts_wo_args = [(name.lower(), 1)
+                             for name in parts if name and "=" not in name]
+            retval = dict(parts_with_args + parts_wo_args)
+        return retval
+
+    def cache_request(self, r, cache):
+        """See if we should use a cached response."""
+
+        # NOTE(review): this unconditional return disables the freshness
+        # check; everything below it in this method is unreachable for now.
+        return True
+        cache_hash = self.request_hash(r)
+        cc = self._parse_cache_control(r.headers)
+
+        # non-caching states (directive values are strings, hence '0')
+        no_cache = 'no-cache' in cc or cc.get('max-age') == '0'
+
+        # see if it is in the cache anyways
+        resp = cache.get(cache_hash)
+        if no_cache or not resp:
+            return False
+
+        # It is in the cache, so lets see if it is going to be
+        # fresh enough
+        now = time.time()
+        date = calendar.timegm(
+            email.Utils.parsedate_tz(resp.headers['date']))
+        current_age = max(0, now - date)
+
+        resp_cc = self._parse_cache_control(resp.headers)
+
+
+        # determine freshness
+        freshness_lifetime = 0
+        if 'max-age' in resp_cc:
+            try:
+                freshness_lifetime = int(resp_cc['max-age'])
+            except ValueError:
+                pass
+        elif 'expires' in resp.headers:
+            expires = email.Utils.parsedate_tz(resp.headers['expires'])
+            if expires is not None:
+                expire_time = calendar.timegm(expires) - date
+                freshness_lifetime = max(0, expire_time)
+
+        # determine if we are setting freshness limit in the req
+        if 'max-age' in cc:
+            try:
+                freshness_lifetime = int(cc['max-age'])
+            except ValueError:
+                freshness_lifetime = 0
+
+        if 'min-fresh' in cc:
+            try:
+                min_fresh = int(cc['min-fresh'])
+            except ValueError:
+                min_fresh = 0
+            # adjust our current age by our min fresh
+            current_age += min_fresh
+
+        # see how fresh we actually are
+        fresh = (freshness_lifetime > current_age)
+
+        if fresh:
+            # make sure we set the from_cache to true
+            resp.from_cache = True
+            return resp
+
+        # we're not fresh, clean out the junk
+        cache.delete(cache_hash)
+
+        # return the original handler
+        return False
+
+    def cache_response(self, r, cache):
+        """Algorithm for caching requests.
+
+        Stores 200/203 responses that carry a date header plus a positive
+        max-age or a parsable expires date; no-store always wins.
+        """
+        from email.utils import parsedate_tz
+
+        if r.status_code not in [200, 203]:
+            return
+
+        cc_req = self._parse_cache_control(r.request.headers)
+        cc = self._parse_cache_control(r.headers)
+        cache_hash = self.request_hash(r)
+
+        # no-store: drop any stored copy and do not store this response.
+        if cc.get('no-store') or cc_req.get('no-store'):
+            if cache.get(cache_hash):
+                cache.delete(cache_hash)
+            return
+
+        # No date header means we can't expire the entry, so don't store it.
+        if 'date' not in r.headers:
+            return
+
+        if cc.get('max-age'):
+            try:
+                max_age = int(cc['max-age'])
+            except ValueError:  # unparsable max-age counts as not cacheable
+                max_age = 0
+            if max_age > 0:
+                cache.set(cache_hash, r)
+        elif 'expires' in r.headers:
+            # Expires is an HTTP date, not a number: parse it, don't int() it.
+            if parsedate_tz(r.headers['expires']) is not None:
+                cache.set(cache_hash, r)
diff --git a/requests/sessions.py b/requests/sessions.py
index 5e827543..f6416777 100644
--- a/requests/sessions.py
+++ b/requests/sessions.py
@@ -11,6 +11,7 @@ requests (cookies, auth, proxies).
 
 from copy import deepcopy
 from .compat import cookielib
+from .cache import expand_cache
 from .cookies import cookiejar_from_dict, remove_cookie_by_name
 from .defaults import defaults
 from .models import Request
@@ -81,10 +82,11 @@ class Session(object):
         self.prefetch = prefetch
         self.verify = verify
         self.cert = cert
-        self.cache = cache
+        self.cache = expand_cache(cache)
 
-        from .cache import ReqCache
-        self.cache = ReqCache("test", "memory")
+
+        # from .cache import ReqCache
+        # self.cache = ReqCache("test", "memory")
 
         for (k, v) in list(defaults.items()):
             self.config.setdefault(k, deepcopy(v))
diff --git a/t.py b/t.py
new file mode 100644
index 00000000..8e6921c8
--- /dev/null
+++ b/t.py
@@ -0,0 +1,26 @@
+# mycache = ReqCache("test", "memory")
+import requests
+s = requests.session()  # single session reused for every request below
+
+r = s.get('http://github.com')
+print(r.__dict__.get('from_cache'))  # None when the attribute was never set
+
+r = s.get('http://github.com')
+print(r.__dict__.get('from_cache'))
+
+
+r = s.get('http://github.com')
+print(r.__dict__.get('from_cache'))
+
+r = s.get('http://github.com')
+print(r.__dict__.get('from_cache'))
+
+r = s.get('http://github.com')
+print(r.__dict__.get('from_cache'))
+
+r = s.get('http://github.com')
+print(r.__dict__.get('from_cache'))
+# r = requests.get('http://github.com', hooks=mycache.hooks)
+
+# r = requests.get('http://github.com', hooks=mycache.hooks)
+# explain_cache_result(r)
author	Kenneth Reitz <me@kennethreitz.com>	2012-10-16 13:39:00 -0700
committer	Kenneth Reitz <me@kennethreitz.com>	2012-10-16 13:39:00 -0700
commit	e3c51e4182c54ffc494a7327e69004342844b0a4 (patch)
tree	6f8a68415b5adc4109cd63b56d134e48535288b3
parent	706f88fedd7c526bc17ec9341a75ace1486d0a60 (diff)
download	python-requests-cache.tar.gz