summaryrefslogtreecommitdiff
path: root/requests_cache/backends/filesystem.py
blob: b5da0dfc1bd28236cb81ead1ee4d9f1af84e1708 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
"""
.. image::
    ../_static/files-generic.png

This backend stores responses in files on the local filesystem (one file per response).

File Formats
^^^^^^^^^^^^
By default, responses are saved as pickle files, since this format is generally the fastest. If you
want to save responses in a human-readable format, you can use one of the other available
:ref:`serializers`. For example, to save responses as JSON files:

    >>> session = CachedSession('~/http_cache', backend='filesystem', serializer='json')
    >>> session.get('https://httpbin.org/get')
    >>> print(list(session.cache.paths()))
    ['/home/user/http_cache/4dc151d95200ec.json']

Or as YAML (requires ``pyyaml``):

    >>> session = CachedSession('~/http_cache', backend='filesystem', serializer='yaml')
    >>> session.get('https://httpbin.org/get')
    >>> print(list(session.cache.paths()))
    ['/home/user/http_cache/4dc151d95200ec.yaml']

Cache Files
^^^^^^^^^^^
* See :ref:`files` for general info on specifying cache paths
* The path for a given response will be in the format ``<cache_name>/<cache_key>.<extension>``
  (for example, ``.json`` or ``.yaml``, as shown in the examples above)
* Redirects are stored in a separate SQLite database, located at ``<cache_name>/redirects.sqlite``
* Use :py:meth:`.FileCache.paths` to get a list of all cached response paths

API Reference
^^^^^^^^^^^^^
.. automodsumm:: requests_cache.backends.filesystem
   :classes-only:
   :nosignatures:
"""
from contextlib import contextmanager
from os import makedirs
from pathlib import Path
from pickle import PickleError
from shutil import rmtree
from typing import Iterator

from ..serializers import SERIALIZERS
from . import BaseCache, BaseStorage
from .sqlite import AnyPath, SQLiteDict, get_cache_path


class FileCache(BaseCache):
    """Cache backend that keeps responses as individual files on the local filesystem.

    Responses are held in a :py:class:`FileDict`; redirects are held in a ``SQLiteDict`` backed by
    a ``redirects.sqlite`` file inside the same cache directory.

    Args:
        cache_name: Base directory for cache files
        use_cache_dir: Store database in a user cache directory (e.g., `~/.cache/`)
        use_temp: Store cache files in a temp directory (e.g., ``/tmp/http_cache/``).
            Note: if ``cache_name`` is an absolute path, this option will be ignored.
        extension: Extension for cache files. If not specified, the serializer default extension
            will be used.
    """

    def __init__(self, cache_name: AnyPath = 'http_cache', use_temp: bool = False, **kwargs):
        super().__init__(**kwargs)
        # The response storage must exist first: it resolves the cache directory that the
        # redirects database is placed in.
        self.responses: FileDict = FileDict(cache_name, use_temp=use_temp, **kwargs)
        redirects_db = self.cache_dir / 'redirects.sqlite'
        self.redirects: SQLiteDict = SQLiteDict(redirects_db, 'redirects', **kwargs)

    @property
    def cache_dir(self) -> Path:
        """Directory containing all cache files, as resolved by the response storage"""
        response_dir = self.responses.cache_dir
        return Path(response_dir)

    def paths(self) -> Iterator[Path]:
        """Get the file paths of all cached responses (delegates to :py:meth:`FileDict.paths`)"""
        response_files = self.responses.paths()
        return response_files

    def clear(self):
        """Remove all cached responses and redirects"""
        # Clearing the response storage deletes the whole cache directory, which takes
        # redirects.sqlite with it...
        self.responses.clear()
        # ...so the redirects database has to be re-initialized afterwards.
        self.redirects.init_db()


class FileDict(BaseStorage):
    """A dictionary-like interface to files on the local filesystem.

    Each key maps to a single file, ``<cache_dir>/<key><extension>``, whose contents are the value
    as encoded by ``self.serializer``.

    Args:
        cache_name: Base directory for cache files; resolved with ``get_cache_path``
        use_temp: Passed through to ``get_cache_path``
        use_cache_dir: Passed through to ``get_cache_path``
        extension: Extension for cache files. If not specified, the serializer default extension
            will be used.
        kwargs: Additional keyword arguments, passed on to ``BaseStorage``
    """

    def __init__(
        self,
        cache_name: AnyPath,
        use_temp: bool = False,
        use_cache_dir: bool = False,
        extension: str = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.cache_dir = get_cache_path(cache_name, use_cache_dir=use_cache_dir, use_temp=use_temp)
        # NOTE(review): self.serializer is presumably set by BaseStorage.__init__ -- confirm
        self.extension = _get_extension(extension, self.serializer)
        # Hint for which file mode to try first when reading; updated on every write, and
        # switched to binary if a text-mode read fails to decode.
        self.is_binary = False
        makedirs(self.cache_dir, exist_ok=True)

    @contextmanager
    def _try_io(self, ignore_errors: bool = False):
        """Attempt an I/O operation, and either ignore errors or re-raise them as KeyErrors

        Args:
            ignore_errors: If True, silently swallow I/O and pickle errors instead of raising

        Raises:
            KeyError: If an I/O or pickle error occurs and ``ignore_errors`` is False
        """
        try:
            yield
        # IOError is an alias of OSError in Python 3, so OSError alone covers both
        except (OSError, PickleError) as e:
            if not ignore_errors:
                raise KeyError(e) from e

    def _path(self, key) -> Path:
        """Get the path of the file that stores the value for ``key``"""
        return self.cache_dir / f'{key}{self.extension}'

    def __getitem__(self, key):
        """Read and deserialize the value stored for ``key``

        Raises:
            KeyError: If the file is missing or unreadable, or unpickling fails
            UnicodeDecodeError: If the contents cannot be decoded even when read in binary mode
        """
        mode = 'rb' if self.is_binary else 'r'
        with self._try_io():
            try:
                with self._path(key).open(mode) as f:
                    return self.serializer.loads(f.read())
            except UnicodeDecodeError:
                # A text-mode read of binary data fails to decode: retry once in binary mode.
                # If we were already in binary mode, retrying cannot help and would recurse
                # without bound, so let the error propagate instead.
                if self.is_binary:
                    raise
                self.is_binary = True
                return self.__getitem__(key)

    def __delitem__(self, key):
        """Delete the file for ``key``

        Raises:
            KeyError: If the file does not exist or cannot be removed
        """
        with self._try_io():
            self._path(key).unlink()

    def __setitem__(self, key, value):
        """Serialize ``value`` and write it to the file for ``key``

        Raises:
            KeyError: If the file cannot be written
        """
        # Serialize exactly once, and pick the file mode from what the serializer actually
        # returned; a stale ``is_binary`` flag must not force a str payload into a 'wb' handle.
        serialized_value = self.serializer.dumps(value)
        self.is_binary = isinstance(serialized_value, bytes)
        mode = 'wb' if self.is_binary else 'w'
        with self._try_io():
            with self._path(key).open(mode) as f:
                f.write(serialized_value)

    def __iter__(self):
        yield from self.keys()

    def __len__(self):
        return sum(1 for _ in self.paths())

    def clear(self):
        """Delete all cache files by removing and re-creating the cache directory.

        This is best-effort: filesystem errors are ignored.
        """
        with self._try_io(ignore_errors=True):
            rmtree(self.cache_dir, ignore_errors=True)
            # Same semantics as makedirs(..., exist_ok=True) in __init__: tolerate a directory
            # that could not be (fully) removed, and re-create missing parents
            self.cache_dir.mkdir(parents=True, exist_ok=True)

    def keys(self):
        """Get the keys of all cached items, derived from the cache file names"""
        # Strip exactly the configured extension rather than using Path.stem, which only drops
        # the last suffix; this keeps keys consistent with _path() for multi-part extensions.
        suffix_len = len(self.extension)
        return [
            path.name[:-suffix_len] if suffix_len else path.name for path in self.paths()
        ]

    def paths(self) -> Iterator[Path]:
        """Get file paths to all cached responses (files in the cache dir with our extension)"""
        return self.cache_dir.glob(f'*{self.extension}')


def _get_extension(extension: str = None, serializer=None) -> str:
    """Use either the provided file extension, or get the serializer's default extension"""
    if extension:
        return f'.{extension}'
    for name, obj in SERIALIZERS.items():
        if serializer is obj:
            return '.' + name.replace('pickle', 'pkl')
    return ''