diff options
| author | Jordan Cook <jordan.cook@pioneer.com> | 2021-09-05 14:26:32 -0500 |
|---|---|---|
| committer | Jordan Cook <jordan.cook@pioneer.com> | 2021-09-06 17:39:30 -0500 |
| commit | 30f6896b8c6bcd151fc50001ad451ffd6c6091e0 (patch) | |
| tree | d929dcf58883c874958a579410f857b8a43b8ce3 /requests_cache | |
| parent | 8c05674c426298da7cd8a333c2dfce53151e041f (diff) | |
| download | requests-cache-30f6896b8c6bcd151fc50001ad451ffd6c6091e0.tar.gz | |
Allow match_headers to optionally accept a list of specific headers to match
Diffstat (limited to 'requests_cache')
| -rw-r--r-- | requests_cache/backends/base.py | 4 | ||||
| -rw-r--r-- | requests_cache/cache_keys.py | 77 | ||||
| -rw-r--r-- | requests_cache/session.py | 3 |
3 files changed, 51 insertions, 33 deletions
diff --git a/requests_cache/backends/base.py b/requests_cache/backends/base.py index 0562a0a..8b066cc 100644 --- a/requests_cache/backends/base.py +++ b/requests_cache/backends/base.py @@ -45,17 +45,17 @@ class BaseCache: def __init__( self, *args, - match_headers: bool = False, + match_headers: Union[Iterable[str], bool] = False, ignored_parameters: Iterable[str] = None, key_fn: KEY_FN = None, **kwargs, ): self.responses: BaseStorage = DictStorage() self.redirects: BaseStorage = DictStorage() - self.match_headers = match_headers or kwargs.get('include_get_headers') self.ignored_parameters = ignored_parameters self.key_fn = key_fn or create_key self.name: str = kwargs.get('cache_name', '') + self.match_headers = match_headers or kwargs.pop('include_get_headers', False) @property def urls(self) -> Iterator[str]: diff --git a/requests_cache/cache_keys.py b/requests_cache/cache_keys.py index b83aa9a..93a6bce 100644 --- a/requests_cache/cache_keys.py +++ b/requests_cache/cache_keys.py @@ -9,7 +9,7 @@ from __future__ import annotations import json from hashlib import sha256 from operator import itemgetter -from typing import TYPE_CHECKING, Iterable, List, Mapping, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, Iterable, List, Mapping, Optional, Tuple, Union from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse from requests import Request, Session @@ -22,62 +22,67 @@ from . import get_valid_kwargs if TYPE_CHECKING: from .models import AnyRequest -DEFAULT_HEADERS = default_headers() -DEFAULT_EXCLUDE_HEADERS = ['Cache-Control', 'If-None-Match', 'If-Modified-Since'] +DEFAULT_REQUEST_HEADERS = default_headers() +DEFAULT_EXCLUDE_HEADERS = {'Cache-Control', 'If-None-Match', 'If-Modified-Since'} RequestContent = Union[Mapping, str, bytes] def create_key( request: AnyRequest = None, ignored_parameters: Iterable[str] = None, - match_headers: bool = False, + match_headers: Union[Iterable[str], bool] = False, **kwargs, ) -> str: - """Create a normalized cache key from a request object or :py:class:`~requests.Request` + """Create a normalized cache key from either a request object or :py:class:`~requests.Request` arguments """ + # Create a PreparedRequest, if needed if not request: request_kwargs = get_valid_kwargs(Request.__init__, kwargs) request = Session().prepare_request(Request(**request_kwargs)) if TYPE_CHECKING: assert request is not None + # Add method and relevant request settings key = sha256(encode((request.method or '').upper())) - url = remove_ignored_url_params(request, ignored_parameters) - url = url_normalize(url) - key.update(encode(url)) key.update(encode(kwargs.get('verify', True))) + # Add filtered/normalized URL + request params + url = remove_ignored_url_params(request, ignored_parameters) + key.update(encode(url_normalize(url))) + + # Add filtered request body body = remove_ignored_body_params(request, ignored_parameters) if body: key.update(body) - if match_headers and request.headers != DEFAULT_HEADERS: - exclude_headers = list(ignored_parameters or []) + DEFAULT_EXCLUDE_HEADERS - headers = normalize_dict(remove_ignored_headers(request, exclude_headers)) - if TYPE_CHECKING: - assert isinstance(headers, dict) - for name, value in headers.items(): - key.update(encode(f'{name}={value}')) + + # Add filtered/normalized headers + headers = get_matched_headers(request.headers, ignored_parameters, match_headers) + for k, v in headers.items(): + key.update(encode(f'{k}={v}')) return key.hexdigest() -def remove_ignored_params( - request: AnyRequest, ignored_parameters: Optional[Iterable[str]] -) -> AnyRequest: - """Remove ignored parameters from reuqest URL, body, and headers""" - if not ignored_parameters: - return request - request.headers = remove_ignored_headers(request, ignored_parameters) - request.url = remove_ignored_url_params(request, ignored_parameters) - request.body = remove_ignored_body_params(request, ignored_parameters) - return request +def get_matched_headers( + headers: CaseInsensitiveDict, ignored_parameters: Optional[Iterable[str]], match_headers +) -> Dict: + """Get only the headers we should match against, given an optional include list and/or exclude + list. Also normalizes headers (sorted/lowercased keys). + """ + if not match_headers: + return {} + + included = set(match_headers if isinstance(match_headers, Iterable) else headers.keys()) + included -= set(ignored_parameters or []) + included -= DEFAULT_EXCLUDE_HEADERS + return {k.lower(): headers[k] for k in sorted(included) if k in headers} def remove_ignored_headers( request: AnyRequest, ignored_parameters: Optional[Iterable[str]] ) -> CaseInsensitiveDict: - """Remove any ignored parameters from reuqest headers""" + """Remove any ignored request headers""" if not ignored_parameters: return request.headers headers = CaseInsensitiveDict(request.headers.copy()) @@ -86,6 +91,18 @@ def remove_ignored_headers( return headers +def remove_ignored_params( + request: AnyRequest, ignored_parameters: Optional[Iterable[str]] +) -> AnyRequest: + """Remove ignored parameters from request URL, body, and headers""" + if not ignored_parameters: + return request + request.headers = remove_ignored_headers(request, ignored_parameters) + request.url = remove_ignored_url_params(request, ignored_parameters) + request.body = remove_ignored_body_params(request, ignored_parameters) + return request + + def remove_ignored_url_params(request: AnyRequest, ignored_parameters: Optional[Iterable[str]]) -> str: """Remove any ignored request parameters from the URL""" url_str = str(request.url) @@ -135,10 +152,6 @@ def normalize_dict( items: Request params, data, or json normalize_data: Also normalize stringified JSON """ - - def sort_dict(d): - return dict(sorted(d.items(), key=itemgetter(0))) - if not items: return None if isinstance(items, Mapping): @@ -155,6 +168,10 @@ def normalize_dict( return items +def sort_dict(d: Mapping) -> Dict: + return dict(sorted(d.items(), key=itemgetter(0))) + + def encode(value, encoding='utf-8') -> bytes: """Encode a value to bytes, if it hasn't already been""" return value if isinstance(value, bytes) else str(value).encode(encoding) diff --git a/requests_cache/session.py b/requests_cache/session.py index 2231532..7c22c03 100644 --- a/requests_cache/session.py +++ b/requests_cache/session.py @@ -299,7 +299,8 @@ class CachedSession(CacheMixin, OriginalSession): urls_expire_after: Expiration times to apply for different URL patterns allowable_codes: Only cache responses with one of these status codes allowable_methods: Cache only responses for one of these HTTP methods - match_headers: Match request headers when reading from the cache + match_headers: Match request headers when reading from the cache; may be either a boolean + or a list of specific headers to match ignored_parameters: List of request parameters to not match against, and exclude from the cache filter_fn: Function that takes a :py:class:`~requests.Response` object and returns a boolean indicating whether or not that response should be cached. Will be applied to both new |
