author      Mike Bayer <mike_mp@zzzcomputing.com>    2020-04-27 12:58:12 -0400
committer   Mike Bayer <mike_mp@zzzcomputing.com>    2020-05-25 13:56:37 -0400
commit      6930dfc032c3f9f474e71ab4e021c0ef8384930e (patch)
tree        34b919a3c34edaffda1750f161a629fc5b9a8020 /examples
parent      dce8c7a125cb99fad62c76cd145752d5afefae36 (diff)
download    sqlalchemy-6930dfc032c3f9f474e71ab4e021c0ef8384930e.tar.gz
Convert execution to move through Session
This patch replaces the ORM execution flow with a
single pathway through Session.execute() for all
statements, Core and ORM alike.
Full support is currently included for ORM Query,
Query.from_statement(), and select(), as well as for
the baked query and horizontal shard systems. Initial
changes have also been made to the dogpile caching
example, which, like baked query, makes use of a
new ORM-specific execution hook that replaces both
QueryEvents.before_compile() and
Query._execute_and_instances() as the central
ORM interception points.
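As a rough sketch of the new hook, based on the caching_query.py
changes below (the handler body and the session_factory setup here
are illustrative only), interception now registers against the
Session rather than subclassing Query:

    from sqlalchemy import event
    from sqlalchemy.orm import sessionmaker

    session_factory = sessionmaker()

    def handle_orm_execute(orm_context):
        # inspect the statement, parameters, and user-defined options
        # before the ORM statement is invoked; a handler may also invoke
        # the statement itself and return a pre-built result.
        for opt in orm_context.user_defined_options:
            print(opt)

    # the event registers against a sessionmaker / scoped_session,
    # as the dogpile example's ORMCache.listen_on_session() does
    event.listen(session_factory, "do_orm_execute", handle_orm_execute)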
select() and Query() constructs alike can be passed to
Session.execute(), where they return ORM results
in a Result object. This API is currently used
internally by Query. Full support for
Session.execute() results to behave in a fully
2.0 fashion will arrive in later changesets.
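For example, a brief sketch (User is an assumed mapped class and
session an assumed Session; the exact methods for consuming the
returned result are still settling, per the note above):

    from sqlalchemy import select

    # 2.0-style: a Core select() against an ORM entity
    result = session.execute(select(User).where(User.id == 5))

    # legacy style: a Query object is accepted by Session.execute() as well
    result = session.execute(session.query(User).filter(User.id == 5))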
Bulk update/delete with ORM support will also
be delivered via the update() and delete()
constructs; however, these have not yet been adapted
to the new system and may follow in a subsequent
update.
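The eventual shape is expected to look roughly like the following
sketch; it does not work as of this commit, and the exact call
pattern is an assumption (User and session are again illustrative):

    from sqlalchemy import delete, update

    # not yet supported by this patch; intended direction only
    session.execute(
        update(User).where(User.id == 5).values(name="new name")
    )
    session.execute(delete(User).where(User.name == "old name"))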
Performance has also begun to lag as of this
commit and some previous ones. It is hoped that
a few central functions, such as the coercions
functions, can be rewritten in C to regain
performance. Additionally, query caching
is now available, and subsequent patches
will attempt to cache more of the per-execution
work in the ORM layer, e.g. column getters
and adapters.
This patch also contains the initial "turn on" of the
caching system engine-wide via the query_cache_size
parameter to create_engine(). It still defaults to
zero, meaning "no caching". The caching system still
needs adjustments in order to reach adequate performance.
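A sketch of opting in (the database URL is a placeholder and the
size of 500 is an arbitrary illustration; per the above, leaving
the parameter at its default of zero disables caching):

    from sqlalchemy import create_engine

    # opt in to engine-wide statement caching
    engine = create_engine(
        "postgresql://scott:tiger@localhost/test", query_cache_size=500
    )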
Change-Id: I047a7ebb26aa85dc01f6789fac2bff561dcd555d
Diffstat (limited to 'examples')
-rw-r--r--  examples/dogpile_caching/advanced.py               |   4
-rw-r--r--  examples/dogpile_caching/caching_query.py          | 269
-rw-r--r--  examples/dogpile_caching/environment.py            |  10
-rw-r--r--  examples/dogpile_caching/helloworld.py             |  24
-rw-r--r--  examples/dogpile_caching/local_session_caching.py  |   2
-rw-r--r--  examples/performance/short_selects.py              |  23
6 files changed, 149 insertions(+), 183 deletions(-)
diff --git a/examples/dogpile_caching/advanced.py b/examples/dogpile_caching/advanced.py
index d2ef82556..e72921ba4 100644
--- a/examples/dogpile_caching/advanced.py
+++ b/examples/dogpile_caching/advanced.py
@@ -5,6 +5,7 @@ including front-end loading, cache invalidation and collection caching.
 
 from .caching_query import FromCache
 from .caching_query import RelationshipCache
+from .environment import cache
 from .environment import Session
 from .model import cache_address_bits
 from .model import Person
@@ -48,7 +49,8 @@ def load_name_range(start, end, invalidate=False):
 
     # if requested, invalidate the cache on current criterion.
     if invalidate:
-        q.invalidate()
+        cache.invalidate(q, {}, FromCache("default", "name_range"))
+        cache.invalidate(q, {}, RelationshipCache(Person.addresses, "default"))
 
     return q.all()
diff --git a/examples/dogpile_caching/caching_query.py b/examples/dogpile_caching/caching_query.py
index d6e1435b0..54f712a11 100644
--- a/examples/dogpile_caching/caching_query.py
+++ b/examples/dogpile_caching/caching_query.py
@@ -2,16 +2,18 @@ which allow the usage of Dogpile caching with SQLAlchemy.
 
 Introduces a query option called FromCache.
 
+.. versionchanged:: 1.4  the caching approach has been altered to work
+   based on a session event.
+
+
 The three new concepts introduced here are:
 
- * CachingQuery - a Query subclass that caches and
+ * ORMCache - an extension for an ORM :class:`.Session`
    retrieves results in/from dogpile.cache.
  * FromCache - a query option that establishes caching
    parameters on a Query
  * RelationshipCache - a variant of FromCache which is specific
    to a query invoked during a lazy load.
- * _params_from_query - extracts value parameters from
-   a Query.
 
 The rest of what's here are standard SQLAlchemy and
 dogpile.cache constructs.
@@ -19,165 +21,97 @@ dogpile.cache constructs.
 
 """
 from dogpile.cache.api import NO_VALUE
 
-from sqlalchemy.orm.interfaces import MapperOption
-from sqlalchemy.orm.query import Query
+from sqlalchemy import event
+from sqlalchemy.orm import loading
+from sqlalchemy.orm.interfaces import UserDefinedOption
 
 
-class CachingQuery(Query):
-    """A Query subclass which optionally loads full results from a dogpile
-    cache region.
+class ORMCache(object):
 
-    The CachingQuery optionally stores additional state that allows it to
-    consult a dogpile.cache cache before accessing the database, in the form of
-    a FromCache or RelationshipCache object.  Each of these objects refer to
-    the name of a :class:`dogpile.cache.Region` that's been configured and
-    stored in a lookup dictionary.  When such an object has associated itself
-    with the CachingQuery, the corresponding :class:`dogpile.cache.Region` is
-    used to locate a cached result.  If none is present, then the Query is
-    invoked normally, the results being cached.
+    """An add-on for an ORM :class:`.Session` optionally loads full results
+    from a dogpile cache region.
 
-    The FromCache and RelationshipCache mapper options below represent
-    the "public" method of configuring this state upon the CachingQuery.
     """
 
-    def __init__(self, regions, *args, **kw):
+    def __init__(self, regions):
         self.cache_regions = regions
-        Query.__init__(self, *args, **kw)
-
-    # NOTE: as of 1.4 don't override __iter__() anymore, the result object
-    # cannot be cached at that level.
+        self._statement_cache = {}
+
+    def listen_on_session(self, session_factory):
+        event.listen(session_factory, "do_orm_execute", self._do_orm_execute)
+
+    def _do_orm_execute(self, orm_context):
+
+        for opt in orm_context.user_defined_options:
+            if isinstance(opt, RelationshipCache):
+                opt = opt._process_orm_context(orm_context)
+                if opt is None:
+                    continue
+
+            if isinstance(opt, FromCache):
+                dogpile_region = self.cache_regions[opt.region]
+
+                our_cache_key = opt._generate_cache_key(
+                    orm_context.statement, orm_context.parameters, self
+                )
+
+                if opt.ignore_expiration:
+                    cached_value = dogpile_region.get(
+                        our_cache_key,
+                        expiration_time=opt.expiration_time,
+                        ignore_expiration=opt.ignore_expiration,
+                    )
+                else:
+
+                    def createfunc():
+                        return orm_context.invoke_statement().freeze()
+
+                    cached_value = dogpile_region.get_or_create(
+                        our_cache_key,
+                        createfunc,
+                        expiration_time=opt.expiration_time,
+                    )
+
+                if cached_value is NO_VALUE:
+                    # keyerror?  this is bigger than a keyerror...
+                    raise KeyError()
+
+                orm_result = loading.merge_frozen_result(
+                    orm_context.session,
+                    orm_context.statement,
+                    cached_value,
+                    load=False,
+                )
+                return orm_result()
 
-    def _execute_and_instances(self, context, **kw):
-        """override _execute_and_instances to pull results from dogpile
-        if the query is invoked directly from an external context.
+            else:
+                return None
 
-        This method is necessary in order to maintain compatibility
-        with the "baked query" system now used by default in some
-        relationship loader scenarios.   Note also the
-        RelationshipCache._generate_cache_key method which enables
-        the baked query to be used within lazy loads.
+    def invalidate(self, statement, parameters, opt):
+        """Invalidate the cache value represented by a statement."""
 
-        .. versionadded:: 1.2.7
+        statement = statement.__clause_element__()
 
-        .. versionchanged:: 1.4  Added ``**kw`` arguments to the signature.
+        dogpile_region = self.cache_regions[opt.region]
 
-        """
-        super_ = super(CachingQuery, self)
-
-        if hasattr(self, "_cache_region"):
-            # special logic called when the Query._execute_and_instances()
-            # method is called directly from the baked query
-            return self.get_value(
-                createfunc=lambda: super_._execute_and_instances(
-                    context, **kw
-                ).freeze()
-            )
-        else:
-            return super_._execute_and_instances(context, **kw)
+        cache_key = opt._generate_cache_key(statement, parameters, self)
 
-    def _get_cache_plus_key(self):
-        """Return a cache region plus key."""
+        dogpile_region.delete(cache_key)
 
-        dogpile_region = self.cache_regions[self._cache_region.region]
-        if self._cache_region.cache_key:
-            key = self._cache_region.cache_key
-        else:
-            key = _key_from_query(self)
-        return dogpile_region, key
 
-    def invalidate(self):
-        """Invalidate the cache value represented by this Query."""
+class FromCache(UserDefinedOption):
+    """Specifies that a Query should load results from a cache."""
 
-        dogpile_region, cache_key = self._get_cache_plus_key()
-        dogpile_region.delete(cache_key)
+    propagate_to_loaders = False
 
-    def get_value(
+    def __init__(
         self,
-        merge=True,
-        createfunc=None,
+        region="default",
+        cache_key=None,
         expiration_time=None,
         ignore_expiration=False,
     ):
-        """Return the value from the cache for this query.
-
-        Raise KeyError if no value present and no
-        createfunc specified.
-
-        """
-        dogpile_region, cache_key = self._get_cache_plus_key()
-
-        # ignore_expiration means, if the value is in the cache
-        # but is expired, return it anyway.   This doesn't make sense
-        # with createfunc, which says, if the value is expired, generate
-        # a new value.
-        assert (
-            not ignore_expiration or not createfunc
-        ), "Can't ignore expiration and also provide createfunc"
-
-        if ignore_expiration or not createfunc:
-            cached_value = dogpile_region.get(
-                cache_key,
-                expiration_time=expiration_time,
-                ignore_expiration=ignore_expiration,
-            )
-        else:
-            cached_value = dogpile_region.get_or_create(
-                cache_key, createfunc, expiration_time=expiration_time
-            )
-        if cached_value is NO_VALUE:
-            raise KeyError(cache_key)
-
-        # in 1.4 the cached value is a FrozenResult.  merge_result
-        # accommodates this directly and updates the ORM entities inside
-        # the object to be merged.
-        # TODO: should this broken into merge_frozen_result / merge_iterator?
-        if merge:
-            cached_value = self.merge_result(cached_value, load=False)
-        return cached_value()
-
-    def set_value(self, value):
-        """Set the value in the cache for this query."""
-
-        dogpile_region, cache_key = self._get_cache_plus_key()
-        dogpile_region.set(cache_key, value)
-
-
-def query_callable(regions, query_cls=CachingQuery):
-    def query(*arg, **kw):
-        return query_cls(regions, *arg, **kw)
-
-    return query
-
-
-def _key_from_query(query, qualifier=None):
-    """Given a Query, create a cache key.
-
-    There are many approaches to this; here we use the simplest,
-    which is to create an md5 hash of the text of the SQL statement,
-    combined with stringified versions of all the bound parameters
-    within it.  There's a bit of a performance hit with
-    compiling out "query.statement" here; other approaches include
-    setting up an explicit cache key with a particular Query,
-    then combining that with the bound parameter values.
-
-    """
-
-    stmt = query.with_labels().statement
-    compiled = stmt.compile()
-    params = compiled.params
-
-    # here we return the key as a long string.  our "key mangler"
-    # set up with the region will boil it down to an md5.
-    return " ".join([str(compiled)] + [str(params[k]) for k in sorted(params)])
-
-
-class FromCache(MapperOption):
-    """Specifies that a Query should load results from a cache."""
-
-    propagate_to_loaders = False
-
-    def __init__(self, region="default", cache_key=None):
         """Construct a new FromCache.
 
         :param region: the cache region.  Should be a
@@ -193,19 +127,34 @@ class FromCache(MapperOption):
 
         """
         self.region = region
         self.cache_key = cache_key
+        self.expiration_time = expiration_time
+        self.ignore_expiration = ignore_expiration
+
+    def _generate_cache_key(self, statement, parameters, orm_cache):
+        statement_cache_key = statement._generate_cache_key()
+
+        key = statement_cache_key.to_offline_string(
+            orm_cache._statement_cache, parameters
+        ) + repr(self.cache_key)
 
-    def process_query(self, query):
-        """Process a Query during normal loading operation."""
-        query._cache_region = self
+        # print("here's our key...%s" % key)
+        return key
 
 
-class RelationshipCache(MapperOption):
+class RelationshipCache(FromCache):
     """Specifies that a Query as called within a "lazy load" should
     load results from a cache."""
 
     propagate_to_loaders = True
 
-    def __init__(self, attribute, region="default", cache_key=None):
+    def __init__(
+        self,
+        attribute,
+        region="default",
+        cache_key=None,
+        expiration_time=None,
+        ignore_expiration=False,
+    ):
         """Construct a new RelationshipCache.
 
         :param attribute: A Class.attribute which
@@ -221,19 +170,17 @@ class RelationshipCache(MapperOption):
 
         """
         self.region = region
         self.cache_key = cache_key
+        self.expiration_time = expiration_time
+        self.ignore_expiration = ignore_expiration
         self._relationship_options = {
             (attribute.property.parent.class_, attribute.property.key): self
         }
 
-    def process_query_conditionally(self, query):
-        """Process a Query that is used within a lazy loader.
-
-        (the process_query_conditionally() method is a SQLAlchemy
-        hook invoked only within lazyload.)
+    def _process_orm_context(self, orm_context):
+        current_path = orm_context.loader_strategy_path
 
-        """
-        if query._current_path:
-            mapper, prop = query._current_path[-2:]
+        if current_path:
+            mapper, prop = current_path[-2:]
             key = prop.key
 
             for cls in mapper.class_.__mro__:
@@ -241,8 +188,7 @@
                 if (cls, key) in self._relationship_options:
                     relationship_option = self._relationship_options[
                         (cls, key)
                     ]
-                    query._cache_region = relationship_option
-                    break
+                    return relationship_option
 
     def and_(self, option):
         """Chain another RelationshipCache option to this one.
@@ -254,16 +200,3 @@
 
         """
         self._relationship_options.update(option._relationship_options)
         return self
-
-    def _generate_cache_key(self, path):
-        """Indicate to the lazy-loader strategy that a "baked" query
-        may be used by returning ``None``.
-
-        If this method is omitted, the default implementation of
-        :class:`.MapperOption._generate_cache_key` takes place, which
-        returns ``False`` to disable the "baked" query from being used.
-
-        .. versionadded:: 1.2.7
-
-        """
-        return None
diff --git a/examples/dogpile_caching/environment.py b/examples/dogpile_caching/environment.py
index 723ee653d..7f4f7e7a1 100644
--- a/examples/dogpile_caching/environment.py
+++ b/examples/dogpile_caching/environment.py
@@ -23,13 +23,11 @@ if py2k:
 # dogpile cache regions.  A home base for cache configurations.
 regions = {}
 
+# scoped_session.
+Session = scoped_session(sessionmaker())
 
-# scoped_session.  Apply our custom CachingQuery class to it,
-# using a callable that will associate the dictionary
-# of regions with the Query.
-Session = scoped_session(
-    sessionmaker(query_cls=caching_query.query_callable(regions))
-)
+cache = caching_query.ORMCache(regions)
+cache.listen_on_session(Session)
 
 # global declarative base class.
 Base = declarative_base()
diff --git a/examples/dogpile_caching/helloworld.py b/examples/dogpile_caching/helloworld.py
index 6b03afbdb..6e79fc3fa 100644
--- a/examples/dogpile_caching/helloworld.py
+++ b/examples/dogpile_caching/helloworld.py
@@ -3,6 +3,7 @@
 
 """
 from .caching_query import FromCache
+from .environment import cache
 from .environment import Session
 from .model import Person
 
@@ -57,10 +58,19 @@ people_two_through_twelve = (
 # same list of objects to be loaded, and the same parameters in the
 # same order, then call invalidate().
 print("invalidating everything")
-Session.query(Person).options(FromCache("default")).invalidate()
-Session.query(Person).options(FromCache("default")).filter(
-    Person.name.between("person 02", "person 12")
-).invalidate()
-Session.query(Person).options(FromCache("default", "people_on_range")).filter(
-    Person.name.between("person 05", "person 15")
-).invalidate()
+
+cache.invalidate(Session.query(Person), {}, FromCache("default"))
+cache.invalidate(
+    Session.query(Person).filter(
+        Person.name.between("person 02", "person 12")
+    ),
+    {},
+    FromCache("default"),
+)
+cache.invalidate(
+    Session.query(Person).filter(
+        Person.name.between("person 05", "person 15")
+    ),
+    {},
+    FromCache("default", "people_on_range"),
+)
diff --git a/examples/dogpile_caching/local_session_caching.py b/examples/dogpile_caching/local_session_caching.py
index 1700c7a63..8f505ead7 100644
--- a/examples/dogpile_caching/local_session_caching.py
+++ b/examples/dogpile_caching/local_session_caching.py
@@ -75,8 +75,8 @@ if __name__ == "__main__":
     # of "person 10"
     q = (
         Session.query(Person)
-        .options(FromCache("local_session"))
         .filter(Person.name == "person 10")
+        .execution_options(cache_options=FromCache("local_session"))
     )
 
     # load from DB
diff --git a/examples/performance/short_selects.py b/examples/performance/short_selects.py
index db8ab8789..38bc1508a 100644
--- a/examples/performance/short_selects.py
+++ b/examples/performance/short_selects.py
@@ -82,6 +82,29 @@ def test_orm_query_cols_only(n):
         ).one()
 
 
+cache = {}
+
+
+@Profiler.profile
+def test_cached_orm_query(n):
+    """test new style cached queries of the full entity."""
+    s = Session(bind=engine)
+    for id_ in random.sample(ids, n):
+        stmt = s.query(Customer).filter(Customer.id == id_)
+        s.execute(stmt, execution_options={"compiled_cache": cache}).one()
+
+
+@Profiler.profile
+def test_cached_orm_query_cols_only(n):
+    """test new style cached queries of the full entity."""
+    s = Session(bind=engine)
+    for id_ in random.sample(ids, n):
+        stmt = s.query(
+            Customer.id, Customer.name, Customer.description
+        ).filter(Customer.id == id_)
+        s.execute(stmt, execution_options={"compiled_cache": cache}).one()
+
+
 @Profiler.profile
 def test_baked_query(n):
     """test a baked query of the full entity."""