remove lambda caching from loader strategies

Adjusted ORM loader internals to no longer use the "lambda caching" system that was added in 1.4, and repaired one location that was still using the previous "baked query" system for a query. The lambda caching system remains an effective way to reduce the overhead of building up queries that have relatively fixed usage patterns. In the case of loader strategies, the queries used are responsible for moving through lots of arbitrary options and criteria, which are both generated and sometimes consumed by end-user code; this makes the lambda cache concept no more efficient than not using it, at the cost of more complexity. In particular the problems noted by :ticket:`6881` and :ticket:`6887` are made considerably less complicated by removing this feature internally. Fixed an issue where the :class:`_orm.Bundle` construct would not create proper cache keys, leading to inefficient use of the query cache. This had some impact on the "selectinload" strategy and was identified as part of :ticket:`6889`. Added a ``Select._create_raw_select()`` method which essentially performs ``__new__`` and then populates ``__dict__`` directly, with no coercions. This saves most of the overhead time that the lambda caching system otherwise seeks to avoid. Includes removal of bakedquery from mapper->_subclass_load_via_in(), which was overlooked in the 1.4 refactor. Fixes: #6079 Fixes: #6889 Change-Id: Ieac2d9d709b71ec4270e5c121fbac6ac870e2bb1
author: Mike Bayer <mike_mp@zzzcomputing.com> 2021-08-16 17:20:48 -0400
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2021-08-17 14:17:00 -0400
commit: 1b5ae17384660e9153168d1250003b87da690542 (patch)
tree: a4824e03c85f2bbe664a12a81335f6fec303e52d /lib/sqlalchemy
parent: 76b506ed51e31b922014a30de2a5952d1a6ad891 (diff)
download: sqlalchemy-1b5ae17384660e9153168d1250003b87da690542.tar.gz
7 files changed, 137 insertions, 198 deletions
diff --git a/lib/sqlalchemy/orm/context.py b/lib/sqlalchemy/orm/context.py
index 603477819..83b6586cc 100644
--- a/lib/sqlalchemy/orm/context.py
+++ b/lib/sqlalchemy/orm/context.py
@@ -1150,11 +1150,11 @@ class ORMSelectCompileState(ORMCompileState, SelectState):
     ):
 
         Select = future.Select
-        statement = Select.__new__(Select)
-        statement._raw_columns = raw_columns
-        statement._from_obj = from_obj
-
-        statement._label_style = label_style
+        statement = Select._create_raw_select(
+            _raw_columns=raw_columns,
+            _from_obj=from_obj,
+            _label_style=label_style,
+        )
 
         if where_criteria:
             statement._where_criteria = where_criteria
diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py
index abc8780ed..42ece864c 100644
--- a/lib/sqlalchemy/orm/loading.py
+++ b/lib/sqlalchemy/orm/loading.py
@@ -32,6 +32,7 @@ from ..engine.result import FrozenResult
 from ..engine.result import SimpleResultMetaData
 from ..sql import util as sql_util
 from ..sql.selectable import LABEL_STYLE_TABLENAME_PLUS_COL
+from ..sql.selectable import SelectState
 
 _new_runid = util.counter()
 
@@ -431,7 +432,7 @@ def load_on_pk_identity(
     query = statement
     q = query._clone()
 
-    is_lambda = q._is_lambda_element
+    assert not q._is_lambda_element
 
     # TODO: fix these imports ....
     from .context import QueryContext, ORMCompileState
@@ -439,7 +440,13 @@ def load_on_pk_identity(
     if load_options is None:
         load_options = QueryContext.default_load_options
 
-    compile_options = ORMCompileState.default_compile_options
+    if (
+        statement._compile_options
+        is SelectState.default_select_compile_options
+    ):
+        compile_options = ORMCompileState.default_compile_options
+    else:
+        compile_options = statement._compile_options
 
     if primary_key_identity is not None:
         mapper = query._propagate_attrs["plugin_subject"]
@@ -468,24 +475,9 @@ def load_on_pk_identity(
                     "release."
                 )
 
-        if is_lambda:
-            q = q.add_criteria(
-                lambda q: q.where(
-                    sql_util._deep_annotate(_get_clause, {"_orm_adapt": True})
-                ),
-                # this track_on will allow the lambda to refresh if
-                # _get_clause goes stale due to reconfigured mapper.
-                # however, it's not needed as the lambda otherwise tracks
-                # on the SQL cache key of the expression.  the main thing
-                # is that the bindparam.key stays the same if the cache key
-                # stays the same, as we are referring to the .key explicitly
-                # in the params.
-                # track_on=[id(_get_clause)]
-            )
-        else:
-            q._where_criteria = (
-                sql_util._deep_annotate(_get_clause, {"_orm_adapt": True}),
-            )
+        q._where_criteria = (
+            sql_util._deep_annotate(_get_clause, {"_orm_adapt": True}),
+        )
 
         params = dict(
             [
@@ -498,57 +490,32 @@ def load_on_pk_identity(
     else:
         params = None
 
-    if is_lambda:
-        if with_for_update is not None or refresh_state or only_load_props:
-            raise NotImplementedError(
-                "refresh operation not supported with lambda statement"
-            )
-
+    if with_for_update is not None:
+        version_check = True
+        q._for_update_arg = with_for_update
+    elif query._for_update_arg is not None:
+        version_check = True
+        q._for_update_arg = query._for_update_arg
+    else:
         version_check = False
 
-        _, load_options = _set_get_options(
-            compile_options,
-            load_options,
-            version_check=version_check,
-            only_load_props=only_load_props,
-            refresh_state=refresh_state,
-            identity_token=identity_token,
-        )
+    if refresh_state and refresh_state.load_options:
+        compile_options += {"_current_path": refresh_state.load_path.parent}
+        q = q.options(*refresh_state.load_options)
 
-        if no_autoflush:
-            load_options += {"_autoflush": False}
-    else:
-        if with_for_update is not None:
-            version_check = True
-            q._for_update_arg = with_for_update
-        elif query._for_update_arg is not None:
-            version_check = True
-            q._for_update_arg = query._for_update_arg
-        else:
-            version_check = False
-
-        if refresh_state and refresh_state.load_options:
-            compile_options += {
-                "_current_path": refresh_state.load_path.parent
-            }
-            q = q.options(*refresh_state.load_options)
-
-        # TODO: most of the compile_options that are not legacy only involve
-        # this function, so try to see if handling of them can mostly be local
-        # to here
-
-        q._compile_options, load_options = _set_get_options(
-            compile_options,
-            load_options,
-            version_check=version_check,
-            only_load_props=only_load_props,
-            refresh_state=refresh_state,
-            identity_token=identity_token,
-        )
-        q._order_by = None
+    new_compile_options, load_options = _set_get_options(
+        compile_options,
+        load_options,
+        version_check=version_check,
+        only_load_props=only_load_props,
+        refresh_state=refresh_state,
+        identity_token=identity_token,
+    )
+    q._compile_options = new_compile_options
+    q._order_by = None
 
-        if no_autoflush:
-            load_options += {"_autoflush": False}
+    if no_autoflush:
+        load_options += {"_autoflush": False}
 
     execution_options = util.EMPTY_DICT.merge_with(
         execution_options, {"_sa_orm_load_options": load_options}
@@ -1110,21 +1077,24 @@ def _load_subclass_via_in(context, path, entity):
     def do_load(context, path, states, load_only, effective_entity):
         orig_query = context.query
 
-        q2 = q._with_lazyload_options(
-            (enable_opt,) + orig_query._with_options + (disable_opt,),
-            path.parent,
-            cache_path=path,
-        )
+        options = (enable_opt,) + orig_query._with_options + (disable_opt,)
+        q2 = q.options(*options)
 
-        if context.populate_existing:
-            q2.add_criteria(lambda q: q.populate_existing())
+        q2._compile_options = context.compile_state.default_compile_options
+        q2._compile_options += {"_current_path": path.parent}
 
-        q2(context.session).params(
-            primary_keys=[
-                state.key[1][0] if zero_idx else state.key[1]
-                for state, load_attrs in states
-            ]
-        ).all()
+        if context.populate_existing:
+            q2 = q2.execution_options(populate_existing=True)
+
+        context.session.execute(
+            q2,
+            dict(
+                primary_keys=[
+                    state.key[1][0] if zero_idx else state.key[1]
+                    for state, load_attrs in states
+                ]
+            ),
+        ).unique().scalars().all()
 
     return do_load
 
diff --git a/lib/sqlalchemy/orm/mapper.py b/lib/sqlalchemy/orm/mapper.py
index 530c0a112..5eee134d5 100644
--- a/lib/sqlalchemy/orm/mapper.py
+++ b/lib/sqlalchemy/orm/mapper.py
@@ -3047,16 +3047,13 @@ class Mapper(
 
         return None
 
-    @util.preload_module(
-        "sqlalchemy.ext.baked", "sqlalchemy.orm.strategy_options"
-    )
+    @util.preload_module("sqlalchemy.orm.strategy_options")
     def _subclass_load_via_in(self, entity):
-        """Assemble a BakedQuery that can load the columns local to
+        """Assemble a statement that can load the columns local to
         this subclass as a SELECT with IN.
 
         """
         strategy_options = util.preloaded.orm_strategy_options
-        baked = util.preloaded.ext_baked
 
         assert self.inherits
 
@@ -3094,24 +3091,23 @@ class Mapper(
         if entity.is_aliased_class:
             assert entity.mapper is self
 
-            q = baked.BakedQuery(
-                self._compiled_cache,
-                lambda session: session.query(entity).select_entity_from(
-                    entity.selectable
-                ),
-                (self,),
+            q = sql.select(entity).set_label_style(
+                LABEL_STYLE_TABLENAME_PLUS_COL
             )
-            q.spoil()
+
+            in_expr = entity._adapter.traverse(in_expr)
+            primary_key = [entity._adapter.traverse(k) for k in primary_key]
+            q = q.where(
+                in_expr.in_(sql.bindparam("primary_keys", expanding=True))
+            ).order_by(*primary_key)
         else:
-            q = baked.BakedQuery(
-                self._compiled_cache,
-                lambda session: session.query(self),
-                (self,),
-            )
 
-        q += lambda q: q.filter(
-            in_expr.in_(sql.bindparam("primary_keys", expanding=True))
-        ).order_by(*primary_key)
+            q = sql.select(self).set_label_style(
+                LABEL_STYLE_TABLENAME_PLUS_COL
+            )
+            q = q.where(
+                in_expr.in_(sql.bindparam("primary_keys", expanding=True))
+            ).order_by(*primary_key)
 
         return q, enable_opt, disable_opt
 
diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py
index 9a97d37b0..a1fb16a3a 100644
--- a/lib/sqlalchemy/orm/query.py
+++ b/lib/sqlalchemy/orm/query.py
@@ -444,8 +444,7 @@ class Query(
             )
         else:
             # Query / select() internal attributes are 99% cross-compatible
-            stmt = Select.__new__(Select)
-            stmt.__dict__.update(self.__dict__)
+            stmt = Select._create_raw_select(**self.__dict__)
             stmt.__dict__.update(
                 _label_style=self._label_style,
                 _compile_options=compile_options,
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
index 587daa332..955cd6dd2 100644
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -43,6 +43,7 @@ from .. import util
 from ..sql import util as sql_util
 from ..sql import visitors
 from ..sql.selectable import LABEL_STYLE_TABLENAME_PLUS_COL
+from ..sql.selectable import Select
 
 
 def _register_attribute(
@@ -631,7 +632,6 @@ class LazyLoader(AbstractRelationshipLoader, util.MemoizedSlots):
         "_simple_lazy_clause",
         "_raise_always",
         "_raise_on_sql",
-        "_lambda_cache",
     )
 
     def __init__(self, parent, strategy_key):
@@ -913,13 +913,6 @@ class LazyLoader(AbstractRelationshipLoader, util.MemoizedSlots):
             for pk in self.mapper.primary_key
         ]
 
-    def _memoized_attr__lambda_cache(self):
-        # cache is per lazy loader, and is used for caching of
-        # sqlalchemy.sql.lambdas.AnalyzedCode and
-        # sqlalchemy.sql.lambdas.AnalyzedFunction objects which are generated
-        # from the StatementLambda used.
-        return util.LRUCache(30)
-
     @util.preload_module("sqlalchemy.orm.strategy_options")
     def _emit_lazyload(
         self,
@@ -932,18 +925,13 @@ class LazyLoader(AbstractRelationshipLoader, util.MemoizedSlots):
     ):
         strategy_options = util.preloaded.orm_strategy_options
 
-        stmt = sql.lambda_stmt(
-            lambda: sql.select(self.entity)
-            .set_label_style(LABEL_STYLE_TABLENAME_PLUS_COL)
-            ._set_compile_options(ORMCompileState.default_compile_options),
-            global_track_bound_values=False,
-            lambda_cache=self._lambda_cache,
-            track_on=(self,),
+        clauseelement = self.entity.__clause_element__()
+        stmt = Select._create_raw_select(
+            _raw_columns=[clauseelement],
+            _propagate_attrs=clauseelement._propagate_attrs,
+            _label_style=LABEL_STYLE_TABLENAME_PLUS_COL,
+            _compile_options=ORMCompileState.default_compile_options,
         )
-
-        if not self.parent_property.bake_queries:
-            stmt = stmt.spoil()
-
         load_options = QueryContext.default_load_options
 
         load_options += {
@@ -952,18 +940,15 @@ class LazyLoader(AbstractRelationshipLoader, util.MemoizedSlots):
         }
 
         if self.parent_property.secondary is not None:
-            stmt = stmt.add_criteria(
-                lambda stmt: stmt.select_from(
-                    self.mapper, self.parent_property.secondary
-                ),
-                track_on=[self.parent_property],
+            stmt = stmt.select_from(
+                self.mapper, self.parent_property.secondary
             )
 
         pending = not state.key
 
         # don't autoflush on pending
         if pending or passive & attributes.NO_AUTOFLUSH:
-            stmt += lambda stmt: stmt.execution_options(autoflush=False)
+            stmt._execution_options = util.immutabledict({"autoflush": False})
 
         use_get = self.use_get
 
@@ -978,15 +963,13 @@ class LazyLoader(AbstractRelationshipLoader, util.MemoizedSlots):
                     orm_util.LoaderCriteriaOption(self.entity, extra_criteria),
                 )
 
-            stmt += lambda stmt: stmt.options(*opts)
+            stmt._with_options = opts
         else:
             # this path is used if there are not already any options
             # in the query, but an event may want to add them
             effective_path = state.mapper._path_registry[self.parent_property]
 
-        stmt += lambda stmt: stmt._update_compile_options(
-            {"_current_path": effective_path}
-        )
+        stmt._compile_options += {"_current_path": effective_path}
 
         if use_get:
             if self._raise_on_sql and not passive & attributes.NO_RAISE:
@@ -997,9 +980,7 @@ class LazyLoader(AbstractRelationshipLoader, util.MemoizedSlots):
             )
 
         if self._order_by:
-            stmt = stmt.add_criteria(
-                lambda stmt: stmt.order_by(*self._order_by), track_on=[self]
-            )
+            stmt._order_by_clauses = self._order_by
 
         def _lazyload_reverse(compile_context):
             for rev in self.parent_property._reverse_property:
@@ -1016,11 +997,8 @@ class LazyLoader(AbstractRelationshipLoader, util.MemoizedSlots):
                         ]
                     ).lazyload(rev).process_compile_state(compile_context)
 
-        stmt = stmt.add_criteria(
-            lambda stmt: stmt._add_context_option(
-                _lazyload_reverse, self.parent_property
-            ),
-            track_on=[self],
+        stmt._with_context_options += (
+            (_lazyload_reverse, self.parent_property),
         )
 
         lazy_clause, params = self._generate_lazy_clause(state, passive)
@@ -1045,9 +1023,7 @@ class LazyLoader(AbstractRelationshipLoader, util.MemoizedSlots):
         if self._raise_on_sql and not passive & attributes.NO_RAISE:
             self._invoke_raise_load(state, passive, "raise_on_sql")
 
-        stmt = stmt.add_criteria(
-            lambda stmt: stmt.where(lazy_clause), enable_tracking=False
-        )
+        stmt._where_criteria = (lazy_clause,)
 
         result = session.execute(
             stmt, params, execution_options=execution_options
@@ -2634,7 +2610,6 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
         "_parent_alias",
         "_query_info",
         "_fallback_query_info",
-        "_lambda_cache",
     )
 
     query_info = collections.namedtuple(
@@ -2738,13 +2713,6 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
             (("lazy", "select"),)
         ).init_class_attribute(mapper)
 
-    def _memoized_attr__lambda_cache(self):
-        # cache is per lazy loader, and is used for caching of
-        # sqlalchemy.sql.lambdas.AnalyzedCode and
-        # sqlalchemy.sql.lambdas.AnalyzedFunction objects which are generated
-        # from the StatementLambda used.
-        return util.LRUCache(30)
-
     def create_row_processor(
         self,
         context,
@@ -2879,25 +2847,19 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
                 ]
                 in_expr = effective_entity._adapt_element(in_expr)
 
-        q = sql.lambda_stmt(
-            lambda: sql.select(
-                orm_util.Bundle("pk", *pk_cols), effective_entity
-            )
-            .set_label_style(LABEL_STYLE_TABLENAME_PLUS_COL)
-            ._set_compile_options(ORMCompileState.default_compile_options)
-            ._set_propagate_attrs(
-                {
-                    "compile_state_plugin": "orm",
-                    "plugin_subject": effective_entity,
-                }
-            ),
-            lambda_cache=self._lambda_cache,
-            global_track_bound_values=False,
-            track_on=(self, effective_entity) + (tuple(pk_cols),),
-        )
+        bundle_ent = orm_util.Bundle("pk", *pk_cols)
+        bundle_sql = bundle_ent.__clause_element__()
 
-        if not self.parent_property.bake_queries:
-            q = q.spoil()
+        entity_sql = effective_entity.__clause_element__()
+        q = Select._create_raw_select(
+            _raw_columns=[bundle_sql, entity_sql],
+            _label_style=LABEL_STYLE_TABLENAME_PLUS_COL,
+            _compile_options=ORMCompileState.default_compile_options,
+            _propagate_attrs={
+                "compile_state_plugin": "orm",
+                "plugin_subject": effective_entity,
+            },
+        )
 
         if not query_info.load_with_join:
             # the Bundle we have in the "omit_join" case is against raw, non
@@ -2905,23 +2867,19 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
             # entity, we add it explicitly.  If we made the Bundle against
             # annotated columns, we hit a performance issue in this specific
             # case, which is detailed in issue #4347.
-            q = q.add_criteria(lambda q: q.select_from(effective_entity))
+            q = q.select_from(effective_entity)
         else:
             # in the non-omit_join case, the Bundle is against the annotated/
             # mapped column of the parent entity, but the #4347 issue does not
             # occur in this case.
-            q = q.add_criteria(
-                lambda q: q.select_from(self._parent_alias).join(
-                    getattr(
-                        self._parent_alias, self.parent_property.key
-                    ).of_type(effective_entity)
-                ),
-                track_on=[self],
+            q = q.select_from(self._parent_alias).join(
+                getattr(self._parent_alias, self.parent_property.key).of_type(
+                    effective_entity
+                )
             )
 
-        q = q.add_criteria(
-            lambda q: q.filter(in_expr.in_(sql.bindparam("primary_keys")))
-        )
+        q = q.filter(in_expr.in_(sql.bindparam("primary_keys")))
+
         # a test which exercises what these comments talk about is
         # test_selectin_relations.py -> test_twolevel_selectin_w_polymorphic
         #
@@ -2968,16 +2926,12 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
                 ),
             )
 
-        q = q.add_criteria(
-            lambda q: q.options(*options)._update_compile_options(
-                {"_current_path": effective_path}
-            )
+        q = q.options(*options)._update_compile_options(
+            {"_current_path": effective_path}
         )
 
         if context.populate_existing:
-            q = q.add_criteria(
-                lambda q: q.execution_options(populate_existing=True)
-            )
+            q = q.execution_options(populate_existing=True)
 
         if self.parent_property.order_by:
             if not query_info.load_with_join:
@@ -2987,7 +2941,7 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
                         effective_entity._adapt_element(elem)
                         for elem in eager_order_by
                     ]
-                q = q.add_criteria(lambda q: q.order_by(*eager_order_by))
+                q = q.order_by(*eager_order_by)
             else:
 
                 def _setup_outermost_orderby(compile_context):
@@ -2995,11 +2949,8 @@ class SelectInLoader(PostLoader, util.MemoizedSlots):
                         util.to_list(self.parent_property.order_by)
                     )
 
-                q = q.add_criteria(
-                    lambda q: q._add_context_option(
-                        _setup_outermost_orderby, self.parent_property
-                    ),
-                    track_on=[self],
+                q = q._add_context_option(
+                    _setup_outermost_orderby, self.parent_property
                 )
 
         if query_info.load_only_child:
diff --git a/lib/sqlalchemy/orm/util.py b/lib/sqlalchemy/orm/util.py
index 46bb3c943..01a8becc3 100644
--- a/lib/sqlalchemy/orm/util.py
+++ b/lib/sqlalchemy/orm/util.py
@@ -1345,7 +1345,12 @@ def with_polymorphic(
 
 
 @inspection._self_inspects
-class Bundle(ORMColumnsClauseRole, SupportsCloneAnnotations, InspectionAttr):
+class Bundle(
+    ORMColumnsClauseRole,
+    SupportsCloneAnnotations,
+    sql_base.MemoizedHasCacheKey,
+    InspectionAttr,
+):
     """A grouping of SQL expressions that are returned by a :class:`.Query`
     under one namespace.
 
@@ -1412,6 +1417,11 @@ class Bundle(ORMColumnsClauseRole, SupportsCloneAnnotations, InspectionAttr):
         )
         self.single_entity = kw.pop("single_entity", self.single_entity)
 
+    def _gen_cache_key(self, anon_map, bindparams):
+        return (self.__class__, self.name, self.single_entity) + tuple(
+            [expr._gen_cache_key(anon_map, bindparams) for expr in self.exprs]
+        )
+
     @property
     def mapper(self):
         return self.exprs[0]._annotations.get("parentmapper", None)
diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py
index 0040db6da..e530beef2 100644
--- a/lib/sqlalchemy/sql/selectable.py
+++ b/lib/sqlalchemy/sql/selectable.py
@@ -5047,6 +5047,19 @@ class Select(
     _create_select = _create_future_select
 
     @classmethod
+    def _create_raw_select(cls, **kw):
+        """Create a :class:`.Select` using raw ``__new__`` with no coercions.
+
+        Used internally to build up :class:`.Select` constructs with
+        pre-established state.
+
+        """
+
+        stmt = Select.__new__(Select)
+        stmt.__dict__.update(kw)
+        return stmt
+
+    @classmethod
     def _create(cls, *args, **kw):
         r"""Create a :class:`.Select` using either the 1.x or 2.0 constructor
         style.
author	Mike Bayer <mike_mp@zzzcomputing.com>	2021-08-16 17:20:48 -0400
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2021-08-17 14:17:00 -0400
commit	1b5ae17384660e9153168d1250003b87da690542 (patch)
tree	a4824e03c85f2bbe664a12a81335f6fec303e52d /lib/sqlalchemy
parent	76b506ed51e31b922014a30de2a5952d1a6ad891 (diff)
download	sqlalchemy-1b5ae17384660e9153168d1250003b87da690542.tar.gz