summaryrefslogtreecommitdiff
path: root/lib/sqlalchemy
diff options
context:
space:
mode:
authorMike Bayer <mike_mp@zzzcomputing.com>2022-12-16 15:44:32 -0500
committerMike Bayer <mike_mp@zzzcomputing.com>2022-12-18 10:19:52 -0500
commitd480546fbcf6cafcbd166240d9c39e4b9204ccc4 (patch)
tree6389670a763cb93904d456544b194a3600c24fe5 /lib/sqlalchemy
parente84cc158c469f17c90f2e058ed72595bc3be5cdb (diff)
downloadsqlalchemy-d480546fbcf6cafcbd166240d9c39e4b9204ccc4.tar.gz
include pk cols in refresh() if relationships are requested
A series of changes and improvements regarding :meth:`_orm.Session.refresh`. The overall change is that primary key attributes for an object are now included in a refresh operation unconditionally when relationship-bound attributes are to be refreshed, even if not expired and even if not specified in the refresh. * Improved :meth:`_orm.Session.refresh` so that if autoflush is enabled (as is the default for :class:`_orm.Session`), the autoflush takes place at an earlier part of the refresh process so that pending primary key changes are applied without errors being raised. Previously, this autoflush took place too late in the process and the SELECT statement would not use the correct key to locate the row and an :class:`.InvalidRequestError` would be raised. * When the above condition is present, that is, unflushed primary key changes are present on the object, but autoflush is not enabled, the refresh() method now explicitly disallows the operation to proceed, and an informative :class:`.InvalidRequestError` is raised asking that the pending primary key changes be flushed first. Previously, this use case was simply broken and :class:`.InvalidRequestError` would be raised anyway. This restriction is so that it's safe for the primary key attributes to be refreshed, as is necessary for the case of being able to refresh the object with relationship-bound secondary eagerloaders also being emitted. This rule applies in all cases to keep API behavior consistent regardless of whether or not the PK cols are actually needed in the refresh, as it is unusual to be refreshing some attributes on an object while keeping other attributes "pending" in any case. * The :meth:`_orm.Session.refresh` method has been enhanced such that attributes which are :func:`_orm.relationship`-bound and linked to an eager loader, either at mapping time or via last-used loader options, will be refreshed in all cases even when a list of attributes is passed that does not include any columns on the parent row. This builds upon the feature first implemented for non-column attributes as part of :ticket:`1763` fixed in 1.4 allowing eagerly-loaded relationship-bound attributes to participate in the :meth:`_orm.Session.refresh` operation. If the refresh operation does not indicate any columns on the parent row to be refreshed, the primary key columns will nonetheless be included in the refresh operation, which allows the load to proceed into the secondary relationship loaders indicated as it does normally. Previously an :class:`.InvalidRequestError` error would be raised for this condition (:ticket:`8703`) * Fixed issue where an unnecessary additional SELECT would be emitted in the case where :meth:`_orm.Session.refresh` were called with a combination of expired attributes, as well as an eager loader such as :func:`_orm.selectinload` that emits a "secondary" query, if the primary key attributes were also in an expired state. As the primary key attributes are now included in the refresh automatically, there is no additional load for these attributes when a relationship loader goes to select for them (:ticket:`8997`) * Fixed regression caused by :ticket:`8126` released in 2.0.0b1 where the :meth:`_orm.Session.refresh` method would fail with an ``AttributeError``, if passed both an expired column name as well as the name of a relationship-bound attribute that was linked to a "secondary" eagerloader such as the :func:`_orm.selectinload` eager loader (:ticket:`8996`) Fixes: #8703 Fixes: #8996 Fixes: #8997 Fixes: #8126 Change-Id: I88dcbc0a9a8337f6af0bc4bcc5b0261819acd1c4
Diffstat (limited to 'lib/sqlalchemy')
-rw-r--r--lib/sqlalchemy/orm/context.py14
-rw-r--r--lib/sqlalchemy/orm/loading.py156
-rw-r--r--lib/sqlalchemy/orm/session.py13
-rw-r--r--lib/sqlalchemy/orm/strategies.py6
4 files changed, 134 insertions, 55 deletions
diff --git a/lib/sqlalchemy/orm/context.py b/lib/sqlalchemy/orm/context.py
index b3c8e78b3..3bd8b02a7 100644
--- a/lib/sqlalchemy/orm/context.py
+++ b/lib/sqlalchemy/orm/context.py
@@ -1188,15 +1188,11 @@ class ORMSelectCompileState(ORMCompileState, SelectState):
if not self.primary_columns:
if self.compile_options._only_load_props:
- raise sa_exc.InvalidRequestError(
- "No column-based properties specified for "
- "refresh operation. Use session.expire() "
- "to reload collections and related items."
- )
- else:
- raise sa_exc.InvalidRequestError(
- "Query contains no columns with which to SELECT from."
- )
+ assert False, "no columns were included in _only_load_props"
+
+ raise sa_exc.InvalidRequestError(
+ "Query contains no columns with which to SELECT from."
+ )
if not self.from_clauses:
self.from_clauses = list(self._fallback_from_clauses)
diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py
index edfa61287..6e7695f86 100644
--- a/lib/sqlalchemy/orm/loading.py
+++ b/lib/sqlalchemy/orm/loading.py
@@ -40,12 +40,12 @@ from .context import FromStatement
from .util import _none_set
from .util import state_str
from .. import exc as sa_exc
-from .. import future
from .. import util
from ..engine import result_tuple
from ..engine.result import ChunkedIteratorResult
from ..engine.result import FrozenResult
from ..engine.result import SimpleResultMetaData
+from ..sql import select
from ..sql import util as sql_util
from ..sql.selectable import ForUpdateArg
from ..sql.selectable import LABEL_STYLE_TABLENAME_PLUS_COL
@@ -468,6 +468,7 @@ def load_on_ident(
no_autoflush: bool = False,
bind_arguments: Mapping[str, Any] = util.EMPTY_DICT,
execution_options: _ExecuteOptions = util.EMPTY_DICT,
+ require_pk_cols: bool = False,
):
"""Load the given identity key from the database."""
if key is not None:
@@ -488,6 +489,7 @@ def load_on_ident(
no_autoflush=no_autoflush,
bind_arguments=bind_arguments,
execution_options=execution_options,
+ require_pk_cols=require_pk_cols,
)
@@ -504,6 +506,7 @@ def load_on_pk_identity(
no_autoflush: bool = False,
bind_arguments: Mapping[str, Any] = util.EMPTY_DICT,
execution_options: _ExecuteOptions = util.EMPTY_DICT,
+ require_pk_cols: bool = False,
):
"""Load the given primary key identity from the database."""
@@ -572,6 +575,71 @@ def load_on_pk_identity(
else:
version_check = False
+ if require_pk_cols and only_load_props:
+ if not refresh_state:
+ raise sa_exc.ArgumentError(
+ "refresh_state is required when require_pk_cols is present"
+ )
+
+ refresh_state_prokeys = refresh_state.mapper._primary_key_propkeys
+ has_changes = {
+ key
+ for key in refresh_state_prokeys.difference(only_load_props)
+ if refresh_state.attrs[key].history.has_changes()
+ }
+ if has_changes:
+ # raise if pending pk changes are present.
+ # technically, this could be limited to the case where we have
+ # relationships in the only_load_props collection to be refreshed
+ # also (and only ones that have a secondary eager loader, at that).
+ # however, the error is in place across the board so that behavior
+ # here is easier to predict. The use case it prevents is one
+ # of mutating PK attrs, leaving them unflushed,
+ # calling session.refresh(), and expecting those attrs to remain
+ # still unflushed. It seems likely someone doing all those
+ # things would be better off having the PK attributes flushed
+ # to the database before tinkering like that (session.refresh() is
+ # tinkering).
+ raise sa_exc.InvalidRequestError(
+ f"Please flush pending primary key changes on "
+ "attributes "
+ f"{has_changes} for mapper {refresh_state.mapper} before "
+ "proceeding with a refresh"
+ )
+
+ # overall, the ORM has no internal flow right now for "dont load the
+ # primary row of an object at all, but fire off
+ # selectinload/subqueryload/immediateload for some relationships".
+ # It would probably be a pretty big effort to add such a flow. So
+ # here, the case for #8703 is introduced; user asks to refresh some
+ # relationship attributes only which are
+ # selectinload/subqueryload/immediateload/ etc. (not joinedload).
+ # ORM complains there's no columns in the primary row to load.
+ # So here, we just add the PK cols if that
+ # case is detected, so that there is a SELECT emitted for the primary
+ # row.
+ #
+ # Let's just state right up front, for this one little case,
+ # the ORM here is adding a whole extra SELECT just to satisfy
+ # limitations in the internal flow. This is really not a thing
+ # SQLAlchemy finds itself doing like, ever, obviously, we are
+ # constantly working to *remove* SELECTs we don't need. We
+ # rationalize this for now based on 1. session.refresh() is not
+ # commonly used 2. session.refresh() with only relationship attrs is
+ # even less commonly used 3. the SELECT in question is very low
+ # latency.
+ #
+ # to add the flow to not include the SELECT, the quickest way
+ # might be to just manufacture a single-row result set to send off to
+ # instances(), but we'd have to weave that into context.py and all
+ # that. For 2.0.0, we have enough big changes to navigate for now.
+ #
+ mp = refresh_state.mapper._props
+ for p in only_load_props:
+ if mp[p]._is_relationship:
+ only_load_props = refresh_state_prokeys.union(only_load_props)
+ break
+
if refresh_state and refresh_state.load_options:
compile_options += {"_current_path": refresh_state.load_path.parent}
q = q.options(*refresh_state.load_options)
@@ -584,6 +652,7 @@ def load_on_pk_identity(
refresh_state=refresh_state,
identity_token=identity_token,
)
+
q._compile_options = new_compile_options
q._order_by = None
@@ -1456,10 +1525,6 @@ def load_scalar_attributes(mapper, state, attribute_names, passive):
"attribute refresh operation cannot proceed" % (state_str(state))
)
- has_key = bool(state.key)
-
- result = False
-
no_autoflush = bool(passive & attributes.NO_AUTOFLUSH)
# in the case of inheritance, particularly concrete and abstract
@@ -1472,16 +1537,17 @@ def load_scalar_attributes(mapper, state, attribute_names, passive):
attribute_names = attribute_names.intersection(mapper.attrs.keys())
if mapper.inherits and not mapper.concrete:
+ # load based on committed attributes in the object, formed into
+ # a truncated SELECT that only includes relevant tables. does not
+ # currently use state.key
statement = mapper._optimized_get_statement(state, attribute_names)
if statement is not None:
- from .query import FromStatement
-
# undefer() isn't needed here because statement has the
# columns needed already, this implicitly undefers that column
stmt = FromStatement(mapper, statement)
- result = load_on_ident(
+ return load_on_ident(
session,
stmt,
None,
@@ -1490,46 +1556,46 @@ def load_scalar_attributes(mapper, state, attribute_names, passive):
no_autoflush=no_autoflush,
)
- if result is False:
- if has_key:
- identity_key = state.key
- else:
- # this codepath is rare - only valid when inside a flush, and the
- # object is becoming persistent but hasn't yet been assigned
- # an identity_key.
- # check here to ensure we have the attrs we need.
- pk_attrs = [
- mapper._columntoproperty[col].key for col in mapper.primary_key
- ]
- if state.expired_attributes.intersection(pk_attrs):
- raise sa_exc.InvalidRequestError(
- "Instance %s cannot be refreshed - it's not "
- " persistent and does not "
- "contain a full primary key." % state_str(state)
- )
- identity_key = mapper._identity_key_from_state(state)
-
- if (
- _none_set.issubset(identity_key) and not mapper.allow_partial_pks
- ) or _none_set.issuperset(identity_key):
- util.warn_limited(
- "Instance %s to be refreshed doesn't "
- "contain a full primary key - can't be refreshed "
- "(and shouldn't be expired, either).",
- state_str(state),
+ # normal load, use state.key as the identity to SELECT
+ has_key = bool(state.key)
+
+ if has_key:
+ identity_key = state.key
+ else:
+ # this codepath is rare - only valid when inside a flush, and the
+ # object is becoming persistent but hasn't yet been assigned
+ # an identity_key.
+ # check here to ensure we have the attrs we need.
+ pk_attrs = [
+ mapper._columntoproperty[col].key for col in mapper.primary_key
+ ]
+ if state.expired_attributes.intersection(pk_attrs):
+ raise sa_exc.InvalidRequestError(
+ "Instance %s cannot be refreshed - it's not "
+ " persistent and does not "
+ "contain a full primary key." % state_str(state)
)
- return
+ identity_key = mapper._identity_key_from_state(state)
- result = load_on_ident(
- session,
- future.select(mapper).set_label_style(
- LABEL_STYLE_TABLENAME_PLUS_COL
- ),
- identity_key,
- refresh_state=state,
- only_load_props=attribute_names,
- no_autoflush=no_autoflush,
+ if (
+ _none_set.issubset(identity_key) and not mapper.allow_partial_pks
+ ) or _none_set.issuperset(identity_key):
+ util.warn_limited(
+ "Instance %s to be refreshed doesn't "
+ "contain a full primary key - can't be refreshed "
+ "(and shouldn't be expired, either).",
+ state_str(state),
)
+ return
+
+ result = load_on_ident(
+ session,
+ select(mapper).set_label_style(LABEL_STYLE_TABLENAME_PLUS_COL),
+ identity_key,
+ refresh_state=state,
+ only_load_props=attribute_names,
+ no_autoflush=no_autoflush,
+ )
# if instance is pending, a refresh operation
# may not complete (even if PK attributes are assigned)
diff --git a/lib/sqlalchemy/orm/session.py b/lib/sqlalchemy/orm/session.py
index 1f7bd6d73..bf3df0599 100644
--- a/lib/sqlalchemy/orm/session.py
+++ b/lib/sqlalchemy/orm/session.py
@@ -2817,6 +2817,14 @@ class Session(_SessionClassMethods, EventTarget):
self._expire_state(state, attribute_names)
+ # this autoflush previously used to occur as a secondary effect
+ # of the load_on_ident below. Meaning we'd organize the SELECT
+ # based on current DB pks, then flush, then if pks changed in that
+ # flush, crash. this was unticketed but discovered as part of
+ # #8703. So here, autoflush up front, dont autoflush inside
+ # load_on_ident.
+ self._autoflush()
+
if with_for_update == {}:
raise sa_exc.ArgumentError(
"with_for_update should be the boolean value "
@@ -2835,6 +2843,11 @@ class Session(_SessionClassMethods, EventTarget):
refresh_state=state,
with_for_update=with_for_update,
only_load_props=attribute_names,
+ require_pk_cols=True,
+ # technically unnecessary as we just did autoflush
+ # above, however removes the additional unnecessary
+ # call to _autoflush()
+ no_autoflush=True,
)
is None
):
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
index 59031171d..354c65828 100644
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -32,6 +32,7 @@ from . import util as orm_util
from .base import _DEFER_FOR_STATE
from .base import _RAISE_FOR_STATE
from .base import _SET_DEFERRED_EXPIRED
+from .base import ATTR_WAS_SET
from .base import LoaderCallableStatus
from .base import PASSIVE_OFF
from .base import PassiveFlag
@@ -1422,7 +1423,10 @@ class ImmediateLoader(PostLoader):
alternate_effective_path=alternate_effective_path,
execution_options=execution_options,
)
- state.get_impl(key).set_committed_value(state, dict_, value)
+ if value is not ATTR_WAS_SET:
+ state.get_impl(key).set_committed_value(
+ state, dict_, value
+ )
@log.class_logger