diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2023-04-05 11:58:52 -0400 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2023-04-21 11:30:40 -0400 |
| commit | cf6872d3bdf1a8a9613e853694acc2b1e6f06f51 (patch) | |
| tree | 3a4ee41ab8b48aea7ac1e275c2f553763ec28dad /lib/sqlalchemy/sql | |
| parent | 63f51491c5f0cb22883c800a065d7c4b4c54774e (diff) | |
| download | sqlalchemy-cf6872d3bdf1a8a9613e853694acc2b1e6f06f51.tar.gz | |
add deterministic imv returning ordering using sentinel columns
Repaired a major shortcoming which was identified in the
:ref:`engine_insertmanyvalues` performance optimization feature first
introduced in the 2.0 series. This was a continuation of the change in
2.0.9 which disabled the SQL Server version of the feature due to a
reliance in the ORM on apparent row ordering that is not guaranteed to take
place. The fix applies new logic to all "insertmanyvalues" operations,
which takes effect when a new parameter
:paramref:`_dml.Insert.returning.sort_by_parameter_order` on the
:meth:`_dml.Insert.returning` or :meth:`_dml.UpdateBase.return_defaults`
methods, that through a combination of alternate SQL forms, direct
correspondence of client side parameters, and in some cases downgrading to
running row-at-a-time, will apply sorting to each batch of returned rows
using correspondence to primary key or other unique values in each row
which can be correlated to the input data.
Performance impact is expected to be minimal as nearly all common primary
key scenarios are suitable for parameter-ordered batching to be
achieved for all backends other than SQLite, while "row-at-a-time"
mode operates with a bare minimum of Python overhead compared to the very
heavyweight approaches used in the 1.x series. For SQLite, there is no
difference in performance when "row-at-a-time" mode is used.
It's anticipated that with an efficient "row-at-a-time" INSERT with
RETURNING batching capability, the "insertmanyvalues" feature can be later
be more easily generalized to third party backends that include RETURNING
support but not necessarily easy ways to guarantee a correspondence
with parameter order.
Fixes: #9618
References: #9603
Change-Id: I1d79353f5f19638f752936ba1c35e4dc235a8b7c
Diffstat (limited to 'lib/sqlalchemy/sql')
| -rw-r--r-- | lib/sqlalchemy/sql/_typing.py | 6 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/base.py | 51 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/compiler.py | 597 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/crud.py | 129 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/dml.py | 100 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/elements.py | 7 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/schema.py | 324 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/selectable.py | 3 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/type_api.py | 31 |
9 files changed, 1133 insertions, 115 deletions
diff --git a/lib/sqlalchemy/sql/_typing.py b/lib/sqlalchemy/sql/_typing.py index 14b1b9594..596493b7c 100644 --- a/lib/sqlalchemy/sql/_typing.py +++ b/lib/sqlalchemy/sql/_typing.py @@ -12,6 +12,7 @@ from typing import Any from typing import Callable from typing import Dict from typing import Mapping +from typing import NoReturn from typing import Set from typing import Tuple from typing import Type @@ -364,3 +365,8 @@ def _no_kw() -> exc.ArgumentError: "Additional keyword arguments are not accepted by this " "function/method. The presence of **kw is for pep-484 typing purposes" ) + + +def _unexpected_kw(methname: str, kw: Dict[str, Any]) -> NoReturn: + k = list(kw)[0] + raise TypeError(f"{methname} got an unexpected keyword argument '{k}'") diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py index 6267fd814..253927770 100644 --- a/lib/sqlalchemy/sql/base.py +++ b/lib/sqlalchemy/sql/base.py @@ -30,6 +30,7 @@ from typing import Iterator from typing import List from typing import Mapping from typing import MutableMapping +from typing import NamedTuple from typing import NoReturn from typing import Optional from typing import overload @@ -75,6 +76,8 @@ if TYPE_CHECKING: from .elements import NamedColumn from .elements import SQLCoreOperations from .elements import TextClause + from .schema import Column + from .schema import DefaultGenerator from .selectable import _JoinTargetElement from .selectable import _SelectIterable from .selectable import FromClause @@ -122,6 +125,35 @@ _Fn = TypeVar("_Fn", bound=Callable[..., Any]) _AmbiguousTableNameMap = MutableMapping[str, str] +class _DefaultDescriptionTuple(NamedTuple): + arg: Any + is_scalar: Optional[bool] + is_callable: Optional[bool] + is_sentinel: Optional[bool] + + @classmethod + def _from_column_default( + cls, default: Optional[DefaultGenerator] + ) -> _DefaultDescriptionTuple: + return ( + _DefaultDescriptionTuple( + default.arg, # type: ignore + default.is_scalar, + default.is_callable, + default.is_sentinel, + ) + if default + and ( + default.has_arg + or (not default.for_update and default.is_sentinel) + ) + else _DefaultDescriptionTuple(None, None, None, None) + ) + + +_never_select_column = operator.attrgetter("_omit_from_statements") + + class _EntityNamespace(Protocol): def __getattr__(self, key: str) -> SQLCoreOperations[Any]: ... @@ -1303,6 +1335,25 @@ class SchemaVisitor(ClauseVisitor): __traverse_options__ = {"schema_visitor": True} +class _SentinelDefaultCharacterization(Enum): + NONE = "none" + UNKNOWN = "unknown" + CLIENTSIDE = "clientside" + SENTINEL_DEFAULT = "sentinel_default" + SERVERSIDE = "serverside" + IDENTITY = "identity" + SEQUENCE = "sequence" + + +class _SentinelColumnCharacterization(NamedTuple): + columns: Optional[Sequence[Column[Any]]] = None + is_explicit: bool = False + is_autoinc: bool = False + default_characterization: _SentinelDefaultCharacterization = ( + _SentinelDefaultCharacterization.NONE + ) + + _COLKEY = TypeVar("_COLKEY", Union[None, str], str) _COL_co = TypeVar("_COL_co", bound="ColumnElement[Any]", covariant=True) diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index 9c68b311a..554a84112 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -41,6 +41,7 @@ from typing import ClassVar from typing import Dict from typing import FrozenSet from typing import Iterable +from typing import Iterator from typing import List from typing import Mapping from typing import MutableMapping @@ -70,6 +71,7 @@ from ._typing import is_column_element from ._typing import is_dml from .base import _from_objects from .base import _NONE_NAME +from .base import _SentinelDefaultCharacterization from .base import Executable from .base import NO_ARG from .elements import ClauseElement @@ -81,6 +83,7 @@ from .visitors import prefix_anon_map from .visitors import Visitable from .. import exc from .. import util +from ..util import FastIntFlag from ..util.typing import Literal from ..util.typing import Protocol from ..util.typing import TypedDict @@ -100,6 +103,7 @@ if typing.TYPE_CHECKING: from .elements import ColumnElement from .elements import Label from .functions import Function + from .schema import Table from .selectable import AliasedReturnsRows from .selectable import CompoundSelectState from .selectable import CTE @@ -109,9 +113,14 @@ if typing.TYPE_CHECKING: from .selectable import Select from .selectable import SelectState from .type_api import _BindProcessorType + from .type_api import _SentinelProcessorType from ..engine.cursor import CursorResultMetaData from ..engine.interfaces import _CoreSingleExecuteParams + from ..engine.interfaces import _DBAPIAnyExecuteParams + from ..engine.interfaces import _DBAPIMultiExecuteParams + from ..engine.interfaces import _DBAPISingleExecuteParams from ..engine.interfaces import _ExecuteOptions + from ..engine.interfaces import _GenericSetInputSizesType from ..engine.interfaces import _MutableCoreSingleExecuteParams from ..engine.interfaces import Dialect from ..engine.interfaces import SchemaTranslateMapType @@ -460,12 +469,160 @@ class ExpandedState(NamedTuple): class _InsertManyValues(NamedTuple): - """represents state to use for executing an "insertmanyvalues" statement""" + """represents state to use for executing an "insertmanyvalues" statement. + + The primary consumers of this object are the + :meth:`.SQLCompiler._deliver_insertmanyvalues_batches` and + :meth:`.DefaultDialect._deliver_insertmanyvalues_batches` methods. + + .. versionadded:: 2.0 + + """ is_default_expr: bool + """if True, the statement is of the form + ``INSERT INTO TABLE DEFAULT VALUES``, and can't be rewritten as a "batch" + + """ + single_values_expr: str + """The rendered "values" clause of the INSERT statement. + + This is typically the parenthesized section e.g. "(?, ?, ?)" or similar. + The insertmanyvalues logic uses this string as a search and replace + target. + + """ + insert_crud_params: List[crud._CrudParamElementStr] + """List of Column / bind names etc. used while rewriting the statement""" + num_positional_params_counted: int + """the number of bound parameters in a single-row statement. + + This count may be larger or smaller than the actual number of columns + targeted in the INSERT, as it accommodates for SQL expressions + in the values list that may have zero or more parameters embedded + within them. + + This count is part of what's used to organize rewritten parameter lists + when batching. + + """ + + sort_by_parameter_order: bool = False + """if the deterministic_returnined_order parameter were used on the + insert. + + All of the attributes following this will only be used if this is True. + + """ + + includes_upsert_behaviors: bool = False + """if True, we have to accommodate for upsert behaviors. + + This will in some cases downgrade "insertmanyvalues" that requests + deterministic ordering. + + """ + + sentinel_columns: Optional[Sequence[Column[Any]]] = None + """List of sentinel columns that were located. + + This list is only here if the INSERT asked for + sort_by_parameter_order=True, + and dialect-appropriate sentinel columns were located. + + .. versionadded:: 2.0.10 + + """ + + num_sentinel_columns: int = 0 + """how many sentinel columns are in the above list, if any. + + This is the same as + ``len(sentinel_columns) if sentinel_columns is not None else 0`` + + """ + + sentinel_param_keys: Optional[Sequence[Union[str, int]]] = None + """parameter str keys / int indexes in each param dictionary / tuple + that would link to the client side "sentinel" values for that row, which + we can use to match up parameter sets to result rows. + + This is only present if sentinel_columns is present and the INSERT + statement actually refers to client side values for these sentinel + columns. + + .. versionadded:: 2.0.10 + + """ + + implicit_sentinel: bool = False + """if True, we have exactly one sentinel column and it uses a server side + value, currently has to generate an incrementing integer value. + + The dialect in question would have asserted that it supports receiving + these values back and sorting on that value as a means of guaranteeing + correlation with the incoming parameter list. + + .. versionadded:: 2.0.10 + + """ + + embed_values_counter: bool = False + """Whether to embed an incrementing integer counter in each parameter + set within the VALUES clause as parameters are batched over. + + This is only used for a specific INSERT..SELECT..VALUES..RETURNING syntax + where a subquery is used to produce value tuples. Current support + includes PostgreSQL, Microsoft SQL Server. + + .. versionadded:: 2.0.10 + + """ + + +class _InsertManyValuesBatch(NamedTuple): + """represents an individual batch SQL statement for insertmanyvalues. + + This is passed through the + :meth:`.SQLCompiler._deliver_insertmanyvalues_batches` and + :meth:`.DefaultDialect._deliver_insertmanyvalues_batches` methods out + to the :class:`.Connection` within the + :meth:`.Connection._exec_insertmany_context` method. + + .. versionadded:: 2.0.10 + + """ + + replaced_statement: str + replaced_parameters: _DBAPIAnyExecuteParams + processed_setinputsizes: Optional[_GenericSetInputSizesType] + batch: Sequence[_DBAPISingleExecuteParams] + batch_size: int + batchnum: int + total_batches: int + rows_sorted: bool + is_downgraded: bool + + +class InsertmanyvaluesSentinelOpts(FastIntFlag): + """bitflag enum indicating styles of PK defaults + which can work as implicit sentinel columns + + """ + + NOT_SUPPORTED = 1 + AUTOINCREMENT = 2 + IDENTITY = 4 + SEQUENCE = 8 + + ANY_AUTOINCREMENT = AUTOINCREMENT | IDENTITY | SEQUENCE + _SUPPORTED_OR_NOT = NOT_SUPPORTED | ANY_AUTOINCREMENT + + USE_INSERT_FROM_SELECT = 16 + RENDER_SELECT_COL_CASTS = 64 class CompilerState(IntEnum): @@ -1484,6 +1641,7 @@ class SQLCompiler(Compiled): if self._insertmanyvalues: positions = [] + single_values_expr = re.sub( self._positional_pattern, find_position, @@ -1499,13 +1657,19 @@ class SQLCompiler(Compiled): for v in self._insertmanyvalues.insert_crud_params ] - self._insertmanyvalues = _InsertManyValues( - is_default_expr=self._insertmanyvalues.is_default_expr, + sentinel_param_int_idxs = ( + [ + self.positiontup.index(cast(str, _param_key)) + for _param_key in self._insertmanyvalues.sentinel_param_keys # noqa: E501 + ] + if self._insertmanyvalues.sentinel_param_keys is not None + else None + ) + + self._insertmanyvalues = self._insertmanyvalues._replace( single_values_expr=single_values_expr, insert_crud_params=insert_crud_params, - num_positional_params_counted=( - self._insertmanyvalues.num_positional_params_counted - ), + sentinel_param_keys=sentinel_param_int_idxs, ) def _process_numeric(self): @@ -1574,15 +1738,21 @@ class SQLCompiler(Compiled): for v in self._insertmanyvalues.insert_crud_params ] - self._insertmanyvalues = _InsertManyValues( - is_default_expr=self._insertmanyvalues.is_default_expr, + sentinel_param_int_idxs = ( + [ + self.positiontup.index(cast(str, _param_key)) + for _param_key in self._insertmanyvalues.sentinel_param_keys # noqa: E501 + ] + if self._insertmanyvalues.sentinel_param_keys is not None + else None + ) + + self._insertmanyvalues = self._insertmanyvalues._replace( # This has the numbers (:1, :2) single_values_expr=single_values_expr, # The single binds are instead %s so they can be formatted insert_crud_params=insert_crud_params, - num_positional_params_counted=( - self._insertmanyvalues.num_positional_params_counted - ), + sentinel_param_keys=sentinel_param_int_idxs, ) @util.memoized_property @@ -1611,6 +1781,23 @@ class SQLCompiler(Compiled): if value is not None } + @util.memoized_property + def _imv_sentinel_value_resolvers( + self, + ) -> Optional[Sequence[Optional[_SentinelProcessorType[Any]]]]: + imv = self._insertmanyvalues + if imv is None or imv.sentinel_columns is None: + return None + + sentinel_value_resolvers = [ + _scol.type._cached_sentinel_value_processor(self.dialect) + for _scol in imv.sentinel_columns + ] + if util.NONE_SET.issuperset(sentinel_value_resolvers): + return None + else: + return sentinel_value_resolvers + def is_subquery(self): return len(self.stack) > 1 @@ -5023,27 +5210,111 @@ class SQLCompiler(Compiled): ) return dialect_hints, table_text - def _insert_stmt_should_use_insertmanyvalues(self, statement): - return ( - self.dialect.supports_multivalues_insert - and self.dialect.use_insertmanyvalues - # note self.implicit_returning or self._result_columns - # implies self.dialect.insert_returning capability - and ( - self.dialect.use_insertmanyvalues_wo_returning - or self.implicit_returning - or self._result_columns + # within the realm of "insertmanyvalues sentinel columns", + # these lookups match different kinds of Column() configurations + # to specific backend capabilities. they are broken into two + # lookups, one for autoincrement columns and the other for non + # autoincrement columns + _sentinel_col_non_autoinc_lookup = util.immutabledict( + { + _SentinelDefaultCharacterization.CLIENTSIDE: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.SENTINEL_DEFAULT: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.NONE: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.IDENTITY: ( + InsertmanyvaluesSentinelOpts.IDENTITY + ), + _SentinelDefaultCharacterization.SEQUENCE: ( + InsertmanyvaluesSentinelOpts.SEQUENCE + ), + } + ) + _sentinel_col_autoinc_lookup = _sentinel_col_non_autoinc_lookup.union( + { + _SentinelDefaultCharacterization.NONE: ( + InsertmanyvaluesSentinelOpts.AUTOINCREMENT + ), + } + ) + + def _get_sentinel_column_for_table( + self, table: Table + ) -> Optional[Sequence[Column[Any]]]: + """given a :class:`.Table`, return a usable sentinel column or + columns for this dialect if any. + + Return None if no sentinel columns could be identified, or raise an + error if a column was marked as a sentinel explicitly but isn't + compatible with this dialect. + + """ + + sentinel_opts = self.dialect.insertmanyvalues_implicit_sentinel + sentinel_characteristics = table._sentinel_column_characteristics + + sent_cols = sentinel_characteristics.columns + + if sent_cols is None: + return None + + if sentinel_characteristics.is_autoinc: + bitmask = self._sentinel_col_autoinc_lookup.get( + sentinel_characteristics.default_characterization, 0 ) - ) + else: + bitmask = self._sentinel_col_non_autoinc_lookup.get( + sentinel_characteristics.default_characterization, 0 + ) + + if sentinel_opts & bitmask: + return sent_cols + + if sentinel_characteristics.is_explicit: + # a column was explicitly marked as insert_sentinel=True, + # however it is not compatible with this dialect. they should + # not indicate this column as a sentinel if they need to include + # this dialect. + + # TODO: do we want non-primary key explicit sentinel cols + # that can gracefully degrade for some backends? + # insert_sentinel="degrade" perhaps. not for the initial release. + # I am hoping people are generally not dealing with this sentinel + # business at all. + + # if is_explicit is True, there will be only one sentinel column. + + raise exc.InvalidRequestError( + f"Column {sent_cols[0]} can't be explicitly " + "marked as a sentinel column when using the " + f"{self.dialect.name} dialect, as the " + "particular type of default generation on this column is " + "not currently compatible with this dialect's specific " + f"INSERT..RETURNING syntax which can receive the " + "server-generated value in " + "a deterministic way. To remove this error, remove " + "insert_sentinel=True from primary key autoincrement " + "columns; these columns are automatically used as " + "sentinels for supported dialects in any case." + ) + + return None def _deliver_insertmanyvalues_batches( - self, statement, parameters, generic_setinputsizes, batch_size - ): + self, + statement: str, + parameters: _DBAPIMultiExecuteParams, + generic_setinputsizes: Optional[_GenericSetInputSizesType], + batch_size: int, + sort_by_parameter_order: bool, + ) -> Iterator[_InsertManyValuesBatch]: imv = self._insertmanyvalues assert imv is not None - executemany_values = f"({imv.single_values_expr})" - lenparams = len(parameters) if imv.is_default_expr and not self.dialect.supports_default_metavalue: # backend doesn't support @@ -5058,19 +5329,41 @@ class SQLCompiler(Compiled): # cursor.lastrowid etc. still goes through the more heavyweight # "ExecutionContext per statement" system as it isn't usable # as a generic "RETURNING" approach - for batchnum, param in enumerate(parameters, 1): - yield ( + use_row_at_a_time = True + downgraded = False + elif not self.dialect.supports_multivalues_insert or ( + sort_by_parameter_order + and self._result_columns + and (imv.sentinel_columns is None or imv.includes_upsert_behaviors) + ): + # deterministic order was requested and the compiler could + # not organize sentinel columns for this dialect/statement. + # use row at a time + use_row_at_a_time = True + downgraded = True + else: + use_row_at_a_time = False + downgraded = False + + if use_row_at_a_time: + for batchnum, param in enumerate( + cast("Sequence[_DBAPISingleExecuteParams]", parameters), 1 + ): + yield _InsertManyValuesBatch( statement, param, generic_setinputsizes, + [param], + batch_size, batchnum, lenparams, + sort_by_parameter_order, + downgraded, ) return - else: - statement = statement.replace( - executemany_values, "__EXECMANY_TOKEN__" - ) + + executemany_values = f"({imv.single_values_expr})" + statement = statement.replace(executemany_values, "__EXECMANY_TOKEN__") # Use optional insertmanyvalues_max_parameters # to further shrink the batch size so that there are no more than @@ -5094,7 +5387,7 @@ class SQLCompiler(Compiled): batches = list(parameters) - processed_setinputsizes = None + processed_setinputsizes: Optional[_GenericSetInputSizesType] = None batchnum = 1 total_batches = lenparams // batch_size + ( 1 if lenparams % batch_size else 0 @@ -5124,10 +5417,14 @@ class SQLCompiler(Compiled): ) return formatted + if imv.embed_values_counter: + imv_values_counter = ", _IMV_VALUES_COUNTER" + else: + imv_values_counter = "" formatted_values_clause = f"""({', '.join( apply_placeholders(bind_keys, formatted) for _, _, formatted, bind_keys in insert_crud_params - )})""" + )}{imv_values_counter})""" keys_to_replace = all_keys.intersection( escaped_bind_names.get(key, key) @@ -5143,7 +5440,13 @@ class SQLCompiler(Compiled): formatted_values_clause = "" keys_to_replace = set() base_parameters = {} - executemany_values_w_comma = f"({imv.single_values_expr}), " + + if imv.embed_values_counter: + executemany_values_w_comma = ( + f"({imv.single_values_expr}, _IMV_VALUES_COUNTER), " + ) + else: + executemany_values_w_comma = f"({imv.single_values_expr}), " all_names_we_will_expand: Set[str] = set() for elem in imv.insert_crud_params: @@ -5176,7 +5479,7 @@ class SQLCompiler(Compiled): ) while batches: - batch = batches[0:batch_size] + batch = cast("Sequence[Any]", batches[0:batch_size]) batches[0:batch_size] = [] if generic_setinputsizes: @@ -5196,7 +5499,7 @@ class SQLCompiler(Compiled): if self.positional: num_ins_params = imv.num_positional_params_counted - batch_iterator: Iterable[Tuple[Any, ...]] + batch_iterator: Iterable[Sequence[Any]] if num_ins_params == len(batch[0]): extra_params_left = extra_params_right = () batch_iterator = batch @@ -5208,9 +5511,19 @@ class SQLCompiler(Compiled): for b in batch ) - expanded_values_string = ( - executemany_values_w_comma * len(batch) - )[:-2] + if imv.embed_values_counter: + expanded_values_string = ( + "".join( + executemany_values_w_comma.replace( + "_IMV_VALUES_COUNTER", str(i) + ) + for i, _ in enumerate(batch) + ) + )[:-2] + else: + expanded_values_string = ( + (executemany_values_w_comma * len(batch)) + )[:-2] if self._numeric_binds and num_ins_params > 0: # numeric will always number the parameters inside of @@ -5254,12 +5567,14 @@ class SQLCompiler(Compiled): replaced_parameters = base_parameters.copy() for i, param in enumerate(batch): - replaced_values_clauses.append( - formatted_values_clause.replace( - "EXECMANY_INDEX__", str(i) - ) + + fmv = formatted_values_clause.replace( + "EXECMANY_INDEX__", str(i) ) + if imv.embed_values_counter: + fmv = fmv.replace("_IMV_VALUES_COUNTER", str(i)) + replaced_values_clauses.append(fmv) replaced_parameters.update( {f"{key}__{i}": param[key] for key in keys_to_replace} ) @@ -5269,12 +5584,16 @@ class SQLCompiler(Compiled): ", ".join(replaced_values_clauses), ) - yield ( + yield _InsertManyValuesBatch( replaced_statement, replaced_parameters, processed_setinputsizes, + batch, + batch_size, batchnum, total_batches, + sort_by_parameter_order, + False, ) batchnum += 1 @@ -5360,6 +5679,13 @@ class SQLCompiler(Compiled): "version settings does not support " "in-place multirow inserts." % self.dialect.name ) + elif ( + self.implicit_returning or insert_stmt._returning + ) and insert_stmt._sort_by_parameter_order: + raise exc.CompileError( + "RETURNING cannot be determinstically sorted when " + "using an INSERT which includes multi-row values()." + ) crud_params_single = crud_params_struct.single_params else: crud_params_single = crud_params_struct.single_params @@ -5390,11 +5716,82 @@ class SQLCompiler(Compiled): [expr for _, expr, _, _ in crud_params_single] ) - if self.implicit_returning or insert_stmt._returning: + # look for insertmanyvalues attributes that would have been configured + # by crud.py as it scanned through the columns to be part of the + # INSERT + use_insertmanyvalues = crud_params_struct.use_insertmanyvalues + named_sentinel_params: Optional[Sequence[str]] = None + add_sentinel_cols = None + implicit_sentinel = False + + returning_cols = self.implicit_returning or insert_stmt._returning + if returning_cols: + + add_sentinel_cols = crud_params_struct.use_sentinel_columns + + if add_sentinel_cols is not None: + assert use_insertmanyvalues + + # search for the sentinel column explicitly present + # in the INSERT columns list, and additionally check that + # this column has a bound parameter name set up that's in the + # parameter list. If both of these cases are present, it means + # we will have a client side value for the sentinel in each + # parameter set. + + _params_by_col = { + col: param_names + for col, _, _, param_names in crud_params_single + } + named_sentinel_params = [] + for _add_sentinel_col in add_sentinel_cols: + if _add_sentinel_col not in _params_by_col: + named_sentinel_params = None + break + param_name = self._within_exec_param_key_getter( + _add_sentinel_col + ) + if param_name not in _params_by_col[_add_sentinel_col]: + named_sentinel_params = None + break + named_sentinel_params.append(param_name) + + if named_sentinel_params is None: + # if we are not going to have a client side value for + # the sentinel in the parameter set, that means it's + # an autoincrement, an IDENTITY, or a server-side SQL + # expression like nextval('seqname'). So this is + # an "implicit" sentinel; we will look for it in + # RETURNING + # only, and then sort on it. For this case on PG, + # SQL Server we have to use a special INSERT form + # that guarantees the server side function lines up with + # the entries in the VALUES. + if ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + ): + implicit_sentinel = True + else: + # here, we are not using a sentinel at all + # and we are likely the SQLite dialect. + # The first add_sentinel_col that we have should not + # be marked as "insert_sentinel=True". if it was, + # an error should have been raised in + # _get_sentinel_column_for_table. + assert not add_sentinel_cols[0]._insert_sentinel, ( + "sentinel selection rules should have prevented " + "us from getting here for this dialect" + ) + + # always put the sentinel columns last. even if they are + # in the returning list already, they will be there twice + # then. + returning_cols = list(returning_cols) + list(add_sentinel_cols) returning_clause = self.returning_clause( insert_stmt, - self.implicit_returning or insert_stmt._returning, + returning_cols, populate_result_map=toplevel, ) @@ -5423,9 +5820,8 @@ class SQLCompiler(Compiled): text += " %s" % select_text elif not crud_params_single and supports_default_values: text += " DEFAULT VALUES" - if toplevel and self._insert_stmt_should_use_insertmanyvalues( - insert_stmt - ): + if use_insertmanyvalues: + self._insertmanyvalues = _InsertManyValues( True, self.dialect.default_metavalue_token, @@ -5433,6 +5829,17 @@ class SQLCompiler(Compiled): "List[crud._CrudParamElementStr]", crud_params_single ), counted_bindparam, + sort_by_parameter_order=( + insert_stmt._sort_by_parameter_order + ), + includes_upsert_behaviors=( + insert_stmt._post_values_clause is not None + ), + sentinel_columns=add_sentinel_cols, + num_sentinel_columns=len(add_sentinel_cols) + if add_sentinel_cols + else 0, + implicit_sentinel=implicit_sentinel, ) elif compile_state._has_multi_parameters: text += " VALUES %s" % ( @@ -5440,11 +5847,9 @@ class SQLCompiler(Compiled): "(%s)" % (", ".join(value for _, _, value, _ in crud_param_set)) for crud_param_set in crud_params_struct.all_multi_params - ) + ), ) else: - # TODO: why is third element of crud_params_single not str - # already? insert_single_values_expr = ", ".join( [ value @@ -5455,20 +5860,90 @@ class SQLCompiler(Compiled): ] ) - text += " VALUES (%s)" % insert_single_values_expr - if toplevel and self._insert_stmt_should_use_insertmanyvalues( - insert_stmt - ): + if use_insertmanyvalues: + + if ( + implicit_sentinel + and ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT + ) + # this is checking if we have + # INSERT INTO table (id) VALUES (DEFAULT). + and not (crud_params_struct.is_default_metavalue_only) + ): + # if we have a sentinel column that is server generated, + # then for selected backends render the VALUES list as a + # subquery. This is the orderable form supported by + # PostgreSQL and SQL Server. + embed_sentinel_value = True + + render_bind_casts = ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS + ) + + colnames = ", ".join( + f"p{i}" for i, _ in enumerate(crud_params_single) + ) + + if render_bind_casts: + # render casts for the SELECT list. For PG, we are + # already rendering bind casts in the parameter list, + # selectively for the more "tricky" types like ARRAY. + # however, even for the "easy" types, if the parameter + # is NULL for every entry, PG gives up and says + # "it must be TEXT", which fails for other easy types + # like ints. So we cast on this side too. + colnames_w_cast = ", ".join( + self.render_bind_cast( + col.type, + col.type._unwrapped_dialect_impl(self.dialect), + f"p{i}", + ) + for i, (col, *_) in enumerate(crud_params_single) + ) + else: + colnames_w_cast = colnames + + text += ( + f" SELECT {colnames_w_cast} FROM " + f"(VALUES ({insert_single_values_expr})) " + f"AS imp_sen({colnames}, sen_counter) " + "ORDER BY sen_counter" + ) + else: + # otherwise, if no sentinel or backend doesn't support + # orderable subquery form, use a plain VALUES list + embed_sentinel_value = False + text += f" VALUES ({insert_single_values_expr})" + self._insertmanyvalues = _InsertManyValues( - False, - insert_single_values_expr, - cast( + is_default_expr=False, + single_values_expr=insert_single_values_expr, + insert_crud_params=cast( "List[crud._CrudParamElementStr]", crud_params_single, ), - counted_bindparam, + num_positional_params_counted=counted_bindparam, + sort_by_parameter_order=( + insert_stmt._sort_by_parameter_order + ), + includes_upsert_behaviors=( + insert_stmt._post_values_clause is not None + ), + sentinel_columns=add_sentinel_cols, + num_sentinel_columns=len(add_sentinel_cols) + if add_sentinel_cols + else 0, + sentinel_param_keys=named_sentinel_params, + implicit_sentinel=implicit_sentinel, + embed_values_counter=embed_sentinel_value, ) + else: + text += f" VALUES ({insert_single_values_expr})" + if insert_stmt._post_values_clause is not None: post_values_clause = self.process( insert_stmt._post_values_clause, **kw diff --git a/lib/sqlalchemy/sql/crud.py b/lib/sqlalchemy/sql/crud.py index 04b62d1ff..563f61c04 100644 --- a/lib/sqlalchemy/sql/crud.py +++ b/lib/sqlalchemy/sql/crud.py @@ -34,6 +34,7 @@ from . import coercions from . import dml from . import elements from . import roles +from .base import _DefaultDescriptionTuple from .dml import isinsert as _compile_state_isinsert from .elements import ColumnClause from .schema import default_is_clause_element @@ -53,6 +54,7 @@ if TYPE_CHECKING: from .elements import ColumnElement from .elements import KeyedColumnElement from .schema import _SQLExprDefault + from .schema import Column REQUIRED = util.symbol( "REQUIRED", @@ -79,20 +81,22 @@ def _as_dml_column(c: ColumnElement[Any]) -> ColumnClause[Any]: _CrudParamElement = Tuple[ "ColumnElement[Any]", - str, - Optional[Union[str, "_SQLExprDefault"]], + str, # column name + Optional[ + Union[str, "_SQLExprDefault"] + ], # bound parameter string or SQL expression to apply Iterable[str], ] _CrudParamElementStr = Tuple[ "KeyedColumnElement[Any]", str, # column name - str, # placeholder + str, # bound parameter string Iterable[str], ] _CrudParamElementSQLExpr = Tuple[ "ColumnClause[Any]", str, - "_SQLExprDefault", + "_SQLExprDefault", # SQL expression to apply Iterable[str], ] @@ -101,8 +105,10 @@ _CrudParamSequence = List[_CrudParamElement] class _CrudParams(NamedTuple): single_params: _CrudParamSequence - all_multi_params: List[Sequence[_CrudParamElementStr]] + is_default_metavalue_only: bool = False + use_insertmanyvalues: bool = False + use_sentinel_columns: Optional[Sequence[Column[Any]]] = None def _get_crud_params( @@ -206,6 +212,7 @@ def _get_crud_params( (c.key,), ) for c in stmt.table.columns + if not c._omit_from_statements ], [], ) @@ -301,8 +308,10 @@ def _get_crud_params( toplevel, kw, ) + use_insertmanyvalues = False + use_sentinel_columns = None else: - _scan_cols( + use_insertmanyvalues, use_sentinel_columns = _scan_cols( compiler, stmt, compile_state, @@ -328,6 +337,8 @@ def _get_crud_params( % (", ".join("%s" % (c,) for c in check)) ) + is_default_metavalue_only = False + if ( _compile_state_isinsert(compile_state) and compile_state._has_multi_parameters @@ -363,8 +374,15 @@ def _get_crud_params( (), ) ] - - return _CrudParams(values, []) + is_default_metavalue_only = True + + return _CrudParams( + values, + [], + is_default_metavalue_only=is_default_metavalue_only, + use_insertmanyvalues=use_insertmanyvalues, + use_sentinel_columns=use_sentinel_columns, + ) @overload @@ -527,7 +545,19 @@ def _scan_insert_from_select_cols( if stmt.include_insert_from_select_defaults: col_set = set(cols) for col in stmt.table.columns: - if col not in col_set and col.default: + # omit columns that were not in the SELECT statement. + # this will omit columns marked as omit_from_statements naturally, + # as long as that col was not explicit in the SELECT. + # if an omit_from_statements col has a "default" on it, then + # we need to include it, as these defaults should still fire off. + # but, if it has that default and it's the "sentinel" default, + # we don't do sentinel default operations for insert_from_select + # here so we again omit it. + if ( + col not in col_set + and col.default + and not col.default.is_sentinel + ): cols.append(col) for c in cols: @@ -579,6 +609,8 @@ def _scan_cols( implicit_returning, implicit_return_defaults, postfetch_lastrowid, + use_insertmanyvalues, + use_sentinel_columns, ) = _get_returning_modifiers(compiler, stmt, compile_state, toplevel) assert compile_state.isupdate or compile_state.isinsert @@ -672,9 +704,12 @@ def _scan_cols( elif c.default is not None: # column has a default, but it's not a pk column, or it is but # we don't need to get the pk back. - _append_param_insert_hasdefault( - compiler, stmt, c, implicit_return_defaults, values, kw - ) + if not c.default.is_sentinel or ( + use_sentinel_columns is not None + ): + _append_param_insert_hasdefault( + compiler, stmt, c, implicit_return_defaults, values, kw + ) elif c.server_default is not None: # column has a DDL-level default, and is either not a pk @@ -730,6 +765,8 @@ def _scan_cols( if c in remaining_supplemental ) + return (use_insertmanyvalues, use_sentinel_columns) + def _setup_delete_return_defaults( compiler, @@ -744,7 +781,7 @@ def _setup_delete_return_defaults( toplevel, kw, ): - (_, _, implicit_return_defaults, _) = _get_returning_modifiers( + (_, _, implicit_return_defaults, *_) = _get_returning_modifiers( compiler, stmt, compile_state, toplevel ) @@ -1248,6 +1285,18 @@ class _multiparam_column(elements.ColumnElement[Any]): and other.original == self.original ) + @util.memoized_property + def _default_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + + return _DefaultDescriptionTuple._from_column_default(self.default) + + @util.memoized_property + def _onupdate_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + + return _DefaultDescriptionTuple._from_column_default(self.onupdate) + def _process_multiparam_default_bind( compiler: SQLCompiler, @@ -1459,16 +1508,15 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): """ + dialect = compiler.dialect + need_pks = ( toplevel and _compile_state_isinsert(compile_state) and not stmt._inline and ( not compiler.for_executemany - or ( - compiler.dialect.insert_executemany_returning - and stmt._return_defaults - ) + or (dialect.insert_executemany_returning and stmt._return_defaults) ) and not stmt._returning # and (not stmt._returning or stmt._return_defaults) @@ -1479,7 +1527,7 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): # after the INSERT if that's all we need. postfetch_lastrowid = ( need_pks - and compiler.dialect.postfetch_lastrowid + and dialect.postfetch_lastrowid and stmt.table._autoincrement_column is not None ) @@ -1491,7 +1539,7 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): need_pks # the dialect can veto it if it just doesnt support RETURNING # with INSERT - and compiler.dialect.insert_returning + and dialect.insert_returning # user-defined implicit_returning on Table can veto it and compile_state._primary_table.implicit_returning # the compile_state can veto it (SQlite uses this to disable @@ -1506,10 +1554,7 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): # and a lot of weird use cases are supported by it. # SQLite lastrowid times 3x faster than returning, # Mariadb lastrowid 2x faster than returning - ( - not postfetch_lastrowid - or compiler.dialect.favor_returning_over_lastrowid - ) + (not postfetch_lastrowid or dialect.favor_returning_over_lastrowid) or compile_state._has_multi_parameters or stmt._return_defaults ) @@ -1521,25 +1566,57 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): should_implicit_return_defaults = ( implicit_returning and stmt._return_defaults ) + explicit_returning = should_implicit_return_defaults or stmt._returning + use_insertmanyvalues = ( + toplevel + and compiler.for_executemany + and dialect.use_insertmanyvalues + and ( + explicit_returning or dialect.use_insertmanyvalues_wo_returning + ) + ) + + use_sentinel_columns = None + if ( + use_insertmanyvalues + and explicit_returning + and stmt._sort_by_parameter_order + ): + use_sentinel_columns = compiler._get_sentinel_column_for_table( + stmt.table + ) + elif compile_state.isupdate: should_implicit_return_defaults = ( stmt._return_defaults and compile_state._primary_table.implicit_returning and compile_state._supports_implicit_returning - and compiler.dialect.update_returning + and dialect.update_returning ) + use_insertmanyvalues = False + use_sentinel_columns = None elif compile_state.isdelete: should_implicit_return_defaults = ( stmt._return_defaults and compile_state._primary_table.implicit_returning and compile_state._supports_implicit_returning - and compiler.dialect.delete_returning + and dialect.delete_returning ) + use_insertmanyvalues = False + use_sentinel_columns = None else: should_implicit_return_defaults = False # pragma: no cover + use_insertmanyvalues = False + use_sentinel_columns = None if should_implicit_return_defaults: if not stmt._return_defaults_columns: + # TODO: this is weird. See #9685 where we have to + # take an extra step to prevent this from happening. why + # would this ever be *all* columns? but if we set to blank, then + # that seems to break things also in the ORM. So we should + # try to clean this up and figure out what return_defaults + # needs to do w/ the ORM etc. here implicit_return_defaults = set(stmt.table.c) else: implicit_return_defaults = set(stmt._return_defaults_columns) @@ -1551,6 +1628,8 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): implicit_returning or should_implicit_return_defaults, implicit_return_defaults, postfetch_lastrowid, + use_insertmanyvalues, + use_sentinel_columns, ) diff --git a/lib/sqlalchemy/sql/dml.py b/lib/sqlalchemy/sql/dml.py index 830f845b4..911061640 100644 --- a/lib/sqlalchemy/sql/dml.py +++ b/lib/sqlalchemy/sql/dml.py @@ -32,8 +32,8 @@ from typing import Union from . import coercions from . import roles from . import util as sql_util -from ._typing import _no_kw from ._typing import _TP +from ._typing import _unexpected_kw from ._typing import is_column_element from ._typing import is_named_from_clause from .base import _entity_namespace_key @@ -455,6 +455,7 @@ class UpdateBase( self, *cols: _DMLColumnArgument, supplemental_cols: Optional[Iterable[_DMLColumnArgument]] = None, + sort_by_parameter_order: bool = False, ) -> Self: """Make use of a :term:`RETURNING` clause for the purpose of fetching server-side expressions and defaults, for supporting @@ -603,6 +604,20 @@ class UpdateBase( .. versionadded:: 2.0 + :param sort_by_parameter_order: for a batch INSERT that is being + executed against multiple parameter sets, organize the results of + RETURNING so that the returned rows correspond to the order of + parameter sets passed in. This applies only to an :term:`executemany` + execution for supporting dialects and typically makes use of the + :term:`insertmanyvalues` feature. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` - background on + sorting of RETURNING rows for bulk INSERT + .. seealso:: :meth:`.UpdateBase.returning` @@ -636,7 +651,13 @@ class UpdateBase( coercions.expect(roles.ColumnsClauseRole, c) for c in cols ) self._return_defaults = True - + if sort_by_parameter_order: + if not self.is_insert: + raise exc.ArgumentError( + "The 'sort_by_parameter_order' argument to " + "return_defaults() only applies to INSERT statements" + ) + self._sort_by_parameter_order = True if supplemental_cols: # uniquifying while also maintaining order (the maintain of order # is for test suites but also for vertical splicing @@ -661,7 +682,10 @@ class UpdateBase( @_generative def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> UpdateBase: r"""Add a :term:`RETURNING` or equivalent clause to this statement. @@ -723,6 +747,25 @@ class UpdateBase( read the documentation notes for the database in use in order to determine the availability of RETURNING. + :param \*cols: series of columns, SQL expressions, or whole tables + entities to be returned. + :param sort_by_parameter_order: for a batch INSERT that is being + executed against multiple parameter sets, organize the results of + RETURNING so that the returned rows correspond to the order of + parameter sets passed in. This applies only to an :term:`executemany` + execution for supporting dialects and typically makes use of the + :term:`insertmanyvalues` feature. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` - background on + sorting of RETURNING rows for bulk INSERT (Core level discussion) + + :ref:`orm_queryguide_bulk_insert_returning_ordered` - example of + use with :ref:`orm_queryguide_bulk_insert` (ORM level discussion) + .. seealso:: :meth:`.UpdateBase.return_defaults` - an alternative method tailored @@ -733,7 +776,7 @@ class UpdateBase( """ # noqa: E501 if __kw: - raise _no_kw() + raise _unexpected_kw("UpdateBase.returning()", __kw) if self._return_defaults: raise exc.InvalidRequestError( "return_defaults() is already configured on this statement" @@ -741,6 +784,13 @@ class UpdateBase( self._returning += tuple( coercions.expect(roles.ColumnsClauseRole, c) for c in cols ) + if sort_by_parameter_order: + if not self.is_insert: + raise exc.ArgumentError( + "The 'sort_by_parameter_order' argument to returning() " + "only applies to INSERT statements" + ) + self._sort_by_parameter_order = True return self def corresponding_column( @@ -1123,6 +1173,8 @@ class Insert(ValuesBase): select = None include_insert_from_select_defaults = False + _sort_by_parameter_order: bool = False + is_insert = True table: TableClause @@ -1143,6 +1195,7 @@ class Insert(ValuesBase): "_return_defaults_columns", InternalTraversal.dp_clauseelement_tuple, ), + ("_sort_by_parameter_order", InternalTraversal.dp_boolean), ] + HasPrefixes._has_prefixes_traverse_internals + DialectKWArgs._dialect_kwargs_traverse_internals @@ -1231,24 +1284,35 @@ class Insert(ValuesBase): if TYPE_CHECKING: - # START OVERLOADED FUNCTIONS self.returning ReturningInsert 1-8 + # START OVERLOADED FUNCTIONS self.returning ReturningInsert 1-8 ", *, sort_by_parameter_order: bool = False" # noqa: E501 # code within this block is **programmatically, # statically generated** by tools/generate_tuple_map_overloads.py @overload - def returning(self, __ent0: _TCCA[_T0]) -> ReturningInsert[Tuple[_T0]]: + def returning( + self, __ent0: _TCCA[_T0], *, sort_by_parameter_order: bool = False + ) -> ReturningInsert[Tuple[_T0]]: ... @overload def returning( - self, __ent0: _TCCA[_T0], __ent1: _TCCA[_T1] + self, + __ent0: _TCCA[_T0], + __ent1: _TCCA[_T1], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1]]: ... @overload def returning( - self, __ent0: _TCCA[_T0], __ent1: _TCCA[_T1], __ent2: _TCCA[_T2] + self, + __ent0: _TCCA[_T0], + __ent1: _TCCA[_T1], + __ent2: _TCCA[_T2], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2]]: ... @@ -1259,6 +1323,8 @@ class Insert(ValuesBase): __ent1: _TCCA[_T1], __ent2: _TCCA[_T2], __ent3: _TCCA[_T3], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3]]: ... @@ -1270,6 +1336,8 @@ class Insert(ValuesBase): __ent2: _TCCA[_T2], __ent3: _TCCA[_T3], __ent4: _TCCA[_T4], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4]]: ... @@ -1282,6 +1350,8 @@ class Insert(ValuesBase): __ent3: _TCCA[_T3], __ent4: _TCCA[_T4], __ent5: _TCCA[_T5], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5]]: ... @@ -1295,6 +1365,8 @@ class Insert(ValuesBase): __ent4: _TCCA[_T4], __ent5: _TCCA[_T5], __ent6: _TCCA[_T6], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5, _T6]]: ... @@ -1309,6 +1381,8 @@ class Insert(ValuesBase): __ent5: _TCCA[_T5], __ent6: _TCCA[_T6], __ent7: _TCCA[_T7], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5, _T6, _T7]]: ... @@ -1316,12 +1390,18 @@ class Insert(ValuesBase): @overload def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> ReturningInsert[Any]: ... def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> ReturningInsert[Any]: ... diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index 0f356ae27..694faee5f 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -91,7 +91,6 @@ if typing.TYPE_CHECKING: from .compiler import SQLCompiler from .functions import FunctionElement from .operators import OperatorType - from .schema import _ServerDefaultType from .schema import Column from .schema import DefaultGenerator from .schema import FetchedValue @@ -1263,6 +1262,8 @@ class ColumnElement( primary_key: bool = False _is_clone_of: Optional[ColumnElement[_T]] _is_column_element = True + _insert_sentinel: bool = False + _omit_from_statements = False foreign_keys: AbstractSet[ForeignKey] = frozenset() @@ -2212,6 +2213,8 @@ class TextClause( _render_label_in_columns_clause = False + _omit_from_statements = False + @property def _hide_froms(self) -> Iterable[FromClause]: return () @@ -4667,7 +4670,7 @@ class ColumnClause( onupdate: Optional[DefaultGenerator] = None default: Optional[DefaultGenerator] = None - server_default: Optional[_ServerDefaultType] = None + server_default: Optional[FetchedValue] = None server_onupdate: Optional[FetchedValue] = None _is_multiparam_column = False diff --git a/lib/sqlalchemy/sql/schema.py b/lib/sqlalchemy/sql/schema.py index 7d964ad05..192096469 100644 --- a/lib/sqlalchemy/sql/schema.py +++ b/lib/sqlalchemy/sql/schema.py @@ -58,7 +58,10 @@ from . import ddl from . import roles from . import type_api from . import visitors +from .base import _DefaultDescriptionTuple from .base import _NoneName +from .base import _SentinelColumnCharacterization +from .base import _SentinelDefaultCharacterization from .base import DedupeColumnCollection from .base import DialectKWArgs from .base import Executable @@ -77,6 +80,7 @@ from .. import event from .. import exc from .. import inspection from .. import util +from ..util import HasMemoized from ..util.typing import Final from ..util.typing import Literal from ..util.typing import Protocol @@ -107,7 +111,6 @@ if typing.TYPE_CHECKING: _T = TypeVar("_T", bound="Any") _SI = TypeVar("_SI", bound="SchemaItem") -_ServerDefaultType = Union["FetchedValue", str, TextClause, ColumnElement[Any]] _TAB = TypeVar("_TAB", bound="Table") @@ -115,6 +118,10 @@ _CreateDropBind = Union["Engine", "Connection", "MockConnection"] _ConstraintNameArgument = Optional[Union[str, _NoneName]] +_ServerDefaultArgument = Union[ + "FetchedValue", str, TextClause, ColumnElement[Any] +] + class SchemaConst(Enum): @@ -345,6 +352,8 @@ class Table( _columns: DedupeColumnCollection[Column[Any]] + _sentinel_column: Optional[Column[Any]] + constraints: Set[Constraint] """A collection of all :class:`_schema.Constraint` objects associated with this :class:`_schema.Table`. @@ -819,6 +828,8 @@ class Table( assert isinstance(schema, str) self.schema = quoted_name(schema, quote_schema) + self._sentinel_column = None + self.indexes = set() self.constraints = set() PrimaryKeyConstraint( @@ -1005,6 +1016,140 @@ class Table( def _autoincrement_column(self) -> Optional[Column[int]]: return self.primary_key._autoincrement_column + @util.ro_memoized_property + def _sentinel_column_characteristics( + self, + ) -> _SentinelColumnCharacterization: + """determine a candidate column (or columns, in case of a client + generated composite primary key) which can be used as an + "insert sentinel" for an INSERT statement. + + The returned structure, :class:`_SentinelColumnCharacterization`, + includes all the details needed by :class:`.Dialect` and + :class:`.SQLCompiler` to determine if these column(s) can be used + as an INSERT..RETURNING sentinel for a particular database + dialect. + + .. versionadded:: 2.0.10 + + """ + + sentinel_is_explicit = False + sentinel_is_autoinc = False + the_sentinel: Optional[_typing_Sequence[Column[Any]]] = None + + # see if a column was explicitly marked "insert_sentinel=True". + explicit_sentinel_col = self._sentinel_column + + if explicit_sentinel_col is not None: + the_sentinel = (explicit_sentinel_col,) + sentinel_is_explicit = True + + autoinc_col = self._autoincrement_column + if sentinel_is_explicit and explicit_sentinel_col is autoinc_col: + assert autoinc_col is not None + sentinel_is_autoinc = True + elif explicit_sentinel_col is None and autoinc_col is not None: + the_sentinel = (autoinc_col,) + sentinel_is_autoinc = True + + default_characterization = _SentinelDefaultCharacterization.UNKNOWN + + if the_sentinel: + the_sentinel_zero = the_sentinel[0] + if the_sentinel_zero.identity: + + if the_sentinel_zero.identity._increment_is_negative: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + "Can't use IDENTITY default with negative " + "increment as an explicit sentinel column" + ) + else: + if sentinel_is_autoinc: + autoinc_col = None + sentinel_is_autoinc = False + the_sentinel = None + else: + default_characterization = ( + _SentinelDefaultCharacterization.IDENTITY + ) + elif ( + the_sentinel_zero.default is None + and the_sentinel_zero.server_default is None + ): + if the_sentinel_zero.nullable: + raise exc.InvalidRequestError( + f"Column {the_sentinel_zero} has been marked as a " + "sentinel " + "column with no default generation function; it " + "at least needs to be marked nullable=False assuming " + "user-populated sentinel values will be used." + ) + default_characterization = ( + _SentinelDefaultCharacterization.NONE + ) + elif the_sentinel_zero.default is not None: + if the_sentinel_zero.default.is_sentinel: + default_characterization = ( + _SentinelDefaultCharacterization.SENTINEL_DEFAULT + ) + elif default_is_sequence(the_sentinel_zero.default): + + if the_sentinel_zero.default._increment_is_negative: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + "Can't use SEQUENCE default with negative " + "increment as an explicit sentinel column" + ) + else: + if sentinel_is_autoinc: + autoinc_col = None + sentinel_is_autoinc = False + the_sentinel = None + + default_characterization = ( + _SentinelDefaultCharacterization.SEQUENCE + ) + elif the_sentinel_zero.default.is_callable: + default_characterization = ( + _SentinelDefaultCharacterization.CLIENTSIDE + ) + elif the_sentinel_zero.server_default is not None: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + f"Column {the_sentinel[0]} can't be a sentinel column " + "because it uses an explicit server side default " + "that's not the Identity() default." + ) + + default_characterization = ( + _SentinelDefaultCharacterization.SERVERSIDE + ) + + if the_sentinel is None and self.primary_key: + assert autoinc_col is None + + # determine for non-autoincrement pk if all elements are + # client side + for _pkc in self.primary_key: + if _pkc.server_default is not None or ( + _pkc.default and not _pkc.default.is_callable + ): + break + else: + the_sentinel = tuple(self.primary_key) + default_characterization = ( + _SentinelDefaultCharacterization.CLIENTSIDE + ) + + return _SentinelColumnCharacterization( + the_sentinel, + sentinel_is_explicit, + sentinel_is_autoinc, + default_characterization, + ) + @property def autoincrement_column(self) -> Optional[Column[int]]: """Returns the :class:`.Column` object which currently represents @@ -1361,6 +1506,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): inherit_cache = True key: str + server_default: Optional[FetchedValue] + def __init__( self, __name_pos: Optional[ @@ -1384,11 +1531,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): ] = SchemaConst.NULL_UNSPECIFIED, onupdate: Optional[Any] = None, primary_key: bool = False, - server_default: Optional[_ServerDefaultType] = None, + server_default: Optional[_ServerDefaultArgument] = None, server_onupdate: Optional[FetchedValue] = None, quote: Optional[bool] = None, system: bool = False, comment: Optional[str] = None, + insert_sentinel: bool = False, + _omit_from_statements: bool = False, _proxies: Optional[Any] = None, **dialect_kwargs: Any, ): @@ -1873,6 +2022,22 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): :paramref:`_schema.Column.comment` parameter to :class:`_schema.Column`. + :param insert_sentinel: Marks this :class:`_schema.Column` as an + :term:`insert sentinel` used for optimizing the performance of the + :term:`insertmanyvalues` feature for tables that don't + otherwise have qualifying primary key configurations. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :func:`_schema.insert_sentinel` - all in one helper for declaring + sentinel columns + + :ref:`engine_insertmanyvalues` + + :ref:`engine_insertmanyvalues_sentinel_columns` + """ # noqa: E501, RST201, RST202 @@ -1914,7 +2079,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): self.key = key if key is not None else name # type: ignore self.primary_key = primary_key - + self._insert_sentinel = insert_sentinel + self._omit_from_statements = _omit_from_statements self._user_defined_nullable = udn = nullable if udn is not NULL_UNSPECIFIED: self.nullable = udn @@ -1962,22 +2128,26 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): else: self.onpudate = None - self.server_default = server_default - self.server_onupdate = server_onupdate - - if self.server_default is not None: - if isinstance(self.server_default, FetchedValue): - l_args.append(self.server_default._as_for_update(False)) + if server_default is not None: + if isinstance(server_default, FetchedValue): + server_default = server_default._as_for_update(False) + l_args.append(server_default) else: - l_args.append(DefaultClause(self.server_default)) + server_default = DefaultClause(server_default) + l_args.append(server_default) + self.server_default = server_default - if self.server_onupdate is not None: - if isinstance(self.server_onupdate, FetchedValue): - l_args.append(self.server_onupdate._as_for_update(True)) + if server_onupdate is not None: + if isinstance(server_onupdate, FetchedValue): + server_onupdate = server_onupdate._as_for_update(True) + l_args.append(server_onupdate) else: - l_args.append( - DefaultClause(self.server_onupdate, for_update=True) + server_onupdate = DefaultClause( + server_onupdate, for_update=True ) + l_args.append(server_onupdate) + self.server_onupdate = server_onupdate + self._init_items(*cast(_typing_Sequence[SchemaItem], l_args)) util.set_creation_order(self) @@ -2042,6 +2212,17 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): if isinstance(impl, SchemaEventTarget): impl._set_parent_with_dispatch(self) + @HasMemoized.memoized_attribute + def _default_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + + return _DefaultDescriptionTuple._from_column_default(self.default) + + @HasMemoized.memoized_attribute + def _onupdate_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + return _DefaultDescriptionTuple._from_column_default(self.onupdate) + @util.memoized_property def _gen_static_annotations_cache_key(self) -> bool: # type: ignore """special attribute used by cache key gen, if true, we will @@ -2185,6 +2366,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): all_names[self.name] = self self.table = table + if self._insert_sentinel: + if self.table._sentinel_column is not None: + raise exc.ArgumentError( + "a Table may have only one explicit sentinel column" + ) + self.table._sentinel_column = self + if self.primary_key: table.primary_key._replace(self) elif self.key in table.primary_key: @@ -2316,6 +2504,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): server_onupdate=server_onupdate, doc=self.doc, comment=self.comment, + _omit_from_statements=self._omit_from_statements, + insert_sentinel=self._insert_sentinel, *args, **column_kwargs, ) @@ -2472,6 +2662,56 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): return c.key, c +def insert_sentinel( + name: Optional[str] = None, + type_: Optional[_TypeEngineArgument[_T]] = None, + *, + default: Optional[Any] = None, + omit_from_statements: bool = True, +) -> Column[Any]: + """Provides a surrogate :class:`_schema.Column` that will act as a + dedicated insert :term:`sentinel` column, allowing efficient bulk + inserts with deterministic RETURNING sorting for tables that + don't otherwise have qualifying primary key configurations. + + Adding this column to a :class:`.Table` object requires that a + corresponding database table actually has this column present, so if adding + it to an existing model, existing database tables would need to be migrated + (e.g. using ALTER TABLE or similar) to include this column. + + For background on how this object is used, see the section + :ref:`engine_insertmanyvalues_sentinel_columns` as part of the + section :ref:`engine_insertmanyvalues`. + + The :class:`_schema.Column` returned will be a nullable integer column by + default and make use of a sentinel-specific default generator used only in + "insertmanyvalues" operations. + + .. seealso:: + + :func:`_orm.orm_insert_sentinel` + + :paramref:`_schema.Column.insert_sentinel` + + :ref:`engine_insertmanyvalues` + + :ref:`engine_insertmanyvalues_sentinel_columns` + + + .. versionadded:: 2.0.10 + + """ + return Column( + name=name, + type_=type_api.INTEGERTYPE if type_ is None else type_, + default=default + if default is not None + else _InsertSentinelColumnDefault(), + _omit_from_statements=omit_from_statements, + insert_sentinel=True, + ) + + class ForeignKey(DialectKWArgs, SchemaItem): """Defines a dependency between two columns. @@ -3058,17 +3298,24 @@ else: class DefaultGenerator(Executable, SchemaItem): - """Base class for column *default* values.""" + """Base class for column *default* values. + + This object is only present on column.default or column.onupdate. + It's not valid as a server default. + + """ __visit_name__ = "default_generator" _is_default_generator = True is_sequence = False + is_identity = False is_server_default = False is_clause_element = False is_callable = False is_scalar = False has_arg = False + is_sentinel = False column: Optional[Column[Any]] def __init__(self, for_update: bool = False) -> None: @@ -3222,6 +3469,44 @@ class ScalarElementColumnDefault(ColumnDefault): ) +class _InsertSentinelColumnDefault(ColumnDefault): + """Default generator that's specific to the use of a "sentinel" column + when using the insertmanyvalues feature. + + This default is used as part of the :func:`_schema.insert_sentinel` + construct. + + """ + + is_sentinel = True + for_update = False + arg = None + + def __new__(cls) -> _InsertSentinelColumnDefault: + return object.__new__(cls) + + def __init__(self) -> None: + pass + + def _set_parent(self, parent: SchemaEventTarget, **kw: Any) -> None: + col = cast("Column[Any]", parent) + if not col._insert_sentinel: + raise exc.ArgumentError( + "The _InsertSentinelColumnDefault may only be applied to a " + "Column marked as insert_sentinel=True" + ) + elif not col.nullable: + raise exc.ArgumentError( + "The _InsertSentinelColumnDefault may only be applied to a " + "Column that is nullable" + ) + + super()._set_parent(parent, **kw) + + def _copy(self) -> _InsertSentinelColumnDefault: + return _InsertSentinelColumnDefault() + + _SQLExprDefault = Union["ColumnElement[Any]", "TextClause"] @@ -3366,6 +3651,10 @@ class IdentityOptions: self.cache = cache self.order = order + @property + def _increment_is_negative(self) -> bool: + return self.increment is not None and self.increment < 0 + class Sequence(HasSchemaAttr, IdentityOptions, DefaultGenerator): """Represents a named database sequence. @@ -3674,6 +3963,7 @@ class FetchedValue(SchemaEventTarget): reflected = False has_argument = False is_clause_element = False + is_identity = False column: Optional[Column[Any]] @@ -5668,6 +5958,8 @@ class Identity(IdentityOptions, FetchedValue, SchemaItem): __visit_name__ = "identity_column" + is_identity = True + def __init__( self, always: bool = False, diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index 5f530ba7d..8a371951e 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -63,6 +63,7 @@ from .base import _EntityNamespace from .base import _expand_cloned from .base import _from_objects from .base import _generative +from .base import _never_select_column from .base import _NoArg from .base import _select_iterables from .base import CacheableOptions @@ -930,7 +931,7 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable): @util.ro_non_memoized_property def _select_iterable(self) -> _SelectIterable: - return self.c + return (c for c in self.c if not _never_select_column(c)) def _init_collections(self) -> None: assert "_columns" not in self.__dict__ diff --git a/lib/sqlalchemy/sql/type_api.py b/lib/sqlalchemy/sql/type_api.py index ddbdb49a0..5af12cb93 100644 --- a/lib/sqlalchemy/sql/type_api.py +++ b/lib/sqlalchemy/sql/type_api.py @@ -94,6 +94,11 @@ class _ResultProcessorType(Protocol[_T_co]): ... +class _SentinelProcessorType(Protocol[_T_co]): + def __call__(self, value: Any) -> Optional[_T_co]: + ... + + class _BaseTypeMemoDict(TypedDict): impl: TypeEngine[Any] result: Dict[Any, Optional[_ResultProcessorType[Any]]] @@ -102,6 +107,7 @@ class _BaseTypeMemoDict(TypedDict): class _TypeMemoDict(_BaseTypeMemoDict, total=False): literal: Optional[_LiteralProcessorType[Any]] bind: Optional[_BindProcessorType[Any]] + sentinel: Optional[_SentinelProcessorType[Any]] custom: Dict[Any, object] @@ -598,6 +604,18 @@ class TypeEngine(Visitable, Generic[_T]): """ return None + def _sentinel_value_resolver( + self, dialect: Dialect + ) -> Optional[_SentinelProcessorType[_T]]: + """Return an optional callable that will match parameter values + (post-bind processing) to result values + (pre-result-processing), for use in the "sentinel" feature. + + .. versionadded:: 2.0.10 + + """ + return None + @util.memoized_property def _has_bind_expression(self) -> bool: """memoized boolean, check if bind_expression is implemented. @@ -945,6 +963,19 @@ class TypeEngine(Visitable, Generic[_T]): d["result"][coltype] = rp return rp + def _cached_sentinel_value_processor( + self, dialect: Dialect + ) -> Optional[_SentinelProcessorType[_T]]: + + try: + return dialect._type_memos[self]["sentinel"] + except KeyError: + pass + + d = self._dialect_info(dialect) + d["sentinel"] = bp = d["impl"]._sentinel_value_resolver(dialect) + return bp + def _cached_custom_processor( self, dialect: Dialect, key: str, fn: Callable[[TypeEngine[_T]], _O] ) -> _O: |
