diff options
Diffstat (limited to 'lib/sqlalchemy')
39 files changed, 1836 insertions, 286 deletions
diff --git a/lib/sqlalchemy/__init__.py b/lib/sqlalchemy/__init__.py index 86c5afd8f..ad80a33c9 100644 --- a/lib/sqlalchemy/__init__.py +++ b/lib/sqlalchemy/__init__.py @@ -73,6 +73,7 @@ from .schema import ForeignKey as ForeignKey from .schema import ForeignKeyConstraint as ForeignKeyConstraint from .schema import Identity as Identity from .schema import Index as Index +from .schema import insert_sentinel as insert_sentinel from .schema import MetaData as MetaData from .schema import PrimaryKeyConstraint as PrimaryKeyConstraint from .schema import Sequence as Sequence diff --git a/lib/sqlalchemy/dialects/mssql/base.py b/lib/sqlalchemy/dialects/mssql/base.py index f32665792..4a7e48ab8 100644 --- a/lib/sqlalchemy/dialects/mssql/base.py +++ b/lib/sqlalchemy/dialects/mssql/base.py @@ -251,12 +251,16 @@ The process for fetching this value has several variants: INSERT INTO t (x) OUTPUT inserted.id VALUES (?) - .. note:: SQLAlchemy 2.0 introduced the :ref:`engine_insertmanyvalues` - feature for SQL Server, which is used by default to optimize many-row - INSERT statements; however as of SQLAlchemy 2.0.9 this feature is - temporarily disabled for SQL Server, until adjustments can be made - so that the ORM unit of work does not rely upon the ordering of returned - rows. + As of SQLAlchemy 2.0, the :ref:`engine_insertmanyvalues` feature is also + used by default to optimize many-row INSERT statements; for SQL Server + the feature takes place for both RETURNING and-non RETURNING + INSERT statements. + + .. versionchanged:: 2.0.10 The :ref:`engine_insertmanyvalues` feature for + SQL Server was temporarily disabled for SQLAlchemy version 2.0.9 due to + issues with row ordering. As of 2.0.10 the feature is re-enabled, with + special case handling for the unit of work's requirement for RETURNING to + be ordered. * When RETURNING is not available or has been disabled via ``implicit_returning=False``, either the ``scope_identity()`` function or @@ -936,6 +940,7 @@ from ...sql import roles from ...sql import sqltypes from ...sql import util as sql_util from ...sql._typing import is_sql_compiler +from ...sql.compiler import InsertmanyvaluesSentinelOpts from ...types import BIGINT from ...types import BINARY from ...types import CHAR @@ -1514,6 +1519,39 @@ class MSUUid(sqltypes.Uuid): return process + def _sentinel_value_resolver(self, dialect): + """Return a callable that will receive the uuid object or string + as it is normally passed to the DB in the parameter set, after + bind_processor() is called. Convert this value to match + what it would be as coming back from an INSERT..OUTPUT inserted. + + for the UUID type, there are four varieties of settings so here + we seek to convert to the string or UUID representation that comes + back from the driver. + + """ + character_based_uuid = ( + not dialect.supports_native_uuid or not self.native_uuid + ) + + if character_based_uuid: + if self.native_uuid: + # for pyodbc, uuid.uuid() objects are accepted for incoming + # data, as well as strings. but the driver will always return + # uppercase strings in result sets. + def process(value): + return str(value).upper() + + else: + + def process(value): + return str(value) + + return process + else: + # for pymssql, we get uuid.uuid() objects back. + return None + class UNIQUEIDENTIFIER(sqltypes.Uuid[sqltypes._UUID_RETURN]): __visit_name__ = "UNIQUEIDENTIFIER" @@ -2995,13 +3033,18 @@ class MSDialect(default.DefaultDialect): # may be changed at server inspection time for older SQL server versions supports_multivalues_insert = True - # disabled due to #9603 - use_insertmanyvalues = False + use_insertmanyvalues = True # note pyodbc will set this to False if fast_executemany is set, # as of SQLAlchemy 2.0.9 use_insertmanyvalues_wo_returning = True + insertmanyvalues_implicit_sentinel = ( + InsertmanyvaluesSentinelOpts.AUTOINCREMENT + | InsertmanyvaluesSentinelOpts.IDENTITY + | InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT + ) + # "The incoming request has too many parameters. The server supports a " # "maximum of 2100 parameters." # in fact you can have 2099 parameters. @@ -3064,14 +3107,6 @@ class MSDialect(default.DefaultDialect): super().__init__(**opts) - if self.use_insertmanyvalues: - raise exc.ArgumentError( - "The use_insertmanyvalues feature on SQL Server is currently " - "not safe to use, as returned result rows may be returned in " - "random order. Ensure use_insertmanyvalues is left at its " - "default of False (this setting changed in SQLAlchemy 2.0.9)" - ) - self._json_serializer = json_serializer self._json_deserializer = json_deserializer diff --git a/lib/sqlalchemy/dialects/mssql/pyodbc.py b/lib/sqlalchemy/dialects/mssql/pyodbc.py index 08c6bc48f..6af527e73 100644 --- a/lib/sqlalchemy/dialects/mssql/pyodbc.py +++ b/lib/sqlalchemy/dialects/mssql/pyodbc.py @@ -290,19 +290,6 @@ Pyodbc have been resolved as of SQLAlchemy 2.0.5. See the notes at Fast Executemany Mode --------------------- - .. note:: SQLAlchemy 2.0 introduced the :ref:`engine_insertmanyvalues` - feature for SQL Server, which is used by default to optimize many-row - INSERT statements; however as of SQLAlchemy 2.0.9 this feature had - to be turned off for SQL Server as the database does not support - deterministic RETURNING of INSERT rows for a multi-row INSERT statement. - -.. versionchanged:: 2.0.9 - ``fast_executemany`` executions will be used - for INSERT statements that don't include RETURNING, when - ``fast_executemany`` is set. Previously, ``use_insertmanyvalues`` would - cause ``fast_executemany`` to not be used in most cases. - - ``use_insertmanyvalues`` is disabled for SQL Server overall as of 2.0.9. - The PyODBC driver includes support for a "fast executemany" mode of execution which greatly reduces round trips for a DBAPI ``executemany()`` call when using Microsoft ODBC drivers, for **limited size batches that fit in memory**. The @@ -316,6 +303,12 @@ Server dialect supports this parameter by passing the "mssql+pyodbc://scott:tiger@mssql2017:1433/test?driver=ODBC+Driver+17+for+SQL+Server", fast_executemany=True) +.. versionchanged:: 2.0.9 - the ``fast_executemany`` parameter now has its + intended effect of this PyODBC feature taking effect for all INSERT + statements that are executed with multiple parameter sets, which don't + include RETURNING. Previously, SQLAlchemy 2.0's :term:`insertmanyvalues` + feature would cause ``fast_executemany`` to not be used in most cases + even if specified. .. versionadded:: 1.3 diff --git a/lib/sqlalchemy/dialects/mysql/base.py b/lib/sqlalchemy/dialects/mysql/base.py index b5a5b2ca4..eb9ccc606 100644 --- a/lib/sqlalchemy/dialects/mysql/base.py +++ b/lib/sqlalchemy/dialects/mysql/base.py @@ -1062,6 +1062,7 @@ from ...sql import operators from ...sql import roles from ...sql import sqltypes from ...sql import util as sql_util +from ...sql.compiler import InsertmanyvaluesSentinelOpts from ...types import BINARY from ...types import BLOB from ...types import BOOLEAN @@ -2414,6 +2415,9 @@ class MySQLDialect(default.DefaultDialect): supports_default_metavalue = True use_insertmanyvalues: bool = True + insertmanyvalues_implicit_sentinel = ( + InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + ) supports_sane_rowcount = True supports_sane_multi_rowcount = False diff --git a/lib/sqlalchemy/dialects/mysql/provision.py b/lib/sqlalchemy/dialects/mysql/provision.py index 36a5e9f54..b7faf7712 100644 --- a/lib/sqlalchemy/dialects/mysql/provision.py +++ b/lib/sqlalchemy/dialects/mysql/provision.py @@ -82,7 +82,9 @@ def _mysql_temp_table_keyword_args(cfg, eng): @upsert.for_db("mariadb") -def _upsert(cfg, table, returning, set_lambda=None): +def _upsert( + cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False +): from sqlalchemy.dialects.mysql import insert stmt = insert(table) @@ -93,5 +95,7 @@ def _upsert(cfg, table, returning, set_lambda=None): pk1 = table.primary_key.c[0] stmt = stmt.on_duplicate_key_update({pk1.key: pk1}) - stmt = stmt.returning(*returning) + stmt = stmt.returning( + *returning, sort_by_parameter_order=sort_by_parameter_order + ) return stmt diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py index 0fb6295fa..f6f10c476 100644 --- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py +++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py @@ -543,6 +543,11 @@ class _OracleNumeric(sqltypes.Numeric): return handler +class _OracleUUID(sqltypes.Uuid): + def get_dbapi_type(self, dbapi): + return dbapi.STRING + + class _OracleBinaryFloat(_OracleNumeric): def get_dbapi_type(self, dbapi): return dbapi.NATIVE_FLOAT @@ -878,29 +883,9 @@ class OracleExecutionContext_cx_oracle(OracleExecutionContext): and is_sql_compiler(self.compiled) and self.compiled._oracle_returning ): - # create a fake cursor result from the out parameters. unlike - # get_out_parameter_values(), the result-row handlers here will be - # applied at the Result level - - numcols = len(self.out_parameters) - - # [stmt_result for stmt_result in outparam.values] == each - # statement in executemany - # [val for val in stmt_result] == each row for a particular - # statement - initial_buffer = list( - zip( - *[ - [ - val - for stmt_result in self.out_parameters[ - f"ret_{j}" - ].values - for val in stmt_result - ] - for j in range(numcols) - ] - ) + + initial_buffer = self.fetchall_for_returning( + self.cursor, _internal=True ) fetch_strategy = _cursor.FullyBufferedCursorFetchStrategy( @@ -921,6 +906,43 @@ class OracleExecutionContext_cx_oracle(OracleExecutionContext): return c + def fetchall_for_returning(self, cursor, *, _internal=False): + compiled = self.compiled + if ( + not _internal + and compiled is None + or not is_sql_compiler(compiled) + or not compiled._oracle_returning + ): + raise NotImplementedError( + "execution context was not prepared for Oracle RETURNING" + ) + + # create a fake cursor result from the out parameters. unlike + # get_out_parameter_values(), the result-row handlers here will be + # applied at the Result level + + numcols = len(self.out_parameters) + + # [stmt_result for stmt_result in outparam.values] == each + # statement in executemany + # [val for val in stmt_result] == each row for a particular + # statement + return list( + zip( + *[ + [ + val + for stmt_result in self.out_parameters[ + f"ret_{j}" + ].values + for val in (stmt_result or ()) + ] + for j in range(numcols) + ] + ) + ) + def get_out_parameter_values(self, out_param_names): # this method should not be called when the compiler has # RETURNING as we've turned the has_out_parameters flag set to @@ -942,6 +964,7 @@ class OracleDialect_cx_oracle(OracleDialect): supports_sane_multi_rowcount = True insert_executemany_returning = True + insert_executemany_returning_sort_by_parameter_order = True update_executemany_returning = True delete_executemany_returning = True @@ -974,6 +997,7 @@ class OracleDialect_cx_oracle(OracleDialect): oracle.RAW: _OracleRaw, sqltypes.Unicode: _OracleUnicodeStringCHAR, sqltypes.NVARCHAR: _OracleUnicodeStringNCHAR, + sqltypes.Uuid: _OracleUUID, oracle.NCLOB: _OracleUnicodeTextNCLOB, oracle.ROWID: _OracleRowid, } diff --git a/lib/sqlalchemy/dialects/postgresql/asyncpg.py b/lib/sqlalchemy/dialects/postgresql/asyncpg.py index 2acc5fea3..d1a52afd6 100644 --- a/lib/sqlalchemy/dialects/postgresql/asyncpg.py +++ b/lib/sqlalchemy/dialects/postgresql/asyncpg.py @@ -131,6 +131,7 @@ from typing import TYPE_CHECKING from . import json from . import ranges +from .array import ARRAY as PGARRAY from .base import _DECIMAL_TYPES from .base import _FLOAT_TYPES from .base import _INT_TYPES @@ -157,6 +158,10 @@ if TYPE_CHECKING: from typing import Iterable +class AsyncpgARRAY(PGARRAY): + render_bind_cast = True + + class AsyncpgString(sqltypes.String): render_bind_cast = True @@ -904,6 +909,7 @@ class PGDialect_asyncpg(PGDialect): PGDialect.colspecs, { sqltypes.String: AsyncpgString, + sqltypes.ARRAY: AsyncpgARRAY, REGCONFIG: AsyncpgREGCONFIG, sqltypes.Time: AsyncpgTime, sqltypes.Date: AsyncpgDate, diff --git a/lib/sqlalchemy/dialects/postgresql/base.py b/lib/sqlalchemy/dialects/postgresql/base.py index 1ce5600e1..ad5e346b7 100644 --- a/lib/sqlalchemy/dialects/postgresql/base.py +++ b/lib/sqlalchemy/dialects/postgresql/base.py @@ -1468,6 +1468,7 @@ from ...sql import expression from ...sql import roles from ...sql import sqltypes from ...sql import util as sql_util +from ...sql.compiler import InsertmanyvaluesSentinelOpts from ...sql.visitors import InternalTraversal from ...types import BIGINT from ...types import BOOLEAN @@ -2911,6 +2912,12 @@ class PGDialect(default.DefaultDialect): postfetch_lastrowid = False use_insertmanyvalues = True + insertmanyvalues_implicit_sentinel = ( + InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + | InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT + | InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS + ) + supports_comments = True supports_constraint_comments = True supports_default_values = True diff --git a/lib/sqlalchemy/dialects/postgresql/provision.py b/lib/sqlalchemy/dialects/postgresql/provision.py index 582157604..87f1c9a4c 100644 --- a/lib/sqlalchemy/dialects/postgresql/provision.py +++ b/lib/sqlalchemy/dialects/postgresql/provision.py @@ -130,7 +130,9 @@ def prepare_for_drop_tables(config, connection): @upsert.for_db("postgresql") -def _upsert(cfg, table, returning, set_lambda=None): +def _upsert( + cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False +): from sqlalchemy.dialects.postgresql import insert stmt = insert(table) @@ -144,7 +146,9 @@ def _upsert(cfg, table, returning, set_lambda=None): else: stmt = stmt.on_conflict_do_nothing() - stmt = stmt.returning(*returning) + stmt = stmt.returning( + *returning, sort_by_parameter_order=sort_by_parameter_order + ) return stmt diff --git a/lib/sqlalchemy/dialects/sqlite/provision.py b/lib/sqlalchemy/dialects/sqlite/provision.py index 3f86d5a60..2ed8253ab 100644 --- a/lib/sqlalchemy/dialects/sqlite/provision.py +++ b/lib/sqlalchemy/dialects/sqlite/provision.py @@ -174,7 +174,9 @@ def _reap_sqlite_dbs(url, idents): @upsert.for_db("sqlite") -def _upsert(cfg, table, returning, set_lambda=None): +def _upsert( + cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False +): from sqlalchemy.dialects.sqlite import insert stmt = insert(table) @@ -184,5 +186,7 @@ def _upsert(cfg, table, returning, set_lambda=None): else: stmt = stmt.on_conflict_do_nothing() - stmt = stmt.returning(*returning) + stmt = stmt.returning( + *returning, sort_by_parameter_order=sort_by_parameter_order + ) return stmt diff --git a/lib/sqlalchemy/engine/base.py b/lib/sqlalchemy/engine/base.py index ba2c44ed7..dac7c9473 100644 --- a/lib/sqlalchemy/engine/base.py +++ b/lib/sqlalchemy/engine/base.py @@ -2020,13 +2020,8 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]): if self._echo: stats = context._get_cache_stats() + " (insertmanyvalues)" - for ( - sub_stmt, - sub_params, - setinputsizes, - batchnum, - totalbatches, - ) in dialect._deliver_insertmanyvalues_batches( + + for imv_batch in dialect._deliver_insertmanyvalues_batches( cursor, str_statement, effective_parameters, @@ -2034,20 +2029,25 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]): context, ): - if setinputsizes: + if imv_batch.processed_setinputsizes: try: dialect.do_set_input_sizes( - context.cursor, setinputsizes, context + context.cursor, + imv_batch.processed_setinputsizes, + context, ) except BaseException as e: self._handle_dbapi_exception( e, - sql_util._long_statement(sub_stmt), - sub_params, + sql_util._long_statement(imv_batch.replaced_statement), + imv_batch.replaced_parameters, None, context, ) + sub_stmt = imv_batch.replaced_statement + sub_params = imv_batch.replaced_parameters + if engine_events: for fn in self.dispatch.before_cursor_execute: sub_stmt, sub_params = fn( @@ -2063,11 +2063,20 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]): self._log_info(sql_util._long_statement(sub_stmt)) - if batchnum > 1: - stats = ( - f"insertmanyvalues batch {batchnum} " - f"of {totalbatches}" - ) + imv_stats = f""" { + imv_batch.batchnum}/{imv_batch.total_batches} ({ + 'ordered' + if imv_batch.rows_sorted else 'unordered' + }{ + '; batch not supported' + if imv_batch.is_downgraded + else '' + })""" + + if imv_batch.batchnum == 1: + stats += imv_stats + else: + stats = f"insertmanyvalues{imv_stats}" if not self.engine.hide_parameters: self._log_info( @@ -2096,7 +2105,12 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]): ): break else: - dialect.do_execute(cursor, sub_stmt, sub_params, context) + dialect.do_execute( + cursor, + sub_stmt, + sub_params, + context, + ) except BaseException as e: self._handle_dbapi_exception( diff --git a/lib/sqlalchemy/engine/cursor.py b/lib/sqlalchemy/engine/cursor.py index 1f171ddb0..aaf2c1918 100644 --- a/lib/sqlalchemy/engine/cursor.py +++ b/lib/sqlalchemy/engine/cursor.py @@ -1748,13 +1748,18 @@ class CursorResult(Result[_T]): position in the result. The expected use case here is so that multiple INSERT..RETURNING - statements against different tables can produce a single result - that looks like a JOIN of those two tables. + statements (which definitely need to be sorted) against different + tables can produce a single result that looks like a JOIN of those two + tables. E.g.:: r1 = connection.execute( - users.insert().returning(users.c.user_name, users.c.user_id), + users.insert().returning( + users.c.user_name, + users.c.user_id, + sort_by_parameter_order=True + ), user_values ) @@ -1763,6 +1768,7 @@ class CursorResult(Result[_T]): addresses.c.address_id, addresses.c.address, addresses.c.user_id, + sort_by_parameter_order=True ), address_values ) diff --git a/lib/sqlalchemy/engine/default.py b/lib/sqlalchemy/engine/default.py index 462473de2..8992334ee 100644 --- a/lib/sqlalchemy/engine/default.py +++ b/lib/sqlalchemy/engine/default.py @@ -17,6 +17,7 @@ as the base class for their own corresponding classes. from __future__ import annotations import functools +import operator import random import re from time import perf_counter @@ -60,6 +61,7 @@ from ..sql import type_api from ..sql._typing import is_tuple_type from ..sql.base import _NoArg from ..sql.compiler import DDLCompiler +from ..sql.compiler import InsertmanyvaluesSentinelOpts from ..sql.compiler import SQLCompiler from ..sql.elements import quoted_name from ..util.typing import Final @@ -223,6 +225,10 @@ class DefaultDialect(Dialect): use_insertmanyvalues_wo_returning: bool = False + insertmanyvalues_implicit_sentinel: InsertmanyvaluesSentinelOpts = ( + InsertmanyvaluesSentinelOpts.NOT_SUPPORTED + ) + insertmanyvalues_page_size: int = 1000 insertmanyvalues_max_parameters = 32700 @@ -369,13 +375,42 @@ class DefaultDialect(Dialect): and self.delete_returning ) - @property + @util.memoized_property def insert_executemany_returning(self): - return ( - self.insert_returning - and self.supports_multivalues_insert - and self.use_insertmanyvalues - ) + """Default implementation for insert_executemany_returning, if not + otherwise overridden by the specific dialect. + + The default dialect determines "insert_executemany_returning" is + available if the dialect in use has opted into using the + "use_insertmanyvalues" feature. If they haven't opted into that, then + this attribute is False, unless the dialect in question overrides this + and provides some other implementation (such as the Oracle dialect). + + """ + return self.insert_returning and self.use_insertmanyvalues + + @util.memoized_property + def insert_executemany_returning_sort_by_parameter_order(self): + """Default implementation for + insert_executemany_returning_deterministic_order, if not otherwise + overridden by the specific dialect. + + The default dialect determines "insert_executemany_returning" can have + deterministic order only if the dialect in use has opted into using the + "use_insertmanyvalues" feature, which implements deterministic ordering + using client side sentinel columns only by default. The + "insertmanyvalues" feature also features alternate forms that can + use server-generated PK values as "sentinels", but those are only + used if the :attr:`.Dialect.insertmanyvalues_implicit_sentinel` + bitflag enables those alternate SQL forms, which are disabled + by default. + + If the dialect in use hasn't opted into that, then this attribute is + False, unless the dialect in question overrides this and provides some + other implementation (such as the Oracle dialect). + + """ + return self.insert_returning and self.use_insertmanyvalues update_executemany_returning = False delete_executemany_returning = False @@ -725,20 +760,156 @@ class DefaultDialect(Dialect): context = cast(DefaultExecutionContext, context) compiled = cast(SQLCompiler, context.compiled) + imv = compiled._insertmanyvalues + assert imv is not None + is_returning: Final[bool] = bool(compiled.effective_returning) batch_size = context.execution_options.get( "insertmanyvalues_page_size", self.insertmanyvalues_page_size ) + sentinel_value_resolvers = None + if is_returning: - context._insertmanyvalues_rows = result = [] + result: Optional[List[Any]] = [] + context._insertmanyvalues_rows = result + + sort_by_parameter_order = imv.sort_by_parameter_order - for batch_rec in compiled._deliver_insertmanyvalues_batches( - statement, parameters, generic_setinputsizes, batch_size + if imv.num_sentinel_columns: + sentinel_value_resolvers = ( + compiled._imv_sentinel_value_resolvers + ) + else: + sort_by_parameter_order = False + result = None + + for imv_batch in compiled._deliver_insertmanyvalues_batches( + statement, + parameters, + generic_setinputsizes, + batch_size, + sort_by_parameter_order, ): - yield batch_rec + yield imv_batch + if is_returning: - result.extend(cursor.fetchall()) + rows = context.fetchall_for_returning(cursor) + + # I would have thought "is_returning: Final[bool]" + # would have assured this but pylance thinks not + assert result is not None + + if imv.num_sentinel_columns and not imv_batch.is_downgraded: + composite_sentinel = imv.num_sentinel_columns > 1 + if imv.implicit_sentinel: + # for implicit sentinel, which is currently single-col + # integer autoincrement, do a simple sort. + assert not composite_sentinel + result.extend( + sorted(rows, key=operator.itemgetter(-1)) + ) + continue + + # otherwise, create dictionaries to match up batches + # with parameters + assert imv.sentinel_param_keys + + if composite_sentinel: + _nsc = imv.num_sentinel_columns + rows_by_sentinel = { + tuple(row[-_nsc:]): row for row in rows + } + else: + rows_by_sentinel = {row[-1]: row for row in rows} + + if len(rows_by_sentinel) != len(imv_batch.batch): + # see test_insert_exec.py:: + # IMVSentinelTest::test_sentinel_incorrect_rowcount + # for coverage / demonstration + raise exc.InvalidRequestError( + f"Sentinel-keyed result set did not produce " + f"correct number of rows {len(imv_batch.batch)}; " + "produced " + f"{len(rows_by_sentinel)}. Please ensure the " + "sentinel column is fully unique and populated in " + "all cases." + ) + + try: + if composite_sentinel: + if sentinel_value_resolvers: + # composite sentinel (PK) with value resolvers + ordered_rows = [ + rows_by_sentinel[ + tuple( + _resolver(parameters[_spk]) # type: ignore # noqa: E501 + if _resolver + else parameters[_spk] # type: ignore # noqa: E501 + for _resolver, _spk in zip( + sentinel_value_resolvers, + imv.sentinel_param_keys, + ) + ) + ] + for parameters in imv_batch.batch + ] + else: + # composite sentinel (PK) with no value + # resolvers + ordered_rows = [ + rows_by_sentinel[ + tuple( + parameters[_spk] # type: ignore + for _spk in imv.sentinel_param_keys + ) + ] + for parameters in imv_batch.batch + ] + else: + _sentinel_param_key = imv.sentinel_param_keys[0] + if ( + sentinel_value_resolvers + and sentinel_value_resolvers[0] + ): + # single-column sentinel with value resolver + _sentinel_value_resolver = ( + sentinel_value_resolvers[0] + ) + ordered_rows = [ + rows_by_sentinel[ + _sentinel_value_resolver( + parameters[_sentinel_param_key] # type: ignore # noqa: E501 + ) + ] + for parameters in imv_batch.batch + ] + else: + # single-column sentinel with no value resolver + ordered_rows = [ + rows_by_sentinel[ + parameters[_sentinel_param_key] # type: ignore # noqa: E501 + ] + for parameters in imv_batch.batch + ] + except KeyError as ke: + # see test_insert_exec.py:: + # IMVSentinelTest::test_sentinel_cant_match_keys + # for coverage / demonstration + raise exc.InvalidRequestError( + f"Can't match sentinel values in result set to " + f"parameter sets; key {ke.args[0]!r} was not " + "found. " + "There may be a mismatch between the datatype " + "passed to the DBAPI driver vs. that which it " + "returns in a result row. Try using a different " + "datatype, such as integer" + ) from ke + + result.extend(ordered_rows) + + else: + result.extend(rows) def do_executemany(self, cursor, statement, parameters, context=None): cursor.executemany(statement, parameters) @@ -1043,6 +1214,7 @@ class DefaultExecutionContext(ExecutionContext): _empty_dict_params = cast("Mapping[str, Any]", util.EMPTY_DICT) _insertmanyvalues_rows: Optional[List[Tuple[Any, ...]]] = None + _num_sentinel_cols: int = 0 @classmethod def _init_ddl( @@ -1152,6 +1324,17 @@ class DefaultExecutionContext(ExecutionContext): ) elif ( ii + and dml_statement._sort_by_parameter_order + and not self.dialect.insert_executemany_returning_sort_by_parameter_order # noqa: E501 + ): + raise exc.InvalidRequestError( + f"Dialect {self.dialect.dialect_description} with " + f"current server capabilities does not support " + "INSERT..RETURNING with deterministic row ordering " + "when executemany is used" + ) + elif ( + ii and self.dialect.use_insertmanyvalues and not compiled._insertmanyvalues ): @@ -1194,6 +1377,10 @@ class DefaultExecutionContext(ExecutionContext): if len(parameters) > 1: if self.isinsert and compiled._insertmanyvalues: self.execute_style = ExecuteStyle.INSERTMANYVALUES + + imv = compiled._insertmanyvalues + if imv.sentinel_columns is not None: + self._num_sentinel_cols = imv.num_sentinel_columns else: self.execute_style = ExecuteStyle.EXECUTEMANY @@ -1525,6 +1712,9 @@ class DefaultExecutionContext(ExecutionContext): self._is_server_side = False return self.create_default_cursor() + def fetchall_for_returning(self, cursor): + return cursor.fetchall() + def create_default_cursor(self): return self._dbapi_connection.cursor() @@ -1689,6 +1879,13 @@ class DefaultExecutionContext(ExecutionContext): ) if cursor_description is None: strategy = _cursor._NO_CURSOR_DML + elif self._num_sentinel_cols: + assert self.execute_style is ExecuteStyle.INSERTMANYVALUES + if cursor_description: + # strip out the sentinel columns from cursor description + cursor_description = cursor_description[ + 0 : -(self._num_sentinel_cols) + ] result: _cursor.CursorResult[Any] = _cursor.CursorResult( self, strategy, cursor_description @@ -2059,21 +2256,14 @@ class DefaultExecutionContext(ExecutionContext): key_getter = compiled._within_exec_param_key_getter - # pre-determine scalar Python-side defaults - # to avoid many calls of get_insert_default()/ - # get_update_default() + sentinel_counter = 0 + if compiled.insert_prefetch: prefetch_recs = [ ( c, key_getter(c), - ( - c.default.arg, # type: ignore - c.default.is_scalar, - c.default.is_callable, - ) - if c.default and c.default.has_arg - else (None, None, None), + c._default_description_tuple, self.get_insert_default, ) for c in compiled.insert_prefetch @@ -2083,13 +2273,7 @@ class DefaultExecutionContext(ExecutionContext): ( c, key_getter(c), - ( - c.onupdate.arg, # type: ignore - c.onupdate.is_scalar, - c.onupdate.is_callable, - ) - if c.onupdate and c.onupdate.has_arg - else (None, None, None), + c._onupdate_description_tuple, self.get_update_default, ) for c in compiled.update_prefetch @@ -2103,10 +2287,13 @@ class DefaultExecutionContext(ExecutionContext): for ( c, param_key, - (arg, is_scalar, is_callable), + (arg, is_scalar, is_callable, is_sentinel), fallback, ) in prefetch_recs: - if is_scalar: + if is_sentinel: + param[param_key] = sentinel_counter + sentinel_counter += 1 + elif is_scalar: param[param_key] = arg elif is_callable: self.current_column = c diff --git a/lib/sqlalchemy/engine/interfaces.py b/lib/sqlalchemy/engine/interfaces.py index 254aba4bc..0216c155d 100644 --- a/lib/sqlalchemy/engine/interfaces.py +++ b/lib/sqlalchemy/engine/interfaces.py @@ -55,8 +55,10 @@ if TYPE_CHECKING: from ..event import dispatcher from ..exc import StatementError from ..sql import Executable + from ..sql.compiler import _InsertManyValuesBatch from ..sql.compiler import DDLCompiler from ..sql.compiler import IdentifierPreparer + from ..sql.compiler import InsertmanyvaluesSentinelOpts from ..sql.compiler import Linting from ..sql.compiler import SQLCompiler from ..sql.elements import BindParameter @@ -236,14 +238,16 @@ _DBAPIMultiExecuteParams = Union[ _DBAPIAnyExecuteParams = Union[ _DBAPIMultiExecuteParams, _DBAPISingleExecuteParams ] -_DBAPICursorDescription = Tuple[ - str, - "DBAPIType", - Optional[int], - Optional[int], - Optional[int], - Optional[int], - Optional[bool], +_DBAPICursorDescription = Sequence[ + Tuple[ + str, + "DBAPIType", + Optional[int], + Optional[int], + Optional[int], + Optional[int], + Optional[bool], + ] ] _AnySingleExecuteParams = _DBAPISingleExecuteParams @@ -609,9 +613,21 @@ class BindTyping(Enum): aren't. When RENDER_CASTS is used, the compiler will invoke the - :meth:`.SQLCompiler.render_bind_cast` method for each - :class:`.BindParameter` object whose dialect-level type sets the - :attr:`.TypeEngine.render_bind_cast` attribute. + :meth:`.SQLCompiler.render_bind_cast` method for the rendered + string representation of each :class:`.BindParameter` object whose + dialect-level type sets the :attr:`.TypeEngine.render_bind_cast` attribute. + + The :meth:`.SQLCompiler.render_bind_cast` is also used to render casts + for one form of "insertmanyvalues" query, when both + :attr:`.InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT` and + :attr:`.InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS` are set, + where the casts are applied to the intermediary columns e.g. + "INSERT INTO t (a, b, c) SELECT p0::TYP, p1::TYP, p2::TYP " + "FROM (VALUES (?, ?), (?, ?), ...)". + + .. versionadded:: 2.0.10 - :meth:`.SQLCompiler.render_bind_cast` is now + used within some elements of the "insertmanyvalues" implementation. + """ @@ -838,6 +854,14 @@ class Dialect(EventTarget): """ + insert_executemany_returning_sort_by_parameter_order: bool + """dialect / driver / database supports some means of providing + INSERT...RETURNING support when dialect.do_executemany() is used + along with the :paramref:`_dml.Insert.returning.sort_by_parameter_order` + parameter being set. + + """ + update_executemany_returning: bool """dialect supports UPDATE..RETURNING with executemany.""" @@ -881,6 +905,23 @@ class Dialect(EventTarget): .. versionadded:: 2.0 + .. seealso:: + + :ref:`engine_insertmanyvalues` + + """ + + insertmanyvalues_implicit_sentinel: InsertmanyvaluesSentinelOpts + """Options indicating the database supports a form of bulk INSERT where + the autoincrement integer primary key can be reliably used as an ordering + for INSERTed rows. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` + """ insertmanyvalues_page_size: int @@ -2116,15 +2157,7 @@ class Dialect(EventTarget): parameters: _DBAPIMultiExecuteParams, generic_setinputsizes: Optional[_GenericSetInputSizesType], context: ExecutionContext, - ) -> Iterator[ - Tuple[ - str, - _DBAPISingleExecuteParams, - _GenericSetInputSizesType, - int, - int, - ] - ]: + ) -> Iterator[_InsertManyValuesBatch]: """convert executemany parameters for an INSERT into an iterator of statement/single execute values, used by the insertmanyvalues feature. @@ -3112,6 +3145,24 @@ class ExecutionContext: raise NotImplementedError() + def fetchall_for_returning(self, cursor: DBAPICursor) -> Sequence[Any]: + """For a RETURNING result, deliver cursor.fetchall() from the + DBAPI cursor. + + This is a dialect-specific hook for dialects that have special + considerations when calling upon the rows delivered for a + "RETURNING" statement. Default implementation is + ``cursor.fetchall()``. + + This hook is currently used only by the :term:`insertmanyvalues` + feature. Dialects that don't set ``use_insertmanyvalues=True`` + don't need to consider this hook. + + .. versionadded:: 2.0.10 + + """ + raise NotImplementedError() + class ConnectionEventsTarget(EventTarget): """An object which can accept events from :class:`.ConnectionEvents`. diff --git a/lib/sqlalchemy/exc.py b/lib/sqlalchemy/exc.py index 45a7e280e..a5a66de87 100644 --- a/lib/sqlalchemy/exc.py +++ b/lib/sqlalchemy/exc.py @@ -51,17 +51,15 @@ class HasDescriptionCode: self.code = code super().__init__(*arg, **kw) + _what_are_we = "error" + def _code_str(self) -> str: if not self.code: return "" else: return ( - "(Background on this error at: " - "https://sqlalche.me/e/%s/%s)" - % ( - _version_token, - self.code, - ) + f"(Background on this {self._what_are_we} at: " + f"https://sqlalche.me/e/{_version_token}/{self.code})" ) def __str__(self) -> str: @@ -831,3 +829,5 @@ class SAPendingDeprecationWarning(PendingDeprecationWarning): class SAWarning(HasDescriptionCode, RuntimeWarning): """Issued at runtime.""" + + _what_are_we = "warning" diff --git a/lib/sqlalchemy/orm/__init__.py b/lib/sqlalchemy/orm/__init__.py index 69cd7f598..7d70d3c7f 100644 --- a/lib/sqlalchemy/orm/__init__.py +++ b/lib/sqlalchemy/orm/__init__.py @@ -32,6 +32,7 @@ from ._orm_constructors import deferred as deferred from ._orm_constructors import dynamic_loader as dynamic_loader from ._orm_constructors import join as join from ._orm_constructors import mapped_column as mapped_column +from ._orm_constructors import orm_insert_sentinel as orm_insert_sentinel from ._orm_constructors import outerjoin as outerjoin from ._orm_constructors import query_expression as query_expression from ._orm_constructors import relationship as relationship diff --git a/lib/sqlalchemy/orm/_orm_constructors.py b/lib/sqlalchemy/orm/_orm_constructors.py index a0fa4da79..563fef3c5 100644 --- a/lib/sqlalchemy/orm/_orm_constructors.py +++ b/lib/sqlalchemy/orm/_orm_constructors.py @@ -42,6 +42,7 @@ from ..exc import InvalidRequestError from ..sql._typing import _no_kw from ..sql.base import _NoArg from ..sql.base import SchemaEventTarget +from ..sql.schema import _InsertSentinelColumnDefault from ..sql.schema import SchemaConst from ..sql.selectable import FromClause from ..util.typing import Annotated @@ -68,7 +69,7 @@ if TYPE_CHECKING: from ..sql._typing import _OnClauseArgument from ..sql._typing import _TypeEngineArgument from ..sql.elements import ColumnElement - from ..sql.schema import _ServerDefaultType + from ..sql.schema import _ServerDefaultArgument from ..sql.schema import FetchedValue from ..sql.selectable import Alias from ..sql.selectable import Subquery @@ -124,7 +125,7 @@ def mapped_column( info: Optional[_InfoType] = None, onupdate: Optional[Any] = None, insert_default: Optional[Any] = _NoArg.NO_ARG, - server_default: Optional[_ServerDefaultType] = None, + server_default: Optional[_ServerDefaultArgument] = None, server_onupdate: Optional[FetchedValue] = None, active_history: bool = False, quote: Optional[bool] = None, @@ -334,6 +335,56 @@ def mapped_column( ) +def orm_insert_sentinel( + name: Optional[str] = None, + type_: Optional[_TypeEngineArgument[Any]] = None, + *, + default: Optional[Any] = None, + omit_from_statements: bool = True, +) -> MappedColumn[Any]: + """Provides a surrogate :func:`_orm.mapped_column` that generates + a so-called :term:`sentinel` column, allowing efficient bulk + inserts with deterministic RETURNING sorting for tables that don't + otherwise have qualifying primary key configurations. + + Use of :func:`_orm.orm_insert_sentinel` is analogous to the use of the + :func:`_schema.insert_sentinel` construct within a Core + :class:`_schema.Table` construct. + + Guidelines for adding this construct to a Declarative mapped class + are the same as that of the :func:`_schema.insert_sentinel` construct; + the database table itself also needs to have a column with this name + present. + + For background on how this object is used, see the section + :ref:`engine_insertmanyvalues_sentinel_columns` as part of the + section :ref:`engine_insertmanyvalues`. + + .. seealso:: + + :func:`_schema.insert_sentinel` + + :ref:`engine_insertmanyvalues` + + :ref:`engine_insertmanyvalues_sentinel_columns` + + + .. versionadded:: 2.0.10 + + """ + + return mapped_column( + name=name, + default=default + if default is not None + else _InsertSentinelColumnDefault(), + _omit_from_statements=omit_from_statements, + insert_sentinel=True, + use_existing_column=True, + nullable=True, + ) + + @util.deprecated_params( **{ arg: ( diff --git a/lib/sqlalchemy/orm/bulk_persistence.py b/lib/sqlalchemy/orm/bulk_persistence.py index c096dc3e5..8388d3980 100644 --- a/lib/sqlalchemy/orm/bulk_persistence.py +++ b/lib/sqlalchemy/orm/bulk_persistence.py @@ -131,19 +131,24 @@ def _bulk_insert( return_result: Optional[cursor.CursorResult[Any]] = None - for table, super_mapper in base_mapper._sorted_tables.items(): - if not mapper.isa(super_mapper) or table not in mapper._pks_by_table: - continue + mappers_to_run = [ + (table, mp) + for table, mp in base_mapper._sorted_tables.items() + if table in mapper._pks_by_table + ] + + if return_defaults: + # not used by new-style bulk inserts, only used for legacy + bookkeeping = True + elif len(mappers_to_run) > 1: + # if we have more than one table, mapper to run where we will be + # either horizontally splicing, or copying values between tables, + # we need the "bookkeeping" / deterministic returning order + bookkeeping = True + else: + bookkeeping = False - is_joined_inh_supertable = super_mapper is not mapper - bookkeeping = ( - is_joined_inh_supertable - or return_defaults - or ( - use_orm_insert_stmt is not None - and bool(use_orm_insert_stmt._returning) - ) - ) + for table, super_mapper in mappers_to_run: records = ( ( @@ -173,6 +178,7 @@ def _bulk_insert( render_nulls=render_nulls, ) ) + result = persistence._emit_insert_statements( base_mapper, None, @@ -187,6 +193,7 @@ def _bulk_insert( if not use_orm_insert_stmt._returning or return_result is None: return_result = result elif result.returns_rows: + assert bookkeeping return_result = return_result.splice_horizontally(result) if return_defaults and isstates: @@ -507,9 +514,11 @@ class ORMDMLState(AbstractORMCompileState): dml_level_statement = dml_level_statement.return_defaults( # this is a little weird looking, but by passing # primary key as the main list of cols, this tells - # return_defaults to omit server-default cols. Since - # we have cols_to_return, just return what we asked for - # (plus primary key, which ORM persistence needs since + # return_defaults to omit server-default cols (and + # actually all cols, due to some weird thing we should + # clean up in crud.py). + # Since we have cols_to_return, just return what we asked + # for (plus primary key, which ORM persistence needs since # we likely set bookkeeping=True here, which is another # whole thing...). We dont want to clutter the # statement up with lots of other cols the user didn't diff --git a/lib/sqlalchemy/orm/decl_api.py b/lib/sqlalchemy/orm/decl_api.py index ed001023b..2f8289acf 100644 --- a/lib/sqlalchemy/orm/decl_api.py +++ b/lib/sqlalchemy/orm/decl_api.py @@ -241,6 +241,7 @@ class _declared_attr_common: self, fn: Callable[..., Any], cascading: bool = False, + quiet: bool = False, ): # suppport # @declared_attr @@ -254,6 +255,7 @@ class _declared_attr_common: self.fget = fn self._cascading = cascading + self._quiet = quiet self.__doc__ = fn.__doc__ def _collect_return_annotation(self) -> Optional[Type[Any]]: diff --git a/lib/sqlalchemy/orm/decl_base.py b/lib/sqlalchemy/orm/decl_base.py index beede0ddb..b7d6dd8cf 100644 --- a/lib/sqlalchemy/orm/decl_base.py +++ b/lib/sqlalchemy/orm/decl_base.py @@ -459,6 +459,7 @@ class _ClassScanMapperConfig(_MapperConfig): "mapper_args", "mapper_args_fn", "inherits", + "single", "allow_dataclass_fields", "dataclass_setup_arguments", "is_dataclass_prior_to_mapping", @@ -483,6 +484,7 @@ class _ClassScanMapperConfig(_MapperConfig): table_args: Optional[_TableArgsType] mapper_args_fn: Optional[Callable[[], Dict[str, Any]]] inherits: Optional[Type[Any]] + single: bool is_dataclass_prior_to_mapping: bool allow_unmapped_annotations: bool @@ -527,7 +529,7 @@ class _ClassScanMapperConfig(_MapperConfig): self.declared_columns = util.OrderedSet() self.column_ordering = {} self.column_copies = {} - + self.single = False self.dataclass_setup_arguments = dca = getattr( self.cls, "_sa_apply_dc_transforms", None ) @@ -866,7 +868,7 @@ class _ClassScanMapperConfig(_MapperConfig): # should only be __table__ continue elif class_mapped: - if _is_declarative_props(obj): + if _is_declarative_props(obj) and not obj._quiet: util.warn( "Regular (i.e. not __special__) " "attribute '%s.%s' uses @declared_attr, " @@ -1783,6 +1785,10 @@ class _ClassScanMapperConfig(_MapperConfig): self.inherits = inherits + clsdict_view = self.clsdict_view + if "__table__" not in clsdict_view and self.tablename is None: + self.single = True + def _setup_inheriting_columns(self, mapper_kw: _MapperKwArgs) -> None: table = self.local_table cls = self.cls @@ -1809,6 +1815,7 @@ class _ClassScanMapperConfig(_MapperConfig): ) if table is None: + # single table inheritance. # ensure no table args if table_args: diff --git a/lib/sqlalchemy/orm/mapper.py b/lib/sqlalchemy/orm/mapper.py index 81b66de03..731983ff4 100644 --- a/lib/sqlalchemy/orm/mapper.py +++ b/lib/sqlalchemy/orm/mapper.py @@ -3273,6 +3273,9 @@ class Mapper( """ + if column is not None and sql_base._never_select_column(column): + return True + # check for class-bound attributes and/or descriptors, # either local or from an inherited class # ignore dataclass field default values diff --git a/lib/sqlalchemy/orm/persistence.py b/lib/sqlalchemy/orm/persistence.py index a12156eb5..1af55df00 100644 --- a/lib/sqlalchemy/orm/persistence.py +++ b/lib/sqlalchemy/orm/persistence.py @@ -955,8 +955,13 @@ def _emit_insert_statements( # if a user query with RETURNING was passed, we definitely need # to use RETURNING. returning_is_required_anyway = bool(use_orm_insert_stmt._returning) + deterministic_results_reqd = ( + returning_is_required_anyway + and use_orm_insert_stmt._sort_by_parameter_order + ) or bookkeeping else: returning_is_required_anyway = False + deterministic_results_reqd = bookkeeping cached_stmt = base_mapper._memo(("insert", table), table.insert) exec_opt = {"compiled_cache": base_mapper._compiled_cache} @@ -1057,22 +1062,31 @@ def _emit_insert_statements( # know that we are using RETURNING in any case records = list(records) - if ( - not hasvalue - and connection.dialect.insert_executemany_returning - and len(records) > 1 + + if returning_is_required_anyway or ( + not hasvalue and len(records) > 1 ): - do_executemany = True - elif returning_is_required_anyway: - if connection.dialect.insert_executemany_returning: + if ( + deterministic_results_reqd + and connection.dialect.insert_executemany_returning_sort_by_parameter_order # noqa: E501 + ) or ( + not deterministic_results_reqd + and connection.dialect.insert_executemany_returning + ): do_executemany = True - else: + elif returning_is_required_anyway: + if deterministic_results_reqd: + dt = " with RETURNING and sort by parameter order" + else: + dt = " with RETURNING" raise sa_exc.InvalidRequestError( f"Can't use explicit RETURNING for bulk INSERT " f"operation with " f"{connection.dialect.dialect_description} backend; " - f"executemany is not supported with RETURNING" + f"executemany{dt} is not enabled for this dialect." ) + else: + do_executemany = False else: do_executemany = False @@ -1084,13 +1098,19 @@ def _emit_insert_statements( ) ): statement = statement.return_defaults( - *mapper._server_default_cols[table] + *mapper._server_default_cols[table], + sort_by_parameter_order=bookkeeping, ) if mapper.version_id_col is not None: - statement = statement.return_defaults(mapper.version_id_col) + statement = statement.return_defaults( + mapper.version_id_col, + sort_by_parameter_order=bookkeeping, + ) elif do_executemany: - statement = statement.return_defaults(*table.primary_key) + statement = statement.return_defaults( + *table.primary_key, sort_by_parameter_order=bookkeeping + ) if do_executemany: multiparams = [rec[2] for rec in records] diff --git a/lib/sqlalchemy/orm/properties.py b/lib/sqlalchemy/orm/properties.py index f00775874..916b9d901 100644 --- a/lib/sqlalchemy/orm/properties.py +++ b/lib/sqlalchemy/orm/properties.py @@ -665,7 +665,11 @@ class MappedColumn( ) -> None: column = self.column - if self._use_existing_column and decl_scan.inherits: + if ( + self._use_existing_column + and decl_scan.inherits + and decl_scan.single + ): if decl_scan.is_deferred: raise sa_exc.ArgumentError( "Can't use use_existing_column with deferred mappers" diff --git a/lib/sqlalchemy/schema.py b/lib/sqlalchemy/schema.py index 392487282..19782bd7c 100644 --- a/lib/sqlalchemy/schema.py +++ b/lib/sqlalchemy/schema.py @@ -60,6 +60,7 @@ from .sql.schema import ForeignKeyConstraint as ForeignKeyConstraint from .sql.schema import HasConditionalDDL as HasConditionalDDL from .sql.schema import Identity as Identity from .sql.schema import Index as Index +from .sql.schema import insert_sentinel as insert_sentinel from .sql.schema import MetaData as MetaData from .sql.schema import PrimaryKeyConstraint as PrimaryKeyConstraint from .sql.schema import SchemaConst as SchemaConst diff --git a/lib/sqlalchemy/sql/_typing.py b/lib/sqlalchemy/sql/_typing.py index 14b1b9594..596493b7c 100644 --- a/lib/sqlalchemy/sql/_typing.py +++ b/lib/sqlalchemy/sql/_typing.py @@ -12,6 +12,7 @@ from typing import Any from typing import Callable from typing import Dict from typing import Mapping +from typing import NoReturn from typing import Set from typing import Tuple from typing import Type @@ -364,3 +365,8 @@ def _no_kw() -> exc.ArgumentError: "Additional keyword arguments are not accepted by this " "function/method. The presence of **kw is for pep-484 typing purposes" ) + + +def _unexpected_kw(methname: str, kw: Dict[str, Any]) -> NoReturn: + k = list(kw)[0] + raise TypeError(f"{methname} got an unexpected keyword argument '{k}'") diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py index 6267fd814..253927770 100644 --- a/lib/sqlalchemy/sql/base.py +++ b/lib/sqlalchemy/sql/base.py @@ -30,6 +30,7 @@ from typing import Iterator from typing import List from typing import Mapping from typing import MutableMapping +from typing import NamedTuple from typing import NoReturn from typing import Optional from typing import overload @@ -75,6 +76,8 @@ if TYPE_CHECKING: from .elements import NamedColumn from .elements import SQLCoreOperations from .elements import TextClause + from .schema import Column + from .schema import DefaultGenerator from .selectable import _JoinTargetElement from .selectable import _SelectIterable from .selectable import FromClause @@ -122,6 +125,35 @@ _Fn = TypeVar("_Fn", bound=Callable[..., Any]) _AmbiguousTableNameMap = MutableMapping[str, str] +class _DefaultDescriptionTuple(NamedTuple): + arg: Any + is_scalar: Optional[bool] + is_callable: Optional[bool] + is_sentinel: Optional[bool] + + @classmethod + def _from_column_default( + cls, default: Optional[DefaultGenerator] + ) -> _DefaultDescriptionTuple: + return ( + _DefaultDescriptionTuple( + default.arg, # type: ignore + default.is_scalar, + default.is_callable, + default.is_sentinel, + ) + if default + and ( + default.has_arg + or (not default.for_update and default.is_sentinel) + ) + else _DefaultDescriptionTuple(None, None, None, None) + ) + + +_never_select_column = operator.attrgetter("_omit_from_statements") + + class _EntityNamespace(Protocol): def __getattr__(self, key: str) -> SQLCoreOperations[Any]: ... @@ -1303,6 +1335,25 @@ class SchemaVisitor(ClauseVisitor): __traverse_options__ = {"schema_visitor": True} +class _SentinelDefaultCharacterization(Enum): + NONE = "none" + UNKNOWN = "unknown" + CLIENTSIDE = "clientside" + SENTINEL_DEFAULT = "sentinel_default" + SERVERSIDE = "serverside" + IDENTITY = "identity" + SEQUENCE = "sequence" + + +class _SentinelColumnCharacterization(NamedTuple): + columns: Optional[Sequence[Column[Any]]] = None + is_explicit: bool = False + is_autoinc: bool = False + default_characterization: _SentinelDefaultCharacterization = ( + _SentinelDefaultCharacterization.NONE + ) + + _COLKEY = TypeVar("_COLKEY", Union[None, str], str) _COL_co = TypeVar("_COL_co", bound="ColumnElement[Any]", covariant=True) diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index 9c68b311a..554a84112 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -41,6 +41,7 @@ from typing import ClassVar from typing import Dict from typing import FrozenSet from typing import Iterable +from typing import Iterator from typing import List from typing import Mapping from typing import MutableMapping @@ -70,6 +71,7 @@ from ._typing import is_column_element from ._typing import is_dml from .base import _from_objects from .base import _NONE_NAME +from .base import _SentinelDefaultCharacterization from .base import Executable from .base import NO_ARG from .elements import ClauseElement @@ -81,6 +83,7 @@ from .visitors import prefix_anon_map from .visitors import Visitable from .. import exc from .. import util +from ..util import FastIntFlag from ..util.typing import Literal from ..util.typing import Protocol from ..util.typing import TypedDict @@ -100,6 +103,7 @@ if typing.TYPE_CHECKING: from .elements import ColumnElement from .elements import Label from .functions import Function + from .schema import Table from .selectable import AliasedReturnsRows from .selectable import CompoundSelectState from .selectable import CTE @@ -109,9 +113,14 @@ if typing.TYPE_CHECKING: from .selectable import Select from .selectable import SelectState from .type_api import _BindProcessorType + from .type_api import _SentinelProcessorType from ..engine.cursor import CursorResultMetaData from ..engine.interfaces import _CoreSingleExecuteParams + from ..engine.interfaces import _DBAPIAnyExecuteParams + from ..engine.interfaces import _DBAPIMultiExecuteParams + from ..engine.interfaces import _DBAPISingleExecuteParams from ..engine.interfaces import _ExecuteOptions + from ..engine.interfaces import _GenericSetInputSizesType from ..engine.interfaces import _MutableCoreSingleExecuteParams from ..engine.interfaces import Dialect from ..engine.interfaces import SchemaTranslateMapType @@ -460,12 +469,160 @@ class ExpandedState(NamedTuple): class _InsertManyValues(NamedTuple): - """represents state to use for executing an "insertmanyvalues" statement""" + """represents state to use for executing an "insertmanyvalues" statement. + + The primary consumers of this object are the + :meth:`.SQLCompiler._deliver_insertmanyvalues_batches` and + :meth:`.DefaultDialect._deliver_insertmanyvalues_batches` methods. + + .. versionadded:: 2.0 + + """ is_default_expr: bool + """if True, the statement is of the form + ``INSERT INTO TABLE DEFAULT VALUES``, and can't be rewritten as a "batch" + + """ + single_values_expr: str + """The rendered "values" clause of the INSERT statement. + + This is typically the parenthesized section e.g. "(?, ?, ?)" or similar. + The insertmanyvalues logic uses this string as a search and replace + target. + + """ + insert_crud_params: List[crud._CrudParamElementStr] + """List of Column / bind names etc. used while rewriting the statement""" + num_positional_params_counted: int + """the number of bound parameters in a single-row statement. + + This count may be larger or smaller than the actual number of columns + targeted in the INSERT, as it accommodates for SQL expressions + in the values list that may have zero or more parameters embedded + within them. + + This count is part of what's used to organize rewritten parameter lists + when batching. + + """ + + sort_by_parameter_order: bool = False + """if the deterministic_returnined_order parameter were used on the + insert. + + All of the attributes following this will only be used if this is True. + + """ + + includes_upsert_behaviors: bool = False + """if True, we have to accommodate for upsert behaviors. + + This will in some cases downgrade "insertmanyvalues" that requests + deterministic ordering. + + """ + + sentinel_columns: Optional[Sequence[Column[Any]]] = None + """List of sentinel columns that were located. + + This list is only here if the INSERT asked for + sort_by_parameter_order=True, + and dialect-appropriate sentinel columns were located. + + .. versionadded:: 2.0.10 + + """ + + num_sentinel_columns: int = 0 + """how many sentinel columns are in the above list, if any. + + This is the same as + ``len(sentinel_columns) if sentinel_columns is not None else 0`` + + """ + + sentinel_param_keys: Optional[Sequence[Union[str, int]]] = None + """parameter str keys / int indexes in each param dictionary / tuple + that would link to the client side "sentinel" values for that row, which + we can use to match up parameter sets to result rows. + + This is only present if sentinel_columns is present and the INSERT + statement actually refers to client side values for these sentinel + columns. + + .. versionadded:: 2.0.10 + + """ + + implicit_sentinel: bool = False + """if True, we have exactly one sentinel column and it uses a server side + value, currently has to generate an incrementing integer value. + + The dialect in question would have asserted that it supports receiving + these values back and sorting on that value as a means of guaranteeing + correlation with the incoming parameter list. + + .. versionadded:: 2.0.10 + + """ + + embed_values_counter: bool = False + """Whether to embed an incrementing integer counter in each parameter + set within the VALUES clause as parameters are batched over. + + This is only used for a specific INSERT..SELECT..VALUES..RETURNING syntax + where a subquery is used to produce value tuples. Current support + includes PostgreSQL, Microsoft SQL Server. + + .. versionadded:: 2.0.10 + + """ + + +class _InsertManyValuesBatch(NamedTuple): + """represents an individual batch SQL statement for insertmanyvalues. + + This is passed through the + :meth:`.SQLCompiler._deliver_insertmanyvalues_batches` and + :meth:`.DefaultDialect._deliver_insertmanyvalues_batches` methods out + to the :class:`.Connection` within the + :meth:`.Connection._exec_insertmany_context` method. + + .. versionadded:: 2.0.10 + + """ + + replaced_statement: str + replaced_parameters: _DBAPIAnyExecuteParams + processed_setinputsizes: Optional[_GenericSetInputSizesType] + batch: Sequence[_DBAPISingleExecuteParams] + batch_size: int + batchnum: int + total_batches: int + rows_sorted: bool + is_downgraded: bool + + +class InsertmanyvaluesSentinelOpts(FastIntFlag): + """bitflag enum indicating styles of PK defaults + which can work as implicit sentinel columns + + """ + + NOT_SUPPORTED = 1 + AUTOINCREMENT = 2 + IDENTITY = 4 + SEQUENCE = 8 + + ANY_AUTOINCREMENT = AUTOINCREMENT | IDENTITY | SEQUENCE + _SUPPORTED_OR_NOT = NOT_SUPPORTED | ANY_AUTOINCREMENT + + USE_INSERT_FROM_SELECT = 16 + RENDER_SELECT_COL_CASTS = 64 class CompilerState(IntEnum): @@ -1484,6 +1641,7 @@ class SQLCompiler(Compiled): if self._insertmanyvalues: positions = [] + single_values_expr = re.sub( self._positional_pattern, find_position, @@ -1499,13 +1657,19 @@ class SQLCompiler(Compiled): for v in self._insertmanyvalues.insert_crud_params ] - self._insertmanyvalues = _InsertManyValues( - is_default_expr=self._insertmanyvalues.is_default_expr, + sentinel_param_int_idxs = ( + [ + self.positiontup.index(cast(str, _param_key)) + for _param_key in self._insertmanyvalues.sentinel_param_keys # noqa: E501 + ] + if self._insertmanyvalues.sentinel_param_keys is not None + else None + ) + + self._insertmanyvalues = self._insertmanyvalues._replace( single_values_expr=single_values_expr, insert_crud_params=insert_crud_params, - num_positional_params_counted=( - self._insertmanyvalues.num_positional_params_counted - ), + sentinel_param_keys=sentinel_param_int_idxs, ) def _process_numeric(self): @@ -1574,15 +1738,21 @@ class SQLCompiler(Compiled): for v in self._insertmanyvalues.insert_crud_params ] - self._insertmanyvalues = _InsertManyValues( - is_default_expr=self._insertmanyvalues.is_default_expr, + sentinel_param_int_idxs = ( + [ + self.positiontup.index(cast(str, _param_key)) + for _param_key in self._insertmanyvalues.sentinel_param_keys # noqa: E501 + ] + if self._insertmanyvalues.sentinel_param_keys is not None + else None + ) + + self._insertmanyvalues = self._insertmanyvalues._replace( # This has the numbers (:1, :2) single_values_expr=single_values_expr, # The single binds are instead %s so they can be formatted insert_crud_params=insert_crud_params, - num_positional_params_counted=( - self._insertmanyvalues.num_positional_params_counted - ), + sentinel_param_keys=sentinel_param_int_idxs, ) @util.memoized_property @@ -1611,6 +1781,23 @@ class SQLCompiler(Compiled): if value is not None } + @util.memoized_property + def _imv_sentinel_value_resolvers( + self, + ) -> Optional[Sequence[Optional[_SentinelProcessorType[Any]]]]: + imv = self._insertmanyvalues + if imv is None or imv.sentinel_columns is None: + return None + + sentinel_value_resolvers = [ + _scol.type._cached_sentinel_value_processor(self.dialect) + for _scol in imv.sentinel_columns + ] + if util.NONE_SET.issuperset(sentinel_value_resolvers): + return None + else: + return sentinel_value_resolvers + def is_subquery(self): return len(self.stack) > 1 @@ -5023,27 +5210,111 @@ class SQLCompiler(Compiled): ) return dialect_hints, table_text - def _insert_stmt_should_use_insertmanyvalues(self, statement): - return ( - self.dialect.supports_multivalues_insert - and self.dialect.use_insertmanyvalues - # note self.implicit_returning or self._result_columns - # implies self.dialect.insert_returning capability - and ( - self.dialect.use_insertmanyvalues_wo_returning - or self.implicit_returning - or self._result_columns + # within the realm of "insertmanyvalues sentinel columns", + # these lookups match different kinds of Column() configurations + # to specific backend capabilities. they are broken into two + # lookups, one for autoincrement columns and the other for non + # autoincrement columns + _sentinel_col_non_autoinc_lookup = util.immutabledict( + { + _SentinelDefaultCharacterization.CLIENTSIDE: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.SENTINEL_DEFAULT: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.NONE: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.IDENTITY: ( + InsertmanyvaluesSentinelOpts.IDENTITY + ), + _SentinelDefaultCharacterization.SEQUENCE: ( + InsertmanyvaluesSentinelOpts.SEQUENCE + ), + } + ) + _sentinel_col_autoinc_lookup = _sentinel_col_non_autoinc_lookup.union( + { + _SentinelDefaultCharacterization.NONE: ( + InsertmanyvaluesSentinelOpts.AUTOINCREMENT + ), + } + ) + + def _get_sentinel_column_for_table( + self, table: Table + ) -> Optional[Sequence[Column[Any]]]: + """given a :class:`.Table`, return a usable sentinel column or + columns for this dialect if any. + + Return None if no sentinel columns could be identified, or raise an + error if a column was marked as a sentinel explicitly but isn't + compatible with this dialect. + + """ + + sentinel_opts = self.dialect.insertmanyvalues_implicit_sentinel + sentinel_characteristics = table._sentinel_column_characteristics + + sent_cols = sentinel_characteristics.columns + + if sent_cols is None: + return None + + if sentinel_characteristics.is_autoinc: + bitmask = self._sentinel_col_autoinc_lookup.get( + sentinel_characteristics.default_characterization, 0 ) - ) + else: + bitmask = self._sentinel_col_non_autoinc_lookup.get( + sentinel_characteristics.default_characterization, 0 + ) + + if sentinel_opts & bitmask: + return sent_cols + + if sentinel_characteristics.is_explicit: + # a column was explicitly marked as insert_sentinel=True, + # however it is not compatible with this dialect. they should + # not indicate this column as a sentinel if they need to include + # this dialect. + + # TODO: do we want non-primary key explicit sentinel cols + # that can gracefully degrade for some backends? + # insert_sentinel="degrade" perhaps. not for the initial release. + # I am hoping people are generally not dealing with this sentinel + # business at all. + + # if is_explicit is True, there will be only one sentinel column. + + raise exc.InvalidRequestError( + f"Column {sent_cols[0]} can't be explicitly " + "marked as a sentinel column when using the " + f"{self.dialect.name} dialect, as the " + "particular type of default generation on this column is " + "not currently compatible with this dialect's specific " + f"INSERT..RETURNING syntax which can receive the " + "server-generated value in " + "a deterministic way. To remove this error, remove " + "insert_sentinel=True from primary key autoincrement " + "columns; these columns are automatically used as " + "sentinels for supported dialects in any case." + ) + + return None def _deliver_insertmanyvalues_batches( - self, statement, parameters, generic_setinputsizes, batch_size - ): + self, + statement: str, + parameters: _DBAPIMultiExecuteParams, + generic_setinputsizes: Optional[_GenericSetInputSizesType], + batch_size: int, + sort_by_parameter_order: bool, + ) -> Iterator[_InsertManyValuesBatch]: imv = self._insertmanyvalues assert imv is not None - executemany_values = f"({imv.single_values_expr})" - lenparams = len(parameters) if imv.is_default_expr and not self.dialect.supports_default_metavalue: # backend doesn't support @@ -5058,19 +5329,41 @@ class SQLCompiler(Compiled): # cursor.lastrowid etc. still goes through the more heavyweight # "ExecutionContext per statement" system as it isn't usable # as a generic "RETURNING" approach - for batchnum, param in enumerate(parameters, 1): - yield ( + use_row_at_a_time = True + downgraded = False + elif not self.dialect.supports_multivalues_insert or ( + sort_by_parameter_order + and self._result_columns + and (imv.sentinel_columns is None or imv.includes_upsert_behaviors) + ): + # deterministic order was requested and the compiler could + # not organize sentinel columns for this dialect/statement. + # use row at a time + use_row_at_a_time = True + downgraded = True + else: + use_row_at_a_time = False + downgraded = False + + if use_row_at_a_time: + for batchnum, param in enumerate( + cast("Sequence[_DBAPISingleExecuteParams]", parameters), 1 + ): + yield _InsertManyValuesBatch( statement, param, generic_setinputsizes, + [param], + batch_size, batchnum, lenparams, + sort_by_parameter_order, + downgraded, ) return - else: - statement = statement.replace( - executemany_values, "__EXECMANY_TOKEN__" - ) + + executemany_values = f"({imv.single_values_expr})" + statement = statement.replace(executemany_values, "__EXECMANY_TOKEN__") # Use optional insertmanyvalues_max_parameters # to further shrink the batch size so that there are no more than @@ -5094,7 +5387,7 @@ class SQLCompiler(Compiled): batches = list(parameters) - processed_setinputsizes = None + processed_setinputsizes: Optional[_GenericSetInputSizesType] = None batchnum = 1 total_batches = lenparams // batch_size + ( 1 if lenparams % batch_size else 0 @@ -5124,10 +5417,14 @@ class SQLCompiler(Compiled): ) return formatted + if imv.embed_values_counter: + imv_values_counter = ", _IMV_VALUES_COUNTER" + else: + imv_values_counter = "" formatted_values_clause = f"""({', '.join( apply_placeholders(bind_keys, formatted) for _, _, formatted, bind_keys in insert_crud_params - )})""" + )}{imv_values_counter})""" keys_to_replace = all_keys.intersection( escaped_bind_names.get(key, key) @@ -5143,7 +5440,13 @@ class SQLCompiler(Compiled): formatted_values_clause = "" keys_to_replace = set() base_parameters = {} - executemany_values_w_comma = f"({imv.single_values_expr}), " + + if imv.embed_values_counter: + executemany_values_w_comma = ( + f"({imv.single_values_expr}, _IMV_VALUES_COUNTER), " + ) + else: + executemany_values_w_comma = f"({imv.single_values_expr}), " all_names_we_will_expand: Set[str] = set() for elem in imv.insert_crud_params: @@ -5176,7 +5479,7 @@ class SQLCompiler(Compiled): ) while batches: - batch = batches[0:batch_size] + batch = cast("Sequence[Any]", batches[0:batch_size]) batches[0:batch_size] = [] if generic_setinputsizes: @@ -5196,7 +5499,7 @@ class SQLCompiler(Compiled): if self.positional: num_ins_params = imv.num_positional_params_counted - batch_iterator: Iterable[Tuple[Any, ...]] + batch_iterator: Iterable[Sequence[Any]] if num_ins_params == len(batch[0]): extra_params_left = extra_params_right = () batch_iterator = batch @@ -5208,9 +5511,19 @@ class SQLCompiler(Compiled): for b in batch ) - expanded_values_string = ( - executemany_values_w_comma * len(batch) - )[:-2] + if imv.embed_values_counter: + expanded_values_string = ( + "".join( + executemany_values_w_comma.replace( + "_IMV_VALUES_COUNTER", str(i) + ) + for i, _ in enumerate(batch) + ) + )[:-2] + else: + expanded_values_string = ( + (executemany_values_w_comma * len(batch)) + )[:-2] if self._numeric_binds and num_ins_params > 0: # numeric will always number the parameters inside of @@ -5254,12 +5567,14 @@ class SQLCompiler(Compiled): replaced_parameters = base_parameters.copy() for i, param in enumerate(batch): - replaced_values_clauses.append( - formatted_values_clause.replace( - "EXECMANY_INDEX__", str(i) - ) + + fmv = formatted_values_clause.replace( + "EXECMANY_INDEX__", str(i) ) + if imv.embed_values_counter: + fmv = fmv.replace("_IMV_VALUES_COUNTER", str(i)) + replaced_values_clauses.append(fmv) replaced_parameters.update( {f"{key}__{i}": param[key] for key in keys_to_replace} ) @@ -5269,12 +5584,16 @@ class SQLCompiler(Compiled): ", ".join(replaced_values_clauses), ) - yield ( + yield _InsertManyValuesBatch( replaced_statement, replaced_parameters, processed_setinputsizes, + batch, + batch_size, batchnum, total_batches, + sort_by_parameter_order, + False, ) batchnum += 1 @@ -5360,6 +5679,13 @@ class SQLCompiler(Compiled): "version settings does not support " "in-place multirow inserts." % self.dialect.name ) + elif ( + self.implicit_returning or insert_stmt._returning + ) and insert_stmt._sort_by_parameter_order: + raise exc.CompileError( + "RETURNING cannot be determinstically sorted when " + "using an INSERT which includes multi-row values()." + ) crud_params_single = crud_params_struct.single_params else: crud_params_single = crud_params_struct.single_params @@ -5390,11 +5716,82 @@ class SQLCompiler(Compiled): [expr for _, expr, _, _ in crud_params_single] ) - if self.implicit_returning or insert_stmt._returning: + # look for insertmanyvalues attributes that would have been configured + # by crud.py as it scanned through the columns to be part of the + # INSERT + use_insertmanyvalues = crud_params_struct.use_insertmanyvalues + named_sentinel_params: Optional[Sequence[str]] = None + add_sentinel_cols = None + implicit_sentinel = False + + returning_cols = self.implicit_returning or insert_stmt._returning + if returning_cols: + + add_sentinel_cols = crud_params_struct.use_sentinel_columns + + if add_sentinel_cols is not None: + assert use_insertmanyvalues + + # search for the sentinel column explicitly present + # in the INSERT columns list, and additionally check that + # this column has a bound parameter name set up that's in the + # parameter list. If both of these cases are present, it means + # we will have a client side value for the sentinel in each + # parameter set. + + _params_by_col = { + col: param_names + for col, _, _, param_names in crud_params_single + } + named_sentinel_params = [] + for _add_sentinel_col in add_sentinel_cols: + if _add_sentinel_col not in _params_by_col: + named_sentinel_params = None + break + param_name = self._within_exec_param_key_getter( + _add_sentinel_col + ) + if param_name not in _params_by_col[_add_sentinel_col]: + named_sentinel_params = None + break + named_sentinel_params.append(param_name) + + if named_sentinel_params is None: + # if we are not going to have a client side value for + # the sentinel in the parameter set, that means it's + # an autoincrement, an IDENTITY, or a server-side SQL + # expression like nextval('seqname'). So this is + # an "implicit" sentinel; we will look for it in + # RETURNING + # only, and then sort on it. For this case on PG, + # SQL Server we have to use a special INSERT form + # that guarantees the server side function lines up with + # the entries in the VALUES. + if ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + ): + implicit_sentinel = True + else: + # here, we are not using a sentinel at all + # and we are likely the SQLite dialect. + # The first add_sentinel_col that we have should not + # be marked as "insert_sentinel=True". if it was, + # an error should have been raised in + # _get_sentinel_column_for_table. + assert not add_sentinel_cols[0]._insert_sentinel, ( + "sentinel selection rules should have prevented " + "us from getting here for this dialect" + ) + + # always put the sentinel columns last. even if they are + # in the returning list already, they will be there twice + # then. + returning_cols = list(returning_cols) + list(add_sentinel_cols) returning_clause = self.returning_clause( insert_stmt, - self.implicit_returning or insert_stmt._returning, + returning_cols, populate_result_map=toplevel, ) @@ -5423,9 +5820,8 @@ class SQLCompiler(Compiled): text += " %s" % select_text elif not crud_params_single and supports_default_values: text += " DEFAULT VALUES" - if toplevel and self._insert_stmt_should_use_insertmanyvalues( - insert_stmt - ): + if use_insertmanyvalues: + self._insertmanyvalues = _InsertManyValues( True, self.dialect.default_metavalue_token, @@ -5433,6 +5829,17 @@ class SQLCompiler(Compiled): "List[crud._CrudParamElementStr]", crud_params_single ), counted_bindparam, + sort_by_parameter_order=( + insert_stmt._sort_by_parameter_order + ), + includes_upsert_behaviors=( + insert_stmt._post_values_clause is not None + ), + sentinel_columns=add_sentinel_cols, + num_sentinel_columns=len(add_sentinel_cols) + if add_sentinel_cols + else 0, + implicit_sentinel=implicit_sentinel, ) elif compile_state._has_multi_parameters: text += " VALUES %s" % ( @@ -5440,11 +5847,9 @@ class SQLCompiler(Compiled): "(%s)" % (", ".join(value for _, _, value, _ in crud_param_set)) for crud_param_set in crud_params_struct.all_multi_params - ) + ), ) else: - # TODO: why is third element of crud_params_single not str - # already? insert_single_values_expr = ", ".join( [ value @@ -5455,20 +5860,90 @@ class SQLCompiler(Compiled): ] ) - text += " VALUES (%s)" % insert_single_values_expr - if toplevel and self._insert_stmt_should_use_insertmanyvalues( - insert_stmt - ): + if use_insertmanyvalues: + + if ( + implicit_sentinel + and ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT + ) + # this is checking if we have + # INSERT INTO table (id) VALUES (DEFAULT). + and not (crud_params_struct.is_default_metavalue_only) + ): + # if we have a sentinel column that is server generated, + # then for selected backends render the VALUES list as a + # subquery. This is the orderable form supported by + # PostgreSQL and SQL Server. + embed_sentinel_value = True + + render_bind_casts = ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS + ) + + colnames = ", ".join( + f"p{i}" for i, _ in enumerate(crud_params_single) + ) + + if render_bind_casts: + # render casts for the SELECT list. For PG, we are + # already rendering bind casts in the parameter list, + # selectively for the more "tricky" types like ARRAY. + # however, even for the "easy" types, if the parameter + # is NULL for every entry, PG gives up and says + # "it must be TEXT", which fails for other easy types + # like ints. So we cast on this side too. + colnames_w_cast = ", ".join( + self.render_bind_cast( + col.type, + col.type._unwrapped_dialect_impl(self.dialect), + f"p{i}", + ) + for i, (col, *_) in enumerate(crud_params_single) + ) + else: + colnames_w_cast = colnames + + text += ( + f" SELECT {colnames_w_cast} FROM " + f"(VALUES ({insert_single_values_expr})) " + f"AS imp_sen({colnames}, sen_counter) " + "ORDER BY sen_counter" + ) + else: + # otherwise, if no sentinel or backend doesn't support + # orderable subquery form, use a plain VALUES list + embed_sentinel_value = False + text += f" VALUES ({insert_single_values_expr})" + self._insertmanyvalues = _InsertManyValues( - False, - insert_single_values_expr, - cast( + is_default_expr=False, + single_values_expr=insert_single_values_expr, + insert_crud_params=cast( "List[crud._CrudParamElementStr]", crud_params_single, ), - counted_bindparam, + num_positional_params_counted=counted_bindparam, + sort_by_parameter_order=( + insert_stmt._sort_by_parameter_order + ), + includes_upsert_behaviors=( + insert_stmt._post_values_clause is not None + ), + sentinel_columns=add_sentinel_cols, + num_sentinel_columns=len(add_sentinel_cols) + if add_sentinel_cols + else 0, + sentinel_param_keys=named_sentinel_params, + implicit_sentinel=implicit_sentinel, + embed_values_counter=embed_sentinel_value, ) + else: + text += f" VALUES ({insert_single_values_expr})" + if insert_stmt._post_values_clause is not None: post_values_clause = self.process( insert_stmt._post_values_clause, **kw diff --git a/lib/sqlalchemy/sql/crud.py b/lib/sqlalchemy/sql/crud.py index 04b62d1ff..563f61c04 100644 --- a/lib/sqlalchemy/sql/crud.py +++ b/lib/sqlalchemy/sql/crud.py @@ -34,6 +34,7 @@ from . import coercions from . import dml from . import elements from . import roles +from .base import _DefaultDescriptionTuple from .dml import isinsert as _compile_state_isinsert from .elements import ColumnClause from .schema import default_is_clause_element @@ -53,6 +54,7 @@ if TYPE_CHECKING: from .elements import ColumnElement from .elements import KeyedColumnElement from .schema import _SQLExprDefault + from .schema import Column REQUIRED = util.symbol( "REQUIRED", @@ -79,20 +81,22 @@ def _as_dml_column(c: ColumnElement[Any]) -> ColumnClause[Any]: _CrudParamElement = Tuple[ "ColumnElement[Any]", - str, - Optional[Union[str, "_SQLExprDefault"]], + str, # column name + Optional[ + Union[str, "_SQLExprDefault"] + ], # bound parameter string or SQL expression to apply Iterable[str], ] _CrudParamElementStr = Tuple[ "KeyedColumnElement[Any]", str, # column name - str, # placeholder + str, # bound parameter string Iterable[str], ] _CrudParamElementSQLExpr = Tuple[ "ColumnClause[Any]", str, - "_SQLExprDefault", + "_SQLExprDefault", # SQL expression to apply Iterable[str], ] @@ -101,8 +105,10 @@ _CrudParamSequence = List[_CrudParamElement] class _CrudParams(NamedTuple): single_params: _CrudParamSequence - all_multi_params: List[Sequence[_CrudParamElementStr]] + is_default_metavalue_only: bool = False + use_insertmanyvalues: bool = False + use_sentinel_columns: Optional[Sequence[Column[Any]]] = None def _get_crud_params( @@ -206,6 +212,7 @@ def _get_crud_params( (c.key,), ) for c in stmt.table.columns + if not c._omit_from_statements ], [], ) @@ -301,8 +308,10 @@ def _get_crud_params( toplevel, kw, ) + use_insertmanyvalues = False + use_sentinel_columns = None else: - _scan_cols( + use_insertmanyvalues, use_sentinel_columns = _scan_cols( compiler, stmt, compile_state, @@ -328,6 +337,8 @@ def _get_crud_params( % (", ".join("%s" % (c,) for c in check)) ) + is_default_metavalue_only = False + if ( _compile_state_isinsert(compile_state) and compile_state._has_multi_parameters @@ -363,8 +374,15 @@ def _get_crud_params( (), ) ] - - return _CrudParams(values, []) + is_default_metavalue_only = True + + return _CrudParams( + values, + [], + is_default_metavalue_only=is_default_metavalue_only, + use_insertmanyvalues=use_insertmanyvalues, + use_sentinel_columns=use_sentinel_columns, + ) @overload @@ -527,7 +545,19 @@ def _scan_insert_from_select_cols( if stmt.include_insert_from_select_defaults: col_set = set(cols) for col in stmt.table.columns: - if col not in col_set and col.default: + # omit columns that were not in the SELECT statement. + # this will omit columns marked as omit_from_statements naturally, + # as long as that col was not explicit in the SELECT. + # if an omit_from_statements col has a "default" on it, then + # we need to include it, as these defaults should still fire off. + # but, if it has that default and it's the "sentinel" default, + # we don't do sentinel default operations for insert_from_select + # here so we again omit it. + if ( + col not in col_set + and col.default + and not col.default.is_sentinel + ): cols.append(col) for c in cols: @@ -579,6 +609,8 @@ def _scan_cols( implicit_returning, implicit_return_defaults, postfetch_lastrowid, + use_insertmanyvalues, + use_sentinel_columns, ) = _get_returning_modifiers(compiler, stmt, compile_state, toplevel) assert compile_state.isupdate or compile_state.isinsert @@ -672,9 +704,12 @@ def _scan_cols( elif c.default is not None: # column has a default, but it's not a pk column, or it is but # we don't need to get the pk back. - _append_param_insert_hasdefault( - compiler, stmt, c, implicit_return_defaults, values, kw - ) + if not c.default.is_sentinel or ( + use_sentinel_columns is not None + ): + _append_param_insert_hasdefault( + compiler, stmt, c, implicit_return_defaults, values, kw + ) elif c.server_default is not None: # column has a DDL-level default, and is either not a pk @@ -730,6 +765,8 @@ def _scan_cols( if c in remaining_supplemental ) + return (use_insertmanyvalues, use_sentinel_columns) + def _setup_delete_return_defaults( compiler, @@ -744,7 +781,7 @@ def _setup_delete_return_defaults( toplevel, kw, ): - (_, _, implicit_return_defaults, _) = _get_returning_modifiers( + (_, _, implicit_return_defaults, *_) = _get_returning_modifiers( compiler, stmt, compile_state, toplevel ) @@ -1248,6 +1285,18 @@ class _multiparam_column(elements.ColumnElement[Any]): and other.original == self.original ) + @util.memoized_property + def _default_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + + return _DefaultDescriptionTuple._from_column_default(self.default) + + @util.memoized_property + def _onupdate_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + + return _DefaultDescriptionTuple._from_column_default(self.onupdate) + def _process_multiparam_default_bind( compiler: SQLCompiler, @@ -1459,16 +1508,15 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): """ + dialect = compiler.dialect + need_pks = ( toplevel and _compile_state_isinsert(compile_state) and not stmt._inline and ( not compiler.for_executemany - or ( - compiler.dialect.insert_executemany_returning - and stmt._return_defaults - ) + or (dialect.insert_executemany_returning and stmt._return_defaults) ) and not stmt._returning # and (not stmt._returning or stmt._return_defaults) @@ -1479,7 +1527,7 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): # after the INSERT if that's all we need. postfetch_lastrowid = ( need_pks - and compiler.dialect.postfetch_lastrowid + and dialect.postfetch_lastrowid and stmt.table._autoincrement_column is not None ) @@ -1491,7 +1539,7 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): need_pks # the dialect can veto it if it just doesnt support RETURNING # with INSERT - and compiler.dialect.insert_returning + and dialect.insert_returning # user-defined implicit_returning on Table can veto it and compile_state._primary_table.implicit_returning # the compile_state can veto it (SQlite uses this to disable @@ -1506,10 +1554,7 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): # and a lot of weird use cases are supported by it. # SQLite lastrowid times 3x faster than returning, # Mariadb lastrowid 2x faster than returning - ( - not postfetch_lastrowid - or compiler.dialect.favor_returning_over_lastrowid - ) + (not postfetch_lastrowid or dialect.favor_returning_over_lastrowid) or compile_state._has_multi_parameters or stmt._return_defaults ) @@ -1521,25 +1566,57 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): should_implicit_return_defaults = ( implicit_returning and stmt._return_defaults ) + explicit_returning = should_implicit_return_defaults or stmt._returning + use_insertmanyvalues = ( + toplevel + and compiler.for_executemany + and dialect.use_insertmanyvalues + and ( + explicit_returning or dialect.use_insertmanyvalues_wo_returning + ) + ) + + use_sentinel_columns = None + if ( + use_insertmanyvalues + and explicit_returning + and stmt._sort_by_parameter_order + ): + use_sentinel_columns = compiler._get_sentinel_column_for_table( + stmt.table + ) + elif compile_state.isupdate: should_implicit_return_defaults = ( stmt._return_defaults and compile_state._primary_table.implicit_returning and compile_state._supports_implicit_returning - and compiler.dialect.update_returning + and dialect.update_returning ) + use_insertmanyvalues = False + use_sentinel_columns = None elif compile_state.isdelete: should_implicit_return_defaults = ( stmt._return_defaults and compile_state._primary_table.implicit_returning and compile_state._supports_implicit_returning - and compiler.dialect.delete_returning + and dialect.delete_returning ) + use_insertmanyvalues = False + use_sentinel_columns = None else: should_implicit_return_defaults = False # pragma: no cover + use_insertmanyvalues = False + use_sentinel_columns = None if should_implicit_return_defaults: if not stmt._return_defaults_columns: + # TODO: this is weird. See #9685 where we have to + # take an extra step to prevent this from happening. why + # would this ever be *all* columns? but if we set to blank, then + # that seems to break things also in the ORM. So we should + # try to clean this up and figure out what return_defaults + # needs to do w/ the ORM etc. here implicit_return_defaults = set(stmt.table.c) else: implicit_return_defaults = set(stmt._return_defaults_columns) @@ -1551,6 +1628,8 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): implicit_returning or should_implicit_return_defaults, implicit_return_defaults, postfetch_lastrowid, + use_insertmanyvalues, + use_sentinel_columns, ) diff --git a/lib/sqlalchemy/sql/dml.py b/lib/sqlalchemy/sql/dml.py index 830f845b4..911061640 100644 --- a/lib/sqlalchemy/sql/dml.py +++ b/lib/sqlalchemy/sql/dml.py @@ -32,8 +32,8 @@ from typing import Union from . import coercions from . import roles from . import util as sql_util -from ._typing import _no_kw from ._typing import _TP +from ._typing import _unexpected_kw from ._typing import is_column_element from ._typing import is_named_from_clause from .base import _entity_namespace_key @@ -455,6 +455,7 @@ class UpdateBase( self, *cols: _DMLColumnArgument, supplemental_cols: Optional[Iterable[_DMLColumnArgument]] = None, + sort_by_parameter_order: bool = False, ) -> Self: """Make use of a :term:`RETURNING` clause for the purpose of fetching server-side expressions and defaults, for supporting @@ -603,6 +604,20 @@ class UpdateBase( .. versionadded:: 2.0 + :param sort_by_parameter_order: for a batch INSERT that is being + executed against multiple parameter sets, organize the results of + RETURNING so that the returned rows correspond to the order of + parameter sets passed in. This applies only to an :term:`executemany` + execution for supporting dialects and typically makes use of the + :term:`insertmanyvalues` feature. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` - background on + sorting of RETURNING rows for bulk INSERT + .. seealso:: :meth:`.UpdateBase.returning` @@ -636,7 +651,13 @@ class UpdateBase( coercions.expect(roles.ColumnsClauseRole, c) for c in cols ) self._return_defaults = True - + if sort_by_parameter_order: + if not self.is_insert: + raise exc.ArgumentError( + "The 'sort_by_parameter_order' argument to " + "return_defaults() only applies to INSERT statements" + ) + self._sort_by_parameter_order = True if supplemental_cols: # uniquifying while also maintaining order (the maintain of order # is for test suites but also for vertical splicing @@ -661,7 +682,10 @@ class UpdateBase( @_generative def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> UpdateBase: r"""Add a :term:`RETURNING` or equivalent clause to this statement. @@ -723,6 +747,25 @@ class UpdateBase( read the documentation notes for the database in use in order to determine the availability of RETURNING. + :param \*cols: series of columns, SQL expressions, or whole tables + entities to be returned. + :param sort_by_parameter_order: for a batch INSERT that is being + executed against multiple parameter sets, organize the results of + RETURNING so that the returned rows correspond to the order of + parameter sets passed in. This applies only to an :term:`executemany` + execution for supporting dialects and typically makes use of the + :term:`insertmanyvalues` feature. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` - background on + sorting of RETURNING rows for bulk INSERT (Core level discussion) + + :ref:`orm_queryguide_bulk_insert_returning_ordered` - example of + use with :ref:`orm_queryguide_bulk_insert` (ORM level discussion) + .. seealso:: :meth:`.UpdateBase.return_defaults` - an alternative method tailored @@ -733,7 +776,7 @@ class UpdateBase( """ # noqa: E501 if __kw: - raise _no_kw() + raise _unexpected_kw("UpdateBase.returning()", __kw) if self._return_defaults: raise exc.InvalidRequestError( "return_defaults() is already configured on this statement" @@ -741,6 +784,13 @@ class UpdateBase( self._returning += tuple( coercions.expect(roles.ColumnsClauseRole, c) for c in cols ) + if sort_by_parameter_order: + if not self.is_insert: + raise exc.ArgumentError( + "The 'sort_by_parameter_order' argument to returning() " + "only applies to INSERT statements" + ) + self._sort_by_parameter_order = True return self def corresponding_column( @@ -1123,6 +1173,8 @@ class Insert(ValuesBase): select = None include_insert_from_select_defaults = False + _sort_by_parameter_order: bool = False + is_insert = True table: TableClause @@ -1143,6 +1195,7 @@ class Insert(ValuesBase): "_return_defaults_columns", InternalTraversal.dp_clauseelement_tuple, ), + ("_sort_by_parameter_order", InternalTraversal.dp_boolean), ] + HasPrefixes._has_prefixes_traverse_internals + DialectKWArgs._dialect_kwargs_traverse_internals @@ -1231,24 +1284,35 @@ class Insert(ValuesBase): if TYPE_CHECKING: - # START OVERLOADED FUNCTIONS self.returning ReturningInsert 1-8 + # START OVERLOADED FUNCTIONS self.returning ReturningInsert 1-8 ", *, sort_by_parameter_order: bool = False" # noqa: E501 # code within this block is **programmatically, # statically generated** by tools/generate_tuple_map_overloads.py @overload - def returning(self, __ent0: _TCCA[_T0]) -> ReturningInsert[Tuple[_T0]]: + def returning( + self, __ent0: _TCCA[_T0], *, sort_by_parameter_order: bool = False + ) -> ReturningInsert[Tuple[_T0]]: ... @overload def returning( - self, __ent0: _TCCA[_T0], __ent1: _TCCA[_T1] + self, + __ent0: _TCCA[_T0], + __ent1: _TCCA[_T1], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1]]: ... @overload def returning( - self, __ent0: _TCCA[_T0], __ent1: _TCCA[_T1], __ent2: _TCCA[_T2] + self, + __ent0: _TCCA[_T0], + __ent1: _TCCA[_T1], + __ent2: _TCCA[_T2], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2]]: ... @@ -1259,6 +1323,8 @@ class Insert(ValuesBase): __ent1: _TCCA[_T1], __ent2: _TCCA[_T2], __ent3: _TCCA[_T3], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3]]: ... @@ -1270,6 +1336,8 @@ class Insert(ValuesBase): __ent2: _TCCA[_T2], __ent3: _TCCA[_T3], __ent4: _TCCA[_T4], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4]]: ... @@ -1282,6 +1350,8 @@ class Insert(ValuesBase): __ent3: _TCCA[_T3], __ent4: _TCCA[_T4], __ent5: _TCCA[_T5], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5]]: ... @@ -1295,6 +1365,8 @@ class Insert(ValuesBase): __ent4: _TCCA[_T4], __ent5: _TCCA[_T5], __ent6: _TCCA[_T6], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5, _T6]]: ... @@ -1309,6 +1381,8 @@ class Insert(ValuesBase): __ent5: _TCCA[_T5], __ent6: _TCCA[_T6], __ent7: _TCCA[_T7], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5, _T6, _T7]]: ... @@ -1316,12 +1390,18 @@ class Insert(ValuesBase): @overload def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> ReturningInsert[Any]: ... def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> ReturningInsert[Any]: ... diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index 0f356ae27..694faee5f 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -91,7 +91,6 @@ if typing.TYPE_CHECKING: from .compiler import SQLCompiler from .functions import FunctionElement from .operators import OperatorType - from .schema import _ServerDefaultType from .schema import Column from .schema import DefaultGenerator from .schema import FetchedValue @@ -1263,6 +1262,8 @@ class ColumnElement( primary_key: bool = False _is_clone_of: Optional[ColumnElement[_T]] _is_column_element = True + _insert_sentinel: bool = False + _omit_from_statements = False foreign_keys: AbstractSet[ForeignKey] = frozenset() @@ -2212,6 +2213,8 @@ class TextClause( _render_label_in_columns_clause = False + _omit_from_statements = False + @property def _hide_froms(self) -> Iterable[FromClause]: return () @@ -4667,7 +4670,7 @@ class ColumnClause( onupdate: Optional[DefaultGenerator] = None default: Optional[DefaultGenerator] = None - server_default: Optional[_ServerDefaultType] = None + server_default: Optional[FetchedValue] = None server_onupdate: Optional[FetchedValue] = None _is_multiparam_column = False diff --git a/lib/sqlalchemy/sql/schema.py b/lib/sqlalchemy/sql/schema.py index 7d964ad05..192096469 100644 --- a/lib/sqlalchemy/sql/schema.py +++ b/lib/sqlalchemy/sql/schema.py @@ -58,7 +58,10 @@ from . import ddl from . import roles from . import type_api from . import visitors +from .base import _DefaultDescriptionTuple from .base import _NoneName +from .base import _SentinelColumnCharacterization +from .base import _SentinelDefaultCharacterization from .base import DedupeColumnCollection from .base import DialectKWArgs from .base import Executable @@ -77,6 +80,7 @@ from .. import event from .. import exc from .. import inspection from .. import util +from ..util import HasMemoized from ..util.typing import Final from ..util.typing import Literal from ..util.typing import Protocol @@ -107,7 +111,6 @@ if typing.TYPE_CHECKING: _T = TypeVar("_T", bound="Any") _SI = TypeVar("_SI", bound="SchemaItem") -_ServerDefaultType = Union["FetchedValue", str, TextClause, ColumnElement[Any]] _TAB = TypeVar("_TAB", bound="Table") @@ -115,6 +118,10 @@ _CreateDropBind = Union["Engine", "Connection", "MockConnection"] _ConstraintNameArgument = Optional[Union[str, _NoneName]] +_ServerDefaultArgument = Union[ + "FetchedValue", str, TextClause, ColumnElement[Any] +] + class SchemaConst(Enum): @@ -345,6 +352,8 @@ class Table( _columns: DedupeColumnCollection[Column[Any]] + _sentinel_column: Optional[Column[Any]] + constraints: Set[Constraint] """A collection of all :class:`_schema.Constraint` objects associated with this :class:`_schema.Table`. @@ -819,6 +828,8 @@ class Table( assert isinstance(schema, str) self.schema = quoted_name(schema, quote_schema) + self._sentinel_column = None + self.indexes = set() self.constraints = set() PrimaryKeyConstraint( @@ -1005,6 +1016,140 @@ class Table( def _autoincrement_column(self) -> Optional[Column[int]]: return self.primary_key._autoincrement_column + @util.ro_memoized_property + def _sentinel_column_characteristics( + self, + ) -> _SentinelColumnCharacterization: + """determine a candidate column (or columns, in case of a client + generated composite primary key) which can be used as an + "insert sentinel" for an INSERT statement. + + The returned structure, :class:`_SentinelColumnCharacterization`, + includes all the details needed by :class:`.Dialect` and + :class:`.SQLCompiler` to determine if these column(s) can be used + as an INSERT..RETURNING sentinel for a particular database + dialect. + + .. versionadded:: 2.0.10 + + """ + + sentinel_is_explicit = False + sentinel_is_autoinc = False + the_sentinel: Optional[_typing_Sequence[Column[Any]]] = None + + # see if a column was explicitly marked "insert_sentinel=True". + explicit_sentinel_col = self._sentinel_column + + if explicit_sentinel_col is not None: + the_sentinel = (explicit_sentinel_col,) + sentinel_is_explicit = True + + autoinc_col = self._autoincrement_column + if sentinel_is_explicit and explicit_sentinel_col is autoinc_col: + assert autoinc_col is not None + sentinel_is_autoinc = True + elif explicit_sentinel_col is None and autoinc_col is not None: + the_sentinel = (autoinc_col,) + sentinel_is_autoinc = True + + default_characterization = _SentinelDefaultCharacterization.UNKNOWN + + if the_sentinel: + the_sentinel_zero = the_sentinel[0] + if the_sentinel_zero.identity: + + if the_sentinel_zero.identity._increment_is_negative: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + "Can't use IDENTITY default with negative " + "increment as an explicit sentinel column" + ) + else: + if sentinel_is_autoinc: + autoinc_col = None + sentinel_is_autoinc = False + the_sentinel = None + else: + default_characterization = ( + _SentinelDefaultCharacterization.IDENTITY + ) + elif ( + the_sentinel_zero.default is None + and the_sentinel_zero.server_default is None + ): + if the_sentinel_zero.nullable: + raise exc.InvalidRequestError( + f"Column {the_sentinel_zero} has been marked as a " + "sentinel " + "column with no default generation function; it " + "at least needs to be marked nullable=False assuming " + "user-populated sentinel values will be used." + ) + default_characterization = ( + _SentinelDefaultCharacterization.NONE + ) + elif the_sentinel_zero.default is not None: + if the_sentinel_zero.default.is_sentinel: + default_characterization = ( + _SentinelDefaultCharacterization.SENTINEL_DEFAULT + ) + elif default_is_sequence(the_sentinel_zero.default): + + if the_sentinel_zero.default._increment_is_negative: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + "Can't use SEQUENCE default with negative " + "increment as an explicit sentinel column" + ) + else: + if sentinel_is_autoinc: + autoinc_col = None + sentinel_is_autoinc = False + the_sentinel = None + + default_characterization = ( + _SentinelDefaultCharacterization.SEQUENCE + ) + elif the_sentinel_zero.default.is_callable: + default_characterization = ( + _SentinelDefaultCharacterization.CLIENTSIDE + ) + elif the_sentinel_zero.server_default is not None: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + f"Column {the_sentinel[0]} can't be a sentinel column " + "because it uses an explicit server side default " + "that's not the Identity() default." + ) + + default_characterization = ( + _SentinelDefaultCharacterization.SERVERSIDE + ) + + if the_sentinel is None and self.primary_key: + assert autoinc_col is None + + # determine for non-autoincrement pk if all elements are + # client side + for _pkc in self.primary_key: + if _pkc.server_default is not None or ( + _pkc.default and not _pkc.default.is_callable + ): + break + else: + the_sentinel = tuple(self.primary_key) + default_characterization = ( + _SentinelDefaultCharacterization.CLIENTSIDE + ) + + return _SentinelColumnCharacterization( + the_sentinel, + sentinel_is_explicit, + sentinel_is_autoinc, + default_characterization, + ) + @property def autoincrement_column(self) -> Optional[Column[int]]: """Returns the :class:`.Column` object which currently represents @@ -1361,6 +1506,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): inherit_cache = True key: str + server_default: Optional[FetchedValue] + def __init__( self, __name_pos: Optional[ @@ -1384,11 +1531,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): ] = SchemaConst.NULL_UNSPECIFIED, onupdate: Optional[Any] = None, primary_key: bool = False, - server_default: Optional[_ServerDefaultType] = None, + server_default: Optional[_ServerDefaultArgument] = None, server_onupdate: Optional[FetchedValue] = None, quote: Optional[bool] = None, system: bool = False, comment: Optional[str] = None, + insert_sentinel: bool = False, + _omit_from_statements: bool = False, _proxies: Optional[Any] = None, **dialect_kwargs: Any, ): @@ -1873,6 +2022,22 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): :paramref:`_schema.Column.comment` parameter to :class:`_schema.Column`. + :param insert_sentinel: Marks this :class:`_schema.Column` as an + :term:`insert sentinel` used for optimizing the performance of the + :term:`insertmanyvalues` feature for tables that don't + otherwise have qualifying primary key configurations. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :func:`_schema.insert_sentinel` - all in one helper for declaring + sentinel columns + + :ref:`engine_insertmanyvalues` + + :ref:`engine_insertmanyvalues_sentinel_columns` + """ # noqa: E501, RST201, RST202 @@ -1914,7 +2079,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): self.key = key if key is not None else name # type: ignore self.primary_key = primary_key - + self._insert_sentinel = insert_sentinel + self._omit_from_statements = _omit_from_statements self._user_defined_nullable = udn = nullable if udn is not NULL_UNSPECIFIED: self.nullable = udn @@ -1962,22 +2128,26 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): else: self.onpudate = None - self.server_default = server_default - self.server_onupdate = server_onupdate - - if self.server_default is not None: - if isinstance(self.server_default, FetchedValue): - l_args.append(self.server_default._as_for_update(False)) + if server_default is not None: + if isinstance(server_default, FetchedValue): + server_default = server_default._as_for_update(False) + l_args.append(server_default) else: - l_args.append(DefaultClause(self.server_default)) + server_default = DefaultClause(server_default) + l_args.append(server_default) + self.server_default = server_default - if self.server_onupdate is not None: - if isinstance(self.server_onupdate, FetchedValue): - l_args.append(self.server_onupdate._as_for_update(True)) + if server_onupdate is not None: + if isinstance(server_onupdate, FetchedValue): + server_onupdate = server_onupdate._as_for_update(True) + l_args.append(server_onupdate) else: - l_args.append( - DefaultClause(self.server_onupdate, for_update=True) + server_onupdate = DefaultClause( + server_onupdate, for_update=True ) + l_args.append(server_onupdate) + self.server_onupdate = server_onupdate + self._init_items(*cast(_typing_Sequence[SchemaItem], l_args)) util.set_creation_order(self) @@ -2042,6 +2212,17 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): if isinstance(impl, SchemaEventTarget): impl._set_parent_with_dispatch(self) + @HasMemoized.memoized_attribute + def _default_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + + return _DefaultDescriptionTuple._from_column_default(self.default) + + @HasMemoized.memoized_attribute + def _onupdate_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + return _DefaultDescriptionTuple._from_column_default(self.onupdate) + @util.memoized_property def _gen_static_annotations_cache_key(self) -> bool: # type: ignore """special attribute used by cache key gen, if true, we will @@ -2185,6 +2366,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): all_names[self.name] = self self.table = table + if self._insert_sentinel: + if self.table._sentinel_column is not None: + raise exc.ArgumentError( + "a Table may have only one explicit sentinel column" + ) + self.table._sentinel_column = self + if self.primary_key: table.primary_key._replace(self) elif self.key in table.primary_key: @@ -2316,6 +2504,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): server_onupdate=server_onupdate, doc=self.doc, comment=self.comment, + _omit_from_statements=self._omit_from_statements, + insert_sentinel=self._insert_sentinel, *args, **column_kwargs, ) @@ -2472,6 +2662,56 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): return c.key, c +def insert_sentinel( + name: Optional[str] = None, + type_: Optional[_TypeEngineArgument[_T]] = None, + *, + default: Optional[Any] = None, + omit_from_statements: bool = True, +) -> Column[Any]: + """Provides a surrogate :class:`_schema.Column` that will act as a + dedicated insert :term:`sentinel` column, allowing efficient bulk + inserts with deterministic RETURNING sorting for tables that + don't otherwise have qualifying primary key configurations. + + Adding this column to a :class:`.Table` object requires that a + corresponding database table actually has this column present, so if adding + it to an existing model, existing database tables would need to be migrated + (e.g. using ALTER TABLE or similar) to include this column. + + For background on how this object is used, see the section + :ref:`engine_insertmanyvalues_sentinel_columns` as part of the + section :ref:`engine_insertmanyvalues`. + + The :class:`_schema.Column` returned will be a nullable integer column by + default and make use of a sentinel-specific default generator used only in + "insertmanyvalues" operations. + + .. seealso:: + + :func:`_orm.orm_insert_sentinel` + + :paramref:`_schema.Column.insert_sentinel` + + :ref:`engine_insertmanyvalues` + + :ref:`engine_insertmanyvalues_sentinel_columns` + + + .. versionadded:: 2.0.10 + + """ + return Column( + name=name, + type_=type_api.INTEGERTYPE if type_ is None else type_, + default=default + if default is not None + else _InsertSentinelColumnDefault(), + _omit_from_statements=omit_from_statements, + insert_sentinel=True, + ) + + class ForeignKey(DialectKWArgs, SchemaItem): """Defines a dependency between two columns. @@ -3058,17 +3298,24 @@ else: class DefaultGenerator(Executable, SchemaItem): - """Base class for column *default* values.""" + """Base class for column *default* values. + + This object is only present on column.default or column.onupdate. + It's not valid as a server default. + + """ __visit_name__ = "default_generator" _is_default_generator = True is_sequence = False + is_identity = False is_server_default = False is_clause_element = False is_callable = False is_scalar = False has_arg = False + is_sentinel = False column: Optional[Column[Any]] def __init__(self, for_update: bool = False) -> None: @@ -3222,6 +3469,44 @@ class ScalarElementColumnDefault(ColumnDefault): ) +class _InsertSentinelColumnDefault(ColumnDefault): + """Default generator that's specific to the use of a "sentinel" column + when using the insertmanyvalues feature. + + This default is used as part of the :func:`_schema.insert_sentinel` + construct. + + """ + + is_sentinel = True + for_update = False + arg = None + + def __new__(cls) -> _InsertSentinelColumnDefault: + return object.__new__(cls) + + def __init__(self) -> None: + pass + + def _set_parent(self, parent: SchemaEventTarget, **kw: Any) -> None: + col = cast("Column[Any]", parent) + if not col._insert_sentinel: + raise exc.ArgumentError( + "The _InsertSentinelColumnDefault may only be applied to a " + "Column marked as insert_sentinel=True" + ) + elif not col.nullable: + raise exc.ArgumentError( + "The _InsertSentinelColumnDefault may only be applied to a " + "Column that is nullable" + ) + + super()._set_parent(parent, **kw) + + def _copy(self) -> _InsertSentinelColumnDefault: + return _InsertSentinelColumnDefault() + + _SQLExprDefault = Union["ColumnElement[Any]", "TextClause"] @@ -3366,6 +3651,10 @@ class IdentityOptions: self.cache = cache self.order = order + @property + def _increment_is_negative(self) -> bool: + return self.increment is not None and self.increment < 0 + class Sequence(HasSchemaAttr, IdentityOptions, DefaultGenerator): """Represents a named database sequence. @@ -3674,6 +3963,7 @@ class FetchedValue(SchemaEventTarget): reflected = False has_argument = False is_clause_element = False + is_identity = False column: Optional[Column[Any]] @@ -5668,6 +5958,8 @@ class Identity(IdentityOptions, FetchedValue, SchemaItem): __visit_name__ = "identity_column" + is_identity = True + def __init__( self, always: bool = False, diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index 5f530ba7d..8a371951e 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -63,6 +63,7 @@ from .base import _EntityNamespace from .base import _expand_cloned from .base import _from_objects from .base import _generative +from .base import _never_select_column from .base import _NoArg from .base import _select_iterables from .base import CacheableOptions @@ -930,7 +931,7 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable): @util.ro_non_memoized_property def _select_iterable(self) -> _SelectIterable: - return self.c + return (c for c in self.c if not _never_select_column(c)) def _init_collections(self) -> None: assert "_columns" not in self.__dict__ diff --git a/lib/sqlalchemy/sql/type_api.py b/lib/sqlalchemy/sql/type_api.py index ddbdb49a0..5af12cb93 100644 --- a/lib/sqlalchemy/sql/type_api.py +++ b/lib/sqlalchemy/sql/type_api.py @@ -94,6 +94,11 @@ class _ResultProcessorType(Protocol[_T_co]): ... +class _SentinelProcessorType(Protocol[_T_co]): + def __call__(self, value: Any) -> Optional[_T_co]: + ... + + class _BaseTypeMemoDict(TypedDict): impl: TypeEngine[Any] result: Dict[Any, Optional[_ResultProcessorType[Any]]] @@ -102,6 +107,7 @@ class _BaseTypeMemoDict(TypedDict): class _TypeMemoDict(_BaseTypeMemoDict, total=False): literal: Optional[_LiteralProcessorType[Any]] bind: Optional[_BindProcessorType[Any]] + sentinel: Optional[_SentinelProcessorType[Any]] custom: Dict[Any, object] @@ -598,6 +604,18 @@ class TypeEngine(Visitable, Generic[_T]): """ return None + def _sentinel_value_resolver( + self, dialect: Dialect + ) -> Optional[_SentinelProcessorType[_T]]: + """Return an optional callable that will match parameter values + (post-bind processing) to result values + (pre-result-processing), for use in the "sentinel" feature. + + .. versionadded:: 2.0.10 + + """ + return None + @util.memoized_property def _has_bind_expression(self) -> bool: """memoized boolean, check if bind_expression is implemented. @@ -945,6 +963,19 @@ class TypeEngine(Visitable, Generic[_T]): d["result"][coltype] = rp return rp + def _cached_sentinel_value_processor( + self, dialect: Dialect + ) -> Optional[_SentinelProcessorType[_T]]: + + try: + return dialect._type_memos[self]["sentinel"] + except KeyError: + pass + + d = self._dialect_info(dialect) + d["sentinel"] = bp = d["impl"]._sentinel_value_resolver(dialect) + return bp + def _cached_custom_processor( self, dialect: Dialect, key: str, fn: Callable[[TypeEngine[_T]], _O] ) -> _O: diff --git a/lib/sqlalchemy/testing/config.py b/lib/sqlalchemy/testing/config.py index 6c5ebd3dd..855918365 100644 --- a/lib/sqlalchemy/testing/config.py +++ b/lib/sqlalchemy/testing/config.py @@ -10,6 +10,7 @@ from __future__ import annotations import collections +import inspect import typing from typing import Any from typing import Callable @@ -185,7 +186,7 @@ class Variation: return [typ(casename, argname, case_names) for casename in case_names] -def variation(argname, cases): +def variation(argname_or_fn, cases=None): """a helper around testing.combinations that provides a single namespace that can be used as a switch. @@ -221,6 +222,17 @@ def variation(argname, cases): """ + if inspect.isfunction(argname_or_fn): + argname = argname_or_fn.__name__ + cases = argname_or_fn(None) + + @variation_fixture(argname, cases) + def go(self, request): + yield request.param + + return go + else: + argname = argname_or_fn cases_plus_limitations = [ entry if (isinstance(entry, tuple) and len(entry) == 2) diff --git a/lib/sqlalchemy/testing/fixtures.py b/lib/sqlalchemy/testing/fixtures.py index a8bc6c50a..fc1fa1483 100644 --- a/lib/sqlalchemy/testing/fixtures.py +++ b/lib/sqlalchemy/testing/fixtures.py @@ -10,12 +10,14 @@ from __future__ import annotations import itertools +import random import re import sys import sqlalchemy as sa from . import assertions from . import config +from . import mock from . import schema from .assertions import eq_ from .assertions import ne_ @@ -1003,3 +1005,54 @@ class CacheKeyFixture: range(len(case_a)), 2 ): self._compare_equal(case_a[a], case_b[b], compare_values) + + +def insertmanyvalues_fixture( + connection, randomize_rows=False, warn_on_downgraded=False +): + + dialect = connection.dialect + orig_dialect = dialect._deliver_insertmanyvalues_batches + orig_conn = connection._exec_insertmany_context + + class RandomCursor: + __slots__ = ("cursor",) + + def __init__(self, cursor): + self.cursor = cursor + + # only this method is called by the deliver method. + # by not having the other methods we assert that those aren't being + # used + + def fetchall(self): + rows = self.cursor.fetchall() + rows = list(rows) + random.shuffle(rows) + return rows + + def _deliver_insertmanyvalues_batches( + cursor, statement, parameters, generic_setinputsizes, context + ): + if randomize_rows: + cursor = RandomCursor(cursor) + for batch in orig_dialect( + cursor, statement, parameters, generic_setinputsizes, context + ): + if warn_on_downgraded and batch.is_downgraded: + util.warn("Batches were downgraded for sorted INSERT") + + yield batch + + def _exec_insertmany_context( + dialect, + context, + ): + with mock.patch.object( + dialect, + "_deliver_insertmanyvalues_batches", + new=_deliver_insertmanyvalues_batches, + ): + return orig_conn(dialect, context) + + connection._exec_insertmany_context = _exec_insertmany_context diff --git a/lib/sqlalchemy/testing/provision.py b/lib/sqlalchemy/testing/provision.py index 43821854c..66ccb3ee8 100644 --- a/lib/sqlalchemy/testing/provision.py +++ b/lib/sqlalchemy/testing/provision.py @@ -43,7 +43,7 @@ class register: return decorate - def __call__(self, cfg, *arg): + def __call__(self, cfg, *arg, **kw): if isinstance(cfg, str): url = sa_url.make_url(cfg) elif isinstance(cfg, sa_url.URL): @@ -52,9 +52,9 @@ class register: url = cfg.db.url backend = url.get_backend_name() if backend in self.fns: - return self.fns[backend](cfg, *arg) + return self.fns[backend](cfg, *arg, **kw) else: - return self.fns["*"](cfg, *arg) + return self.fns["*"](cfg, *arg, **kw) def create_follower_db(follower_ident): @@ -462,7 +462,9 @@ def set_default_schema_on_connection(cfg, dbapi_connection, schema_name): @register.init -def upsert(cfg, table, returning, set_lambda=None): +def upsert( + cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False +): """return the backends insert..on conflict / on dupe etc. construct. while we should add a backend-neutral upsert construct as well, such as diff --git a/lib/sqlalchemy/testing/suite/test_types.py b/lib/sqlalchemy/testing/suite/test_types.py index 44d174086..3ba7f8e89 100644 --- a/lib/sqlalchemy/testing/suite/test_types.py +++ b/lib/sqlalchemy/testing/suite/test_types.py @@ -1891,6 +1891,31 @@ class UuidTest(_LiteralRoundTripFixture, fixtures.TablesTest): filter_=lambda x: x.lower(), ) + @testing.requires.insert_returning + def test_uuid_returning(self, connection): + data = uuid.uuid4() + str_data = str(data) + uuid_table = self.tables.uuid_table + + result = connection.execute( + uuid_table.insert().returning( + uuid_table.c.uuid_data, + uuid_table.c.uuid_text_data, + uuid_table.c.uuid_data_nonnative, + uuid_table.c.uuid_text_data_nonnative, + ), + { + "id": 1, + "uuid_data": data, + "uuid_text_data": str_data, + "uuid_data_nonnative": data, + "uuid_text_data_nonnative": str_data, + }, + ) + row = result.first() + + eq_(row, (data, str_data, data, str_data)) + class NativeUUIDTest(UuidTest): __requires__ = ("uuid_data_type",) diff --git a/lib/sqlalchemy/util/__init__.py b/lib/sqlalchemy/util/__init__.py index d443cf031..6f409c9e2 100644 --- a/lib/sqlalchemy/util/__init__.py +++ b/lib/sqlalchemy/util/__init__.py @@ -26,6 +26,7 @@ from ._collections import IdentitySet as IdentitySet from ._collections import immutabledict as immutabledict from ._collections import LRUCache as LRUCache from ._collections import merge_lists_w_ordering as merge_lists_w_ordering +from ._collections import NONE_SET as NONE_SET from ._collections import ordered_column_set as ordered_column_set from ._collections import OrderedDict as OrderedDict from ._collections import OrderedIdentitySet as OrderedIdentitySet diff --git a/lib/sqlalchemy/util/_collections.py b/lib/sqlalchemy/util/_collections.py index af117dc92..2e793e862 100644 --- a/lib/sqlalchemy/util/_collections.py +++ b/lib/sqlalchemy/util/_collections.py @@ -69,6 +69,7 @@ _VT = TypeVar("_VT", bound=Any) _T_co = TypeVar("_T_co", covariant=True) EMPTY_SET: FrozenSet[Any] = frozenset() +NONE_SET: FrozenSet[Any] = frozenset([None]) def merge_lists_w_ordering(a: List[Any], b: List[Any]) -> List[Any]: |
