author: mike bayer <mike_mp@zzzcomputing.com> (2023-04-21 16:51:19 +0000)
committer: Gerrit Code Review <gerrit@bbpush.zzzcomputing.com> (2023-04-21 16:51:19 +0000)
commit: c84b3bf198c75ad4f42b0f83d482e480200e6d16 (patch)
tree: 86e991ad8f43515ec7948ff809f44bb7d8b301fa
parent: 95628d9707cdfbfdd229b2acee02fbadfbe7ced0 (diff)
parent: cf6872d3bdf1a8a9613e853694acc2b1e6f06f51 (diff)
download: sqlalchemy-c84b3bf198c75ad4f42b0f83d482e480200e6d16.tar.gz
Merge "add deterministic imv returning ordering using sentinel columns" into main
71 files changed, 5266 insertions, 660 deletions
diff --git a/doc/build/changelog/unreleased_20/9618.rst b/doc/build/changelog/unreleased_20/9618.rst new file mode 100644 index 000000000..7b7e4fbff --- /dev/null +++ b/doc/build/changelog/unreleased_20/9618.rst @@ -0,0 +1,52 @@ +.. change:: + :tags: bug, engine + :tickets: 9618, 9603 + + Repaired a major shortcoming which was identified in the + :ref:`engine_insertmanyvalues` performance optimization feature first + introduced in the 2.0 series. This was a continuation of the change in + 2.0.9 which disabled the SQL Server version of the feature due to a + reliance in the ORM on apparent row ordering that is not guaranteed to take + place. The fix applies new logic to all "insertmanyvalues" operations, + which takes effect when the new parameter + :paramref:`_dml.Insert.returning.sort_by_parameter_order` is used with the + :meth:`_dml.Insert.returning` or :meth:`_dml.UpdateBase.return_defaults` + methods; through a combination of alternate SQL forms, direct + correspondence of client side parameters, and in some cases downgrading to + running row-at-a-time, sorting will be applied to each batch of returned rows + using correspondence to primary key or other unique values in each row + which can be correlated to the input data. + + Performance impact is expected to be minimal, as nearly all common primary + key scenarios are suitable for parameter-ordered batching to be + achieved for all backends other than SQLite, while "row-at-a-time" + mode operates with a bare minimum of Python overhead compared to the very + heavyweight approaches used in the 1.x series. For SQLite, there is no + difference in performance when "row-at-a-time" mode is used. + + It's anticipated that with an efficient "row-at-a-time" INSERT with + RETURNING batching capability, the "insertmanyvalues" feature can later + be more easily generalized to third party backends that include RETURNING + support but not necessarily easy ways to guarantee a correspondence + with parameter order. + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` + + +.. change:: + :tags: bug, mssql + :tickets: 9618, 9603 + + Restored the :term:`insertmanyvalues` feature for Microsoft SQL Server. + This feature was disabled in version 2.0.9 due to an apparent reliance + on the ordering of RETURNING that is not guaranteed. The architecture of + the "insertmanyvalues" feature has been reworked to accommodate + specific organizations of INSERT statements and result row handling that + can guarantee the correspondence of returned rows to input records. + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` + diff --git a/doc/build/changelog/unreleased_20/oracle_uuid.rst b/doc/build/changelog/unreleased_20/oracle_uuid.rst new file mode 100644 index 000000000..820339dde --- /dev/null +++ b/doc/build/changelog/unreleased_20/oracle_uuid.rst @@ -0,0 +1,5 @@ +.. change:: + :tags: bug, oracle + + Fixed issue where the :class:`_sqltypes.Uuid` datatype could not be used in + an INSERT..RETURNING clause with the Oracle dialect. 
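As a minimal sketch of the repaired behavior described in the changelog entry above (the table, data, and in-memory SQLite URL here are illustrative assumptions, not part of the patch), the new parameter can be exercised at the Core level as follows::

    from sqlalchemy import Column, Integer, MetaData, String, Table
    from sqlalchemy import create_engine, insert

    metadata = MetaData()
    user_table = Table(
        "user_table",
        metadata,
        Column("id", Integer, primary_key=True),  # server-generated value
        Column("name", String(50)),
    )

    engine = create_engine("sqlite://")
    metadata.create_all(engine)

    records = [{"name": "spongebob"}, {"name": "sandy"}, {"name": "patrick"}]

    with engine.begin() as conn:
        # executemany-style execution with a list of dictionaries invokes
        # "insertmanyvalues"; sort_by_parameter_order=True requests that
        # RETURNING rows come back in the order of the input parameter sets
        result = conn.execute(
            insert(user_table).returning(
                user_table.c.id, sort_by_parameter_order=True
            ),
            records,
        )
        # with ordering guaranteed, zip() correlates each generated
        # primary key to the input record that produced it
        for pk, record in zip(result.scalars(), records):
            record["id"] = pk

On SQLite this runs row-at-a-time as described above, while backends such as PostgreSQL or SQL Server may use a sorted batched form; the correspondence of returned rows to input records is the same either way.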
diff --git a/doc/build/changelog/whatsnew_20.rst b/doc/build/changelog/whatsnew_20.rst index 02bd22bc6..366c9c0ff 100644 --- a/doc/build/changelog/whatsnew_20.rst +++ b/doc/build/changelog/whatsnew_20.rst @@ -974,21 +974,20 @@ Driver SQLA 1.4 Time (secs) SQLA 2.0 Time (secs) sqlite+pysqlite2 (memory) 6.204843 3.554856 postgresql+asyncpg (network) 88.292285 4.561492 postgresql+psycopg (network) N/A (psycopg3) 4.861368 +mssql+pyodbc (network) 158.396667 4.825139 oracle+cx_Oracle (network) 92.603953 4.809520 mariadb+mysqldb (network) 71.705197 4.075377 ============================ ==================== ==================== -.. mssql+pyodbc (network) .. 158.396667 .. 4.825139 .. note:: - .. [#] The feature is disabled for SQL Server as of SQLAlchemy 2.0.9 due - to incompatibilities in how table-valued expressions are handled by - SQL Server regarding the ORM unit of work. An upcoming release will - re-enable it with unit-of-work oriented adjustments. - See https://github.com/sqlalchemy/sqlalchemy/issues/9603 and - https://github.com/sqlalchemy/sqlalchemy/issues/9618. + .. [#] The feature was temporarily disabled for SQL Server in + SQLAlchemy 2.0.9 due to issues with row ordering when RETURNING is used. + In SQLAlchemy 2.0.10, the feature is re-enabled, with special + case handling for the unit of work's requirement for RETURNING to be + ordered. Two additional drivers have no change in performance; the psycopg2 drivers, for which fast executemany was already implemented in SQLAlchemy 1.4, diff --git a/doc/build/core/connections.rst b/doc/build/core/connections.rst index dc81750cd..f0d74f6d1 100644 --- a/doc/build/core/connections.rst +++ b/doc/build/core/connections.rst @@ -1776,15 +1776,21 @@ performance example. .. versionadded:: 2.0 see :ref:`change_6047` for background on the change including sample performance tests +.. tip:: The :term:`insertmanyvalues` feature is a **transparently available** + performance feature which requires no end-user intervention in order for + it to take place as needed. This section describes the architecture + of the feature as well as how to measure its performance and tune its + behavior in order to optimize the speed of bulk INSERT statements, + particularly as used by the ORM. + As more databases have added support for INSERT..RETURNING, SQLAlchemy has undergone a major change in how it approaches the subject of INSERT statements where there's a need to acquire server-generated values, most importantly server-generated primary key values which allow the new row to be referenced in -subsequent operations. This issue has for over a decade prevented SQLAlchemy -from being able to batch large sets of rows into a small number of database -round trips for the very common case where primary key values are -server-generated, and historically has been the most significant performance -bottleneck in the ORM. +subsequent operations. In particular, this scenario has long been a significant +performance issue in the ORM, which relies on being able to retrieve +server-generated primary key values in order to correctly populate the +:term:`identity map`. With recent support for RETURNING added to SQLite and MariaDB, SQLAlchemy no longer needs to rely upon the single-row-only @@ -1803,57 +1809,28 @@ from the feature of the ``psycopg2`` DBAPI, which SQLAlchemy incrementally added more and more support towards in recent release series. -Concretely, for most backends the behavior will rewrite a statement of the -form: - -.. 
sourcecode:: sql - - INSERT INTO a (data, x, y) VALUES (%(data)s, %(x)s, %(y)s) RETURNING a.id - -into a "batched" form as: - -.. sourcecode:: sql - - INSERT INTO a (data, x, y) VALUES - (%(data_0)s, %(x_0)s, %(y_0)s), - (%(data_1)s, %(x_1)s, %(y_1)s), - (%(data_2)s, %(x_2)s, %(y_2)s), - ... - (%(data_78)s, %(x_78)s, %(y_78)s) - RETURNING a.id - -It's also important to note that the feature will invoke **multiple INSERT -statements** using the DBAPI ``cursor.execute()`` method, -within the scope of **single** call to the Core-level -:meth:`_engine.Connection.execute` method, -with each statement containing up to a fixed limit of parameter sets. -This limit is configurable as described below at :ref:`engine_insertmanyvalues_page_size`. -The separate calls to ``cursor.execute()`` are logged individually and -also individually passed along to event listeners such as -:meth:`.ConnectionEvents.before_cursor_execute` (see :ref:`engine_insertmanyvalues_events` -below). - -The feature is enabled for included SQLAlchemy backends that support RETURNING -as well as "multiple VALUES()" clauses within INSERT statements, -and takes place for all INSERT...RETURNING statements that are used with -"executemany" style execution, which occurs when passing a list of dictionaries -to the :paramref:`_engine.Connection.execute.parameters` parameter of the -:meth:`_engine.Connection.execute` method, as well as throughout Core and ORM -for any similar method including ORM methods like :meth:`_orm.Session.execute` -and asyncio methods like :meth:`_asyncio.AsyncConnection.execute` and -:meth:`_asyncio.AsyncSession.execute`. The ORM itself also makes use of the -feature within the :term:`unit of work` process when inserting many rows, -that is, for large numbers of objects added to a :class:`_orm.Session` using -methods such as :meth:`_orm.Session.add` and :meth:`_orm.Session.add_all`. +Current Support +~~~~~~~~~~~~~~~ + +The feature is enabled for all included SQLAlchemy backends that support +RETURNING, with the exception of Oracle for which both the cx_Oracle and +OracleDB drivers offer their own equivalent feature. The feature normally takes +place when making use of the :meth:`_dml.Insert.returning` method of an +:class:`_dml.Insert` construct in conjunction with :term:`executemany` +execution, which occurs when passing a list of dictionaries to the +:paramref:`_engine.Connection.execute.parameters` parameter of the +:meth:`_engine.Connection.execute` or :meth:`_orm.Session.execute` methods (as +well as equivalent methods under :ref:`asyncio <asyncio_toplevel>` and +shorthand methods like :meth:`_orm.Session.scalars`). It also takes place +within the ORM :term:`unit of work` process when using methods such as +:meth:`_orm.Session.add` and :meth:`_orm.Session.add_all` to add rows. For SQLAlchemy's included dialects, support or equivalent support is currently as follows: * SQLite - supported for SQLite versions 3.35 and above * PostgreSQL - all supported Postgresql versions (9 and above) -* SQL Server - **disabled by default as of SQLAlchemy 2.0.9** - the SQL syntax - used has been shown to not be safe for RETURNING - (see https://github.com/sqlalchemy/sqlalchemy/issues/9603) +* SQL Server - all supported SQL Server versions [#]_ * MariaDB - supported for MariaDB versions 10.5 and above * MySQL - no support, no RETURNING feature is present * Oracle - supports RETURNING with executemany using native cx_Oracle / OracleDB @@ -1861,8 +1838,13 @@ as follows: parameters. 
This is not the same implementation as "insertmanyvalues", however it has the same usage patterns and equivalent performance benefits. -Enabling/Disabling the feature -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. versionchanged:: 2.0.10 + + .. [#] "insertmanyvalues" support for Microsoft SQL Server + is restored, after being temporarily disabled in version 2.0.9. + +Disabling the feature +~~~~~~~~~~~~~~~~~~~~~ To disable the "insertmanyvalues" feature for a given backend for an :class:`.Engine` overall, pass the @@ -1888,6 +1870,326 @@ The feature can also be disabled from being used implicitly for a particular The reason one might want to disable RETURNING for a specific table is to work around backend-specific limitations. + +Batched Mode Operation +~~~~~~~~~~~~~~~~~~~~~~ + +The feature has two modes of operation, which are selected transparently on a +per-dialect, per-:class:`_schema.Table` basis. One is **batched mode**, +which reduces the number of database round trips by rewriting an +INSERT statement of the form: + +.. sourcecode:: sql + + INSERT INTO a (data, x, y) VALUES (%(data)s, %(x)s, %(y)s) RETURNING a.id + +into a "batched" form such as: + +.. sourcecode:: sql + + INSERT INTO a (data, x, y) VALUES + (%(data_0)s, %(x_0)s, %(y_0)s), + (%(data_1)s, %(x_1)s, %(y_1)s), + (%(data_2)s, %(x_2)s, %(y_2)s), + ... + (%(data_78)s, %(x_78)s, %(y_78)s) + RETURNING a.id + +where above, the statement is organized against a subset (a "batch") of the +input data, the size of which is determined by the database backend as well as +the number of parameters in each batch to correspond to known limits for +statement size / number of parameters. The feature then executes the INSERT +statement once for each batch of input data until all records are consumed, +concatenating the RETURNING results for each batch into a single large +rowset that's available from a single :class:`_result.Result` object. + +This "batched" form allows INSERT of many rows using far fewer database round +trips, and has been shown to allow dramatic performance improvements for most +backends where it's supported. + +.. _engine_insertmanyvalues_returning_order: + +Correlating RETURNING rows to parameter sets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 2.0.10 + +The "batch" mode query illustrated in the previous section does not guarantee +that the order of records returned will correspond with that of the input data. +When used by the SQLAlchemy ORM :term:`unit of work` process, as well as for +applications which correlate returned server-generated values with input data, +the :meth:`_dml.Insert.returning` and :meth:`_dml.UpdateBase.return_defaults` +methods include an option +:paramref:`_dml.Insert.returning.sort_by_parameter_order` which indicates that +"insertmanyvalues" mode should guarantee this correspondence. This is **not +related** to the order in which records are actually INSERTed by the database +backend, which is **not** assumed under any circumstances; only that the +returned records should be organized when received back to correspond to the +order in which the original input data was passed. 
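As a brief runnable sketch of requesting this correspondence (the table and the in-memory SQLite URL here are illustrative assumptions), enabling echo logging makes the chosen execution strategy visible::

    from sqlalchemy import Column, Integer, MetaData, String, Table
    from sqlalchemy import create_engine, insert

    metadata = MetaData()
    a = Table(
        "a",
        metadata,
        Column("id", Integer, primary_key=True),  # SQLite ROWID-backed key
        Column("data", String(50)),
    )

    engine = create_engine("sqlite://", echo=True)
    metadata.create_all(engine)

    with engine.begin() as conn:
        # as discussed below, SQLite has no INSERT form that can correlate
        # newly generated ROWID values with parameter order, so the echo
        # output will show one INSERT per parameter set, e.g.
        #   [insertmanyvalues 2/3 (ordered; batch not supported)]
        ids = conn.execute(
            insert(a).returning(a.c.id, sort_by_parameter_order=True),
            [{"data": "d1"}, {"data": "d2"}, {"data": "d3"}],
        ).scalars().all()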
+ +When the :paramref:`_dml.Insert.returning.sort_by_parameter_order` parameter is present, for tables that use server-generated integer primary key values such as ``IDENTITY``, PostgreSQL ``SERIAL``, MariaDB ``AUTO_INCREMENT``, or SQLite's ``ROWID`` scheme, "batch" mode may instead opt to use a more complex INSERT..RETURNING form, in conjunction with post-execution sorting of rows based on the returned values, or if such a form is not available, the "insertmanyvalues" feature may gracefully degrade to "non-batched" mode which runs individual INSERT statements for each parameter set. + +For example, on SQL Server when an auto incrementing ``IDENTITY`` column is used as the primary key, the following SQL form is used: + +.. sourcecode:: sql + + INSERT INTO a (data, x, y) + OUTPUT inserted.id, inserted.id AS id__1 + SELECT p0, p1, p2 FROM (VALUES + (?, ?, ?, 0), (?, ?, ?, 1), (?, ?, ?, 2), + ... + (?, ?, ?, 77) + ) AS imp_sen(p0, p1, p2, sen_counter) ORDER BY sen_counter + +A similar form is used for PostgreSQL as well, when primary key columns use SERIAL or IDENTITY. The above form **does not** guarantee the order in which rows are inserted. However, it does guarantee that the IDENTITY or SERIAL values will be created in order with each parameter set [#]_. The "insertmanyvalues" feature then sorts the returned rows for the above INSERT statement by incrementing integer identity. + +For the SQLite database, there is no appropriate INSERT form that can correlate the production of new ROWID values with the order in which the parameter sets are passed. As a result, when using server-generated primary key values, the SQLite backend will degrade to "non-batched" mode when ordered RETURNING is requested. +For MariaDB, the default INSERT form used by insertmanyvalues is sufficient, as this database backend will line up the order of AUTO_INCREMENT with the order of input data when using InnoDB [#]_. + +For a client-side generated primary key, such as when using the Python ``uuid.uuid4()`` function to generate new values for a :class:`.Uuid` column, the "insertmanyvalues" feature transparently includes this column in the RETURNING records and correlates its value to that of the given input records, thus maintaining correspondence between input records and result rows. From this, it follows that all backends allow for batched, parameter-correlated RETURNING order when client-side-generated primary key values are used. + +The subject of how "insertmanyvalues" "batch" mode determines a column or columns to use as a point of correspondence between input parameters and RETURNING rows is known as an :term:`insert sentinel`, which is a specific column or columns that are used to track such values. The "insert sentinel" is normally selected automatically, however it can also be user-configured for extremely special cases; the section :ref:`engine_insertmanyvalues_sentinel_columns` describes this. + +For backends that do not offer an appropriate INSERT form that can deliver server-generated values deterministically aligned with input values, or for :class:`_schema.Table` configurations that feature other kinds of server generated primary key values, "insertmanyvalues" mode will make use of **non-batched** mode when guaranteed RETURNING ordering is requested. + +.. seealso:: + + .. 
[#] + + * Microsoft SQL Server rationale + + "INSERT queries that use SELECT with ORDER BY to populate rows guarantees + how identity values are computed but not the order in which the rows are inserted." + https://learn.microsoft.com/en-us/sql/t-sql/statements/insert-transact-sql?view=sql-server-ver16#limitations-and-restrictions + + * PostgreSQL batched INSERT Discussion + + Original description in 2018 https://www.postgresql.org/message-id/29386.1528813619@sss.pgh.pa.us + + Follow up in 2023 - https://www.postgresql.org/message-id/be108555-da2a-4abc-a46b-acbe8b55bd25%40app.fastmail.com + + .. [#] + + * MariaDB AUTO_INCREMENT behavior (using the same InnoDB engine as MySQL): + + https://dev.mysql.com/doc/refman/8.0/en/innodb-auto-increment-handling.html + + https://dba.stackexchange.com/a/72099 + +.. _engine_insertmanyvalues_non_batch: + +Non-Batched Mode Operation +~~~~~~~~~~~~~~~~~~ + +For :class:`_schema.Table` configurations that do not have client side primary +key values, and offer server-generated primary key values (or no primary key) +that the database in question is not able to invoke in a deterministic or +sortable way relative to multiple parameter sets, the "insertmanyvalues" +feature when tasked with satisfying the +:paramref:`_dml.Insert.returning.sort_by_parameter_order` requirement for an +:class:`_dml.Insert` statement may instead opt to use **non-batched mode**. + +In this mode, the original SQL form of INSERT is maintained, and the +"insertmanyvalues" feature will instead run the statement as given for each +parameter set individually, organizing the returned rows into a full result +set. Unlike previous SQLAlchemy versions, it does so in a tight loop that +minimizes Python overhead. In some cases, such as on SQLite, "non-batched" mode +performs exactly as well as "batched" mode. + +Statement Execution Model +~~~~~~~~~~~~~~~~~~~~~~~~~ + +For both "batched" and "non-batched" modes, the feature will necessarily +invoke **multiple INSERT statements** using the DBAPI ``cursor.execute()`` method, +within the scope of a **single** call to the Core-level +:meth:`_engine.Connection.execute` method, +with each statement containing up to a fixed limit of parameter sets. +This limit is configurable as described below at :ref:`engine_insertmanyvalues_page_size`. +The separate calls to ``cursor.execute()`` are logged individually and +also individually passed along to event listeners such as +:meth:`.ConnectionEvents.before_cursor_execute` (see :ref:`engine_insertmanyvalues_events` +below). + + + + +.. _engine_insertmanyvalues_sentinel_columns: + +Configuring Sentinel Columns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In typical cases, in order to provide +INSERT..RETURNING with deterministic row order, the "insertmanyvalues" feature will automatically determine a +sentinel column from a given table's primary key, gracefully degrading to "row +at a time" mode if one cannot be identified. As a completely **optional** +feature, to get full "insertmanyvalues" bulk performance for tables that have +server generated primary keys whose default generator functions aren't +compatible with the "sentinel" use case, other non-primary key columns may be +marked as "sentinel" columns assuming they meet certain requirements. A typical +example is a non-primary key :class:`_sqltypes.Uuid` column with a client side +default such as the Python ``uuid.uuid4()`` function. 
There is also a construct to create simple integer columns with a client side integer counter oriented towards the "insertmanyvalues" use case. Sentinel columns may be indicated by adding :paramref:`_schema.Column.insert_sentinel` to qualifying columns. The most basic "qualifying" column is a not-nullable, unique column with a client side default, such as a UUID column as follows:: import uuid from sqlalchemy import Column from sqlalchemy import FetchedValue from sqlalchemy import Integer from sqlalchemy import String from sqlalchemy import Table from sqlalchemy import Uuid my_table = Table( "some_table", metadata, # assume some arbitrary server-side function generates # primary key values, so cannot be tracked by a bulk insert Column("id", String(50), server_default=FetchedValue(), primary_key=True), Column("data", String(50)), Column( "uniqueid", Uuid(), default=uuid.uuid4, nullable=False, unique=True, insert_sentinel=True, ), ) When using ORM Declarative models, the same forms are available using the :class:`_orm.mapped_column` construct:: import uuid from sqlalchemy import FetchedValue from sqlalchemy import String from sqlalchemy.orm import DeclarativeBase from sqlalchemy.orm import Mapped from sqlalchemy.orm import mapped_column class Base(DeclarativeBase): pass class MyClass(Base): __tablename__ = "my_table" id: Mapped[str] = mapped_column(primary_key=True, server_default=FetchedValue()) data: Mapped[str] = mapped_column(String(50)) uniqueid: Mapped[uuid.UUID] = mapped_column( default=uuid.uuid4, unique=True, insert_sentinel=True ) While the values generated by the default generator **must** be unique, the actual UNIQUE constraint on the above "sentinel" column, indicated by the ``unique=True`` parameter, itself is optional and may be omitted if not desired. There is also a special form of "insert sentinel" that's a dedicated nullable integer column which makes use of a special default integer counter that's only used during "insertmanyvalues" operations; as an additional behavior, the column will omit itself from SQL statements and result sets and behave in a mostly transparent manner. It does need to be physically present within the actual database table, however. 
This style of :class:`_schema.Column` may be constructed using the function :func:`_schema.insert_sentinel`:: from sqlalchemy import Column from sqlalchemy import Integer from sqlalchemy import String from sqlalchemy import Table from sqlalchemy import insert_sentinel Table( "some_table", metadata, Column("id", Integer, primary_key=True), Column("data", String(50)), insert_sentinel("sentinel"), ) When using ORM Declarative, a Declarative-friendly version of :func:`_schema.insert_sentinel` is available called :func:`_orm.orm_insert_sentinel`, which has the ability to be used on the Base class or a mixin; if packaged using :func:`_orm.declared_attr`, the column will apply itself to all table-bound subclasses including within joined inheritance hierarchies:: from sqlalchemy import FetchedValue from sqlalchemy import ForeignKey from sqlalchemy import String from sqlalchemy.orm import declared_attr from sqlalchemy.orm import DeclarativeBase from sqlalchemy.orm import Mapped from sqlalchemy.orm import mapped_column from sqlalchemy.orm import orm_insert_sentinel class Base(DeclarativeBase): @declared_attr def _sentinel(cls) -> Mapped[int]: return orm_insert_sentinel() class MyClass(Base): __tablename__ = "my_table" id: Mapped[str] = mapped_column(primary_key=True, server_default=FetchedValue()) data: Mapped[str] = mapped_column(String(50)) class MySubClass(MyClass): __tablename__ = "sub_table" id: Mapped[str] = mapped_column(ForeignKey("my_table.id"), primary_key=True) class MySingleInhClass(MyClass): pass In the example above, both "my_table" and "sub_table" will have an additional integer column named "_sentinel" that can be used by the "insertmanyvalues" feature to help optimize bulk inserts used by the ORM. + + .. _engine_insertmanyvalues_page_size: Controlling the Batch Size @@ -1960,8 +2262,8 @@ Or configured on the statement itself:: Logging and Events ~~~~~~~~~~~~~~~~~~ -The "insertmanyvalues" feature integrates fully with SQLAlchemy's statement -logging as well as cursor events such as :meth:`.ConnectionEvents.before_cursor_execute`. +The "insertmanyvalues" feature integrates fully with SQLAlchemy's :ref:`statement +logging <dbengine_logging>` as well as cursor events such as :meth:`.ConnectionEvents.before_cursor_execute`. When the list of parameters is broken into separate batches, **each INSERT statement is logged and passed to event handlers individually**. This is a major change compared to how the psycopg2-only feature worked in previous 1.x series of @@ -1973,14 +2275,36 @@ an excerpt of this logging: .. sourcecode:: text INSERT INTO a (data, x, y) VALUES (?, ?, ?), ... 795 characters truncated ... (?, ?, ?), (?, ?, ?) RETURNING id - [generated in 0.00177s (insertmanyvalues)] ('d0', 0, 0, 'd1', ... + [generated in 0.00177s (insertmanyvalues) 1/10 (unordered)] ('d0', 0, 0, 'd1', ... INSERT INTO a (data, x, y) VALUES (?, ?, ?), ... 795 characters truncated ... (?, ?, ?), (?, ?, ?) RETURNING id - [insertmanyvalues batch 2 of 10] ('d100', 100, 1000, 'd101', ... + [insertmanyvalues 2/10 (unordered)] ('d100', 100, 1000, 'd101', ... ... INSERT INTO a (data, x, y) VALUES (?, ?, ?), ... 795 characters truncated ... (?, ?, ?), (?, ?, ?) RETURNING id - [insertmanyvalues batch 10 of 10] ('d900', 900, 9000, 'd901', ... + [insertmanyvalues 10/10 (unordered)] ('d900', 900, 9000, 'd901', ... + +When :ref:`non-batch mode <engine_insertmanyvalues_non_batch>` takes place, logging will indicate this along with the insertmanyvalues message: + +.. 
sourcecode:: text + + ... + + INSERT INTO a (data, x, y) VALUES (?, ?, ?) RETURNING id + [insertmanyvalues 67/78 (ordered; batch not supported)] ('d66', 66, 66) + INSERT INTO a (data, x, y) VALUES (?, ?, ?) RETURNING id + [insertmanyvalues 68/78 (ordered; batch not supported)] ('d67', 67, 67) + INSERT INTO a (data, x, y) VALUES (?, ?, ?) RETURNING id + [insertmanyvalues 69/78 (ordered; batch not supported)] ('d68', 68, 68) + INSERT INTO a (data, x, y) VALUES (?, ?, ?) RETURNING id + [insertmanyvalues 70/78 (ordered; batch not supported)] ('d69', 69, 69) + + ... + +.. seealso:: + + :ref:`dbengine_logging` Upsert Support ~~~~~~~~~~~~~~ diff --git a/doc/build/core/metadata.rst b/doc/build/core/metadata.rst index aa7cb503a..496697056 100644 --- a/doc/build/core/metadata.rst +++ b/doc/build/core/metadata.rst @@ -604,6 +604,8 @@ Column, Table, MetaData API .. autoclass:: SchemaItem :members: +.. autofunction:: insert_sentinel + .. autoclass:: Table :members: :inherited-members: diff --git a/doc/build/errors.rst b/doc/build/errors.rst index aa28ea909..ea76a9914 100644 --- a/doc/build/errors.rst +++ b/doc/build/errors.rst @@ -910,7 +910,6 @@ Mitigation of this error is via these techniques: :ref:`session_expire` - background on attribute expiry - .. _error_7s2a: This Session's transaction has been rolled back due to a previous exception during flush diff --git a/doc/build/glossary.rst b/doc/build/glossary.rst index 2fa2b00d9..21c8a6ebf 100644 --- a/doc/build/glossary.rst +++ b/doc/build/glossary.rst @@ -27,6 +27,44 @@ Glossary :ref:`migration_20_toplevel` + sentinel + insert sentinel + This is a SQLAlchemy-specific term that refers to a + :class:`_schema.Column` which can be used for a bulk + :term:`insertmanyvalues` operation to track INSERTed data records + against rows passed back using RETURNING or similar. Such a + column configuration is necessary for those cases when the + :term:`insertmanyvalues` feature does an optimized INSERT..RETURNING + statement for many rows at once while still being able to guarantee the + order of returned rows matches the input data. + + For typical use cases, the SQLAlchemy SQL compiler can automatically + make use of surrogate integer primary key columns as "insert + sentinels", and no user-configuration is required. For less common + cases with other varieties of server-generated primary key values, + explicit "insert sentinel" columns may be optionally configured within + :term:`table metadata` in order to optimize INSERT statements that + are inserting many rows at once. + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` - in the section + :ref:`engine_insertmanyvalues` + + insertmanyvalues + This refers to a SQLAlchemy-specific feature which allows INSERT + statements to emit thousands of new rows within a single statement + while at the same time allowing server generated values to be returned + inline from the statement using RETURNING or similar, for performance + optimization purposes. The feature is intended to be transparently + available for selected backends, but does offer some configurational + options. See the section :ref:`engine_insertmanyvalues` for a full + description of this feature. + + .. 
seealso:: + + :ref:`engine_insertmanyvalues` + mixin class mixin classes @@ -403,6 +441,8 @@ Glossary `Metadata Mapping (via Martin Fowler) <https://www.martinfowler.com/eaaCatalog/metadataMapping.html>`_ + :ref:`tutorial_working_with_metadata` - in the :ref:`unified_tutorial` + version id column In SQLAlchemy, this refers to the use of a particular table column that tracks the "version" of a particular row, as the row changes values. While diff --git a/doc/build/orm/large_collections.rst b/doc/build/orm/large_collections.rst index afe3c506c..ef1e651e1 100644 --- a/doc/build/orm/large_collections.rst +++ b/doc/build/orm/large_collections.rst @@ -152,12 +152,18 @@ source of objects to start, where below we use a Python ``list``:: {execsql}BEGIN (implicit) INSERT INTO account (identifier) VALUES (?) [...] ('account_01',) - INSERT INTO account_transaction (account_id, description, amount, timestamp) VALUES - (?, ?, ?, CURRENT_TIMESTAMP), (?, ?, ?, CURRENT_TIMESTAMP), (?, ?, ?, CURRENT_TIMESTAMP) - RETURNING id, timestamp - [...] (1, 'initial deposit', 500.0, 1, 'transfer', 1000.0, 1, 'withdrawal', -29.5) + INSERT INTO account_transaction (account_id, description, amount, timestamp) + VALUES (?, ?, ?, CURRENT_TIMESTAMP) RETURNING id, timestamp + [... (insertmanyvalues) 1/3 (ordered; batch not supported)] (1, 'initial deposit', 500.0) + INSERT INTO account_transaction (account_id, description, amount, timestamp) + VALUES (?, ?, ?, CURRENT_TIMESTAMP) RETURNING id, timestamp + [insertmanyvalues 2/3 (ordered; batch not supported)] (1, 'transfer', 1000.0) + INSERT INTO account_transaction (account_id, description, amount, timestamp) + VALUES (?, ?, ?, CURRENT_TIMESTAMP) RETURNING id, timestamp + [insertmanyvalues 3/3 (ordered; batch not supported)] (1, 'withdrawal', -29.5) COMMIT + Once an object is database-persisted (i.e. in the :term:`persistent` or :term:`detached` state), the collection has the ability to be extended with new items as well as the ability for individual items to be removed. However, the @@ -199,11 +205,14 @@ methods:: ... ) >>> session.commit() {execsql}INSERT INTO account_transaction (account_id, description, amount, timestamp) - VALUES (?, ?, ?, CURRENT_TIMESTAMP), (?, ?, ?, CURRENT_TIMESTAMP) - RETURNING id, timestamp - [...] (1, 'paycheck', 2000.0, 1, 'rent', -800.0) + VALUES (?, ?, ?, CURRENT_TIMESTAMP) RETURNING id, timestamp + [... (insertmanyvalues) 1/2 (ordered; batch not supported)] (1, 'paycheck', 2000.0) + INSERT INTO account_transaction (account_id, description, amount, timestamp) + VALUES (?, ?, ?, CURRENT_TIMESTAMP) RETURNING id, timestamp + [insertmanyvalues 2/2 (ordered; batch not supported)] (1, 'rent', -800.0) COMMIT + The items added above are held in a pending queue within the :class:`_orm.Session` until the next flush, at which point they are INSERTed into the database, assuming the added objects were previously :term:`transient`. diff --git a/doc/build/orm/mapping_api.rst b/doc/build/orm/mapping_api.rst index 8260fcb2f..57ef5e00e 100644 --- a/doc/build/orm/mapping_api.rst +++ b/doc/build/orm/mapping_api.rst @@ -133,6 +133,8 @@ Class Mapping API .. autofunction:: polymorphic_union +.. autofunction:: orm_insert_sentinel + .. autofunction:: reconstructor .. 
autoclass:: Mapper diff --git a/doc/build/orm/queryguide/dml.rst b/doc/build/orm/queryguide/dml.rst index 606b7ccef..b836e6984 100644 --- a/doc/build/orm/queryguide/dml.rst +++ b/doc/build/orm/queryguide/dml.rst @@ -105,7 +105,7 @@ Getting new objects with RETURNING The bulk ORM insert feature supports INSERT..RETURNING for selected backends, which can return a :class:`.Result` object that may yield individual columns back as well as fully constructed ORM objects corresponding -to the new rows. INSERT..RETURNING requires the use of a backend that +to the newly generated records. INSERT..RETURNING requires the use of a backend that supports SQL RETURNING syntax as well as support for :term:`executemany` with RETURNING; this feature is available with all :ref:`SQLAlchemy-included <included_dialects>` backends @@ -127,9 +127,10 @@ iteration of ``User`` objects:: ... ], ... ) {execsql}INSERT INTO user_account (name, fullname) - VALUES (?, ?), (?, ?), (?, ?), (?, ?), (?, ?) RETURNING id, name, fullname, species - [... (insertmanyvalues)] ('spongebob', 'Spongebob Squarepants', 'sandy', - 'Sandy Cheeks', 'patrick', 'Patrick Star', 'squidward', 'Squidward Tentacles', + VALUES (?, ?), (?, ?), (?, ?), (?, ?), (?, ?) + RETURNING id, name, fullname, species + [...] ('spongebob', 'Spongebob Squarepants', 'sandy', 'Sandy Cheeks', + 'patrick', 'Patrick Star', 'squidward', 'Squidward Tentacles', 'ehkrabs', 'Eugene H. Krabs') {stop}>>> print(users.all()) [User(name='spongebob', fullname='Spongebob Squarepants'), @@ -154,6 +155,53 @@ single INSERT statement so that RETURNING may be used. options such as :func:`_orm.load_only` and :func:`_orm.selectinload` is also present. +.. _orm_queryguide_bulk_insert_returning_ordered: + +Correlating RETURNING records with input data order +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When using bulk INSERT with RETURNING, it's important to note that most +database backends provide no formal guarantee of the order in which the +records from RETURNING are sent, including that there is no guarantee that +their order will correspond to that of the input records. For applications +that need to ensure RETURNING records can be correlated with input data, +the additional parameter :paramref:`_dml.Insert.returning.sort_by_parameter_order` +may be specified, which depending on backend may use special INSERT forms +that maintain a token which is used to reorder the returned rows appropriately, +or in some cases, such as in the example below using the SQLite backend, +the operation will INSERT one row at a time:: + + >>> data = [ + ... {"name": "pearl", "fullname": "Pearl Krabs"}, + ... {"name": "plankton", "fullname": "Plankton"}, + ... {"name": "gary", "fullname": "Gary"}, + ... ] + >>> user_ids = session.scalars( + ... insert(User).returning(User.id, sort_by_parameter_order=True), data + ... ) + {execsql}INSERT INTO user_account (name, fullname) VALUES (?, ?) RETURNING id + [... (insertmanyvalues) 1/3 (ordered; batch not supported)] ('pearl', 'Pearl Krabs') + INSERT INTO user_account (name, fullname) VALUES (?, ?) RETURNING id + [insertmanyvalues 2/3 (ordered; batch not supported)] ('plankton', 'Plankton') + INSERT INTO user_account (name, fullname) VALUES (?, ?) RETURNING id + [insertmanyvalues 3/3 (ordered; batch not supported)] ('gary', 'Gary') + {stop}>>> for user_id, input_record in zip(user_ids, data): + ... 
input_record["id"] = user_id + >>> print(data) + [{'name': 'pearl', 'fullname': 'Pearl Krabs', 'id': 6}, + {'name': 'plankton', 'fullname': 'Plankton', 'id': 7}, + {'name': 'gary', 'fullname': 'Gary', 'id': 8}] + +.. versionadded:: 2.0.10 Added :paramref:`_dml.Insert.returning.sort_by_parameter_order` + which is implemented within the :term:`insertmanyvalues` architecture. + +.. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` - background on approaches + taken to guarantee correspondence between input data and result rows + without significant loss of performance + + .. _orm_queryguide_insert_heterogeneous_params: Using Heterogenous Parameter Dictionaries @@ -190,12 +238,19 @@ to each set of keys and batch accordingly into separate INSERT statements:: ... {"name": "ehkrabs", "fullname": "Eugene H. Krabs", "species": "Crab"}, ... ], ... ) - {execsql}INSERT INTO user_account (name, fullname, species) VALUES (?, ?, ?), (?, ?, ?) RETURNING id, name, fullname, species - [... (insertmanyvalues)] ('spongebob', 'Spongebob Squarepants', 'Sea Sponge', 'sandy', 'Sandy Cheeks', 'Squirrel') - INSERT INTO user_account (name, species) VALUES (?, ?) RETURNING id, name, fullname, species + {execsql}INSERT INTO user_account (name, fullname, species) + VALUES (?, ?, ?), (?, ?, ?) RETURNING id, name, fullname, species + [... (insertmanyvalues) 1/1 (unordered)] ('spongebob', 'Spongebob Squarepants', 'Sea Sponge', + 'sandy', 'Sandy Cheeks', 'Squirrel') + INSERT INTO user_account (name, species) + VALUES (?, ?) RETURNING id, name, fullname, species [...] ('patrick', 'Starfish') - INSERT INTO user_account (name, fullname, species) VALUES (?, ?, ?), (?, ?, ?) RETURNING id, name, fullname, species - [... (insertmanyvalues)] ('squidward', 'Squidward Tentacles', 'Squid', 'ehkrabs', 'Eugene H. Krabs', 'Crab') + INSERT INTO user_account (name, fullname, species) + VALUES (?, ?, ?), (?, ?, ?) RETURNING id, name, fullname, species + [... (insertmanyvalues) 1/1 (unordered)] ('squidward', 'Squidward Tentacles', + 'Squid', 'ehkrabs', 'Eugene H. Krabs', 'Crab') + + In the above example, the five parameter dictionaries passed translated into three INSERT statements, grouped along the specific sets of keys @@ -232,12 +287,22 @@ the returned rows include values for all columns inserted:: ... {"name": "ehkrabs", "manager_name": "Eugene H. Krabs"}, ... ], ... ) - {execsql}INSERT INTO employee (name, type) VALUES (?, ?), (?, ?) RETURNING id, name, type - [... (insertmanyvalues)] ('sandy', 'manager', 'ehkrabs', 'manager') - INSERT INTO manager (id, manager_name) VALUES (?, ?), (?, ?) RETURNING id, manager_name - [... (insertmanyvalues)] (1, 'Sandy Cheeks', 2, 'Eugene H. Krabs') - {stop}>>> print(managers.all()) - [Manager('sandy', manager_name='Sandy Cheeks'), Manager('ehkrabs', manager_name='Eugene H. Krabs')] + {execsql}INSERT INTO employee (name, type) VALUES (?, ?) RETURNING id, name, type + [... (insertmanyvalues) 1/2 (ordered; batch not supported)] ('sandy', 'manager') + INSERT INTO employee (name, type) VALUES (?, ?) RETURNING id, name, type + [insertmanyvalues 2/2 (ordered; batch not supported)] ('ehkrabs', 'manager') + INSERT INTO manager (id, manager_name) VALUES (?, ?), (?, ?) RETURNING id, manager_name, id AS id__1 + [... (insertmanyvalues) 1/1 (ordered)] (1, 'Sandy Cheeks', 2, 'Eugene H. Krabs') + +.. 
tip:: Bulk INSERT of joined inheritance mappings requires that the ORM + make use of the :paramref:`_dml.Insert.returning.sort_by_parameter_order` + parameter internally, so that it can correlate primary key values from + RETURNING rows from the base table into the parameter sets being used + to INSERT into the "sub" table, which is why the SQLite backend + illustrated above transparently degrades to using non-batched statements. + Background on this feature is at + :ref:`engine_insertmanyvalues_returning_order`. + .. _orm_queryguide_bulk_insert_w_sql: @@ -281,11 +346,12 @@ and then pass the additional records using "bulk" mode:: ... ], ... ) {execsql}INSERT INTO log_record (message, code, timestamp) - VALUES (?, ?, CURRENT_TIMESTAMP), (?, ?, CURRENT_TIMESTAMP), (?, ?, CURRENT_TIMESTAMP), - (?, ?, CURRENT_TIMESTAMP) + VALUES (?, ?, CURRENT_TIMESTAMP), (?, ?, CURRENT_TIMESTAMP), + (?, ?, CURRENT_TIMESTAMP), (?, ?, CURRENT_TIMESTAMP) RETURNING id, message, code, timestamp - [... (insertmanyvalues)] ('log message #1', 'SQLA', 'log message #2', 'SQLA', - 'log message #3', 'SQLA', 'log message #4', 'SQLA') + [... (insertmanyvalues) 1/1 (unordered)] ('log message #1', 'SQLA', 'log message #2', + 'SQLA', 'log message #3', 'SQLA', 'log message #4', 'SQLA') + {stop}>>> print(log_record_result.all()) [LogRecord('log message #1', 'SQLA', datetime.datetime(...)), diff --git a/doc/build/orm/quickstart.rst b/doc/build/orm/quickstart.rst index eee913588..652ed235a 100644 --- a/doc/build/orm/quickstart.rst +++ b/doc/build/orm/quickstart.rst @@ -221,10 +221,18 @@ is used: ... ... session.commit() {execsql}BEGIN (implicit) - INSERT INTO user_account (name, fullname) VALUES (?, ?), (?, ?), (?, ?) RETURNING id - [...] ('spongebob', 'Spongebob Squarepants', 'sandy', 'Sandy Cheeks', 'patrick', 'Patrick Star') - INSERT INTO address (email_address, user_id) VALUES (?, ?), (?, ?), (?, ?) RETURNING id - [...] ('spongebob@sqlalchemy.org', 1, 'sandy@sqlalchemy.org', 2, 'sandy@squirrelpower.org', 2) + INSERT INTO user_account (name, fullname) VALUES (?, ?) RETURNING id + [...] ('spongebob', 'Spongebob Squarepants') + INSERT INTO user_account (name, fullname) VALUES (?, ?) RETURNING id + [...] ('sandy', 'Sandy Cheeks') + INSERT INTO user_account (name, fullname) VALUES (?, ?) RETURNING id + [...] ('patrick', 'Patrick Star') + INSERT INTO address (email_address, user_id) VALUES (?, ?) RETURNING id + [...] ('spongebob@sqlalchemy.org', 1) + INSERT INTO address (email_address, user_id) VALUES (?, ?) RETURNING id + [...] ('sandy@sqlalchemy.org', 2) + INSERT INTO address (email_address, user_id) VALUES (?, ?) RETURNING id + [...] ('sandy@squirrelpower.org', 2) COMMIT diff --git a/doc/build/tutorial/data_insert.rst b/doc/build/tutorial/data_insert.rst index 07e5f9032..d0f6b236d 100644 --- a/doc/build/tutorial/data_insert.rst +++ b/doc/build/tutorial/data_insert.rst @@ -268,6 +268,12 @@ as in the example below that builds upon the example stated in that are included in SQLAlchemy which support RETURNING. See the section :ref:`engine_insertmanyvalues` for background on this feature. +.. seealso:: + + Bulk INSERT with or without RETURNING is also supported by the ORM. See + :ref:`orm_queryguide_bulk_insert` for reference documentation. + + .. 
_tutorial_insert_from_select: diff --git a/doc/build/tutorial/orm_data_manipulation.rst b/doc/build/tutorial/orm_data_manipulation.rst index ab16c5856..73fef50ab 100644 --- a/doc/build/tutorial/orm_data_manipulation.rst +++ b/doc/build/tutorial/orm_data_manipulation.rst @@ -124,8 +124,12 @@ method: >>> session.flush() {execsql}BEGIN (implicit) - INSERT INTO user_account (name, fullname) VALUES (?, ?), (?, ?) RETURNING id - [...] ('squidward', 'Squidward Tentacles', 'ehkrabs', 'Eugene H. Krabs') + INSERT INTO user_account (name, fullname) VALUES (?, ?) RETURNING id + [... (insertmanyvalues) 1/2 (ordered; batch not supported)] ('squidward', 'Squidward Tentacles') + INSERT INTO user_account (name, fullname) VALUES (?, ?) RETURNING id + [insertmanyvalues 2/2 (ordered; batch not supported)] ('ehkrabs', 'Eugene H. Krabs') + + Above we observe the :class:`_orm.Session` was first called upon to emit SQL, diff --git a/doc/build/tutorial/orm_related_objects.rst b/doc/build/tutorial/orm_related_objects.rst index 69224ba78..7f564b345 100644 --- a/doc/build/tutorial/orm_related_objects.rst +++ b/doc/build/tutorial/orm_related_objects.rst @@ -199,11 +199,15 @@ newly generated primary key of the ``user_account`` row is applied to the >>> session.commit() {execsql}INSERT INTO user_account (name, fullname) VALUES (?, ?) [...] ('pkrabs', 'Pearl Krabs') - INSERT INTO address (email_address, user_id) VALUES (?, ?), (?, ?) RETURNING id - [...] ('pearl.krabs@gmail.com', 6, 'pearl@aol.com', 6) + INSERT INTO address (email_address, user_id) VALUES (?, ?) RETURNING id + [... (insertmanyvalues) 1/2 (ordered; batch not supported)] ('pearl.krabs@gmail.com', 6) + INSERT INTO address (email_address, user_id) VALUES (?, ?) RETURNING id + [insertmanyvalues 2/2 (ordered; batch not supported)] ('pearl@aol.com', 6) COMMIT + + .. _tutorial_loading_relationships: Loading Relationships diff --git a/lib/sqlalchemy/__init__.py b/lib/sqlalchemy/__init__.py index 86c5afd8f..ad80a33c9 100644 --- a/lib/sqlalchemy/__init__.py +++ b/lib/sqlalchemy/__init__.py @@ -73,6 +73,7 @@ from .schema import ForeignKey as ForeignKey from .schema import ForeignKeyConstraint as ForeignKeyConstraint from .schema import Identity as Identity from .schema import Index as Index +from .schema import insert_sentinel as insert_sentinel from .schema import MetaData as MetaData from .schema import PrimaryKeyConstraint as PrimaryKeyConstraint from .schema import Sequence as Sequence diff --git a/lib/sqlalchemy/dialects/mssql/base.py b/lib/sqlalchemy/dialects/mssql/base.py index f32665792..4a7e48ab8 100644 --- a/lib/sqlalchemy/dialects/mssql/base.py +++ b/lib/sqlalchemy/dialects/mssql/base.py @@ -251,12 +251,16 @@ The process for fetching this value has several variants: INSERT INTO t (x) OUTPUT inserted.id VALUES (?) - .. note:: SQLAlchemy 2.0 introduced the :ref:`engine_insertmanyvalues` - feature for SQL Server, which is used by default to optimize many-row - INSERT statements; however as of SQLAlchemy 2.0.9 this feature is - temporarily disabled for SQL Server, until adjustments can be made - so that the ORM unit of work does not rely upon the ordering of returned - rows. + As of SQLAlchemy 2.0, the :ref:`engine_insertmanyvalues` feature is also + used by default to optimize many-row INSERT statements; for SQL Server + the feature takes place for both RETURNING and non-RETURNING + INSERT statements. + + .. 
versionchanged:: 2.0.10 The :ref:`engine_insertmanyvalues` feature for + SQL Server was temporarily disabled in SQLAlchemy version 2.0.9 due to + issues with row ordering. As of 2.0.10 the feature is re-enabled, with + special case handling for the unit of work's requirement for RETURNING to + be ordered. * When RETURNING is not available or has been disabled via ``implicit_returning=False``, either the ``scope_identity()`` function or @@ -936,6 +940,7 @@ from ...sql import roles from ...sql import sqltypes from ...sql import util as sql_util from ...sql._typing import is_sql_compiler +from ...sql.compiler import InsertmanyvaluesSentinelOpts from ...types import BIGINT from ...types import BINARY from ...types import CHAR @@ -1514,6 +1519,39 @@ class MSUUid(sqltypes.Uuid): return process + def _sentinel_value_resolver(self, dialect): + """Return a callable that will receive the uuid object or string + as it is normally passed to the DB in the parameter set, after + bind_processor() is called. Convert this value to match + what it would be as coming back from an INSERT..OUTPUT inserted. + + for the UUID type, there are four varieties of settings so here + we seek to convert to the string or UUID representation that comes + back from the driver. + + """ + character_based_uuid = ( + not dialect.supports_native_uuid or not self.native_uuid + ) + + if character_based_uuid: + if self.native_uuid: + # for pyodbc, uuid.UUID objects are accepted for incoming + # data, as well as strings. but the driver will always return + # uppercase strings in result sets. + def process(value): + return str(value).upper() + + else: + + def process(value): + return str(value) + + return process + else: + # for pymssql, we get uuid.UUID objects back. + return None + class UNIQUEIDENTIFIER(sqltypes.Uuid[sqltypes._UUID_RETURN]): __visit_name__ = "UNIQUEIDENTIFIER" @@ -2995,13 +3033,18 @@ class MSDialect(default.DefaultDialect): # may be changed at server inspection time for older SQL server versions supports_multivalues_insert = True - # disabled due to #9603 - use_insertmanyvalues = False + use_insertmanyvalues = True # note pyodbc will set this to False if fast_executemany is set, # as of SQLAlchemy 2.0.9 use_insertmanyvalues_wo_returning = True + insertmanyvalues_implicit_sentinel = ( + InsertmanyvaluesSentinelOpts.AUTOINCREMENT + | InsertmanyvaluesSentinelOpts.IDENTITY + | InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT + ) + # "The incoming request has too many parameters. The server supports a " # "maximum of 2100 parameters." # in fact you can have 2099 parameters. @@ -3064,14 +3107,6 @@ class MSDialect(default.DefaultDialect): super().__init__(**opts) - if self.use_insertmanyvalues: - raise exc.ArgumentError( - "The use_insertmanyvalues feature on SQL Server is currently " - "not safe to use, as returned result rows may be returned in " - "random order. Ensure use_insertmanyvalues is left at its " - "default of False (this setting changed in SQLAlchemy 2.0.9)" - ) - self._json_serializer = json_serializer self._json_deserializer = json_deserializer diff --git a/lib/sqlalchemy/dialects/mssql/pyodbc.py b/lib/sqlalchemy/dialects/mssql/pyodbc.py index 08c6bc48f..6af527e73 100644 --- a/lib/sqlalchemy/dialects/mssql/pyodbc.py +++ b/lib/sqlalchemy/dialects/mssql/pyodbc.py @@ -290,19 +290,6 @@ Pyodbc have been resolved as of SQLAlchemy 2.0.5. See the notes at Fast Executemany Mode --------------------- - .. 
note:: SQLAlchemy 2.0 introduced the :ref:`engine_insertmanyvalues` - feature for SQL Server, which is used by default to optimize many-row - INSERT statements; however as of SQLAlchemy 2.0.9 this feature had - to be turned off for SQL Server as the database does not support - deterministic RETURNING of INSERT rows for a multi-row INSERT statement. - -.. versionchanged:: 2.0.9 - ``fast_executemany`` executions will be used - for INSERT statements that don't include RETURNING, when - ``fast_executemany`` is set. Previously, ``use_insertmanyvalues`` would - cause ``fast_executemany`` to not be used in most cases. - - ``use_insertmanyvalues`` is disabled for SQL Server overall as of 2.0.9. - The PyODBC driver includes support for a "fast executemany" mode of execution which greatly reduces round trips for a DBAPI ``executemany()`` call when using Microsoft ODBC drivers, for **limited size batches that fit in memory**. The @@ -316,6 +303,12 @@ Server dialect supports this parameter by passing the "mssql+pyodbc://scott:tiger@mssql2017:1433/test?driver=ODBC+Driver+17+for+SQL+Server", fast_executemany=True) +.. versionchanged:: 2.0.9 - the ``fast_executemany`` parameter now has its + intended effect, with this PyODBC feature taking effect for all INSERT + statements that are executed with multiple parameter sets and that don't + include RETURNING. Previously, SQLAlchemy 2.0's :term:`insertmanyvalues` + feature would cause ``fast_executemany`` to not be used in most cases + even if specified. .. versionadded:: 1.3 diff --git a/lib/sqlalchemy/dialects/mysql/base.py b/lib/sqlalchemy/dialects/mysql/base.py index b5a5b2ca4..eb9ccc606 100644 --- a/lib/sqlalchemy/dialects/mysql/base.py +++ b/lib/sqlalchemy/dialects/mysql/base.py @@ -1062,6 +1062,7 @@ from ...sql import operators from ...sql import roles from ...sql import sqltypes from ...sql import util as sql_util +from ...sql.compiler import InsertmanyvaluesSentinelOpts from ...types import BINARY from ...types import BLOB from ...types import BOOLEAN @@ -2414,6 +2415,9 @@ class MySQLDialect(default.DefaultDialect): supports_default_metavalue = True use_insertmanyvalues: bool = True + insertmanyvalues_implicit_sentinel = ( + InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + ) supports_sane_rowcount = True supports_sane_multi_rowcount = False diff --git a/lib/sqlalchemy/dialects/mysql/provision.py b/lib/sqlalchemy/dialects/mysql/provision.py index 36a5e9f54..b7faf7712 100644 --- a/lib/sqlalchemy/dialects/mysql/provision.py +++ b/lib/sqlalchemy/dialects/mysql/provision.py @@ -82,7 +82,9 @@ def _mysql_temp_table_keyword_args(cfg, eng): @upsert.for_db("mariadb") -def _upsert(cfg, table, returning, set_lambda=None): +def _upsert( + cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False +): from sqlalchemy.dialects.mysql import insert stmt = insert(table) @@ -93,5 +95,7 @@ def _upsert(cfg, table, returning, set_lambda=None): pk1 = table.primary_key.c[0] stmt = stmt.on_duplicate_key_update({pk1.key: pk1}) - stmt = stmt.returning(*returning) + stmt = stmt.returning( + *returning, sort_by_parameter_order=sort_by_parameter_order + ) return stmt diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py index 0fb6295fa..f6f10c476 100644 --- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py +++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py @@ -543,6 +543,11 @@ class _OracleNumeric(sqltypes.Numeric): return handler +class _OracleUUID(sqltypes.Uuid): + def get_dbapi_type(self, dbapi): + return dbapi.STRING + + 
class _OracleBinaryFloat(_OracleNumeric): def get_dbapi_type(self, dbapi): return dbapi.NATIVE_FLOAT @@ -878,29 +883,9 @@ class OracleExecutionContext_cx_oracle(OracleExecutionContext): and is_sql_compiler(self.compiled) and self.compiled._oracle_returning ): - # create a fake cursor result from the out parameters. unlike - # get_out_parameter_values(), the result-row handlers here will be - # applied at the Result level - - numcols = len(self.out_parameters) - - # [stmt_result for stmt_result in outparam.values] == each - # statement in executemany - # [val for val in stmt_result] == each row for a particular - # statement - initial_buffer = list( - zip( - *[ - [ - val - for stmt_result in self.out_parameters[ - f"ret_{j}" - ].values - for val in stmt_result - ] - for j in range(numcols) - ] - ) + + initial_buffer = self.fetchall_for_returning( + self.cursor, _internal=True ) fetch_strategy = _cursor.FullyBufferedCursorFetchStrategy( @@ -921,6 +906,43 @@ class OracleExecutionContext_cx_oracle(OracleExecutionContext): return c + def fetchall_for_returning(self, cursor, *, _internal=False): + compiled = self.compiled + if ( + not _internal + and compiled is None + or not is_sql_compiler(compiled) + or not compiled._oracle_returning + ): + raise NotImplementedError( + "execution context was not prepared for Oracle RETURNING" + ) + + # create a fake cursor result from the out parameters. unlike + # get_out_parameter_values(), the result-row handlers here will be + # applied at the Result level + + numcols = len(self.out_parameters) + + # [stmt_result for stmt_result in outparam.values] == each + # statement in executemany + # [val for val in stmt_result] == each row for a particular + # statement + return list( + zip( + *[ + [ + val + for stmt_result in self.out_parameters[ + f"ret_{j}" + ].values + for val in (stmt_result or ()) + ] + for j in range(numcols) + ] + ) + ) + def get_out_parameter_values(self, out_param_names): # this method should not be called when the compiler has # RETURNING as we've turned the has_out_parameters flag set to @@ -942,6 +964,7 @@ class OracleDialect_cx_oracle(OracleDialect): supports_sane_multi_rowcount = True insert_executemany_returning = True + insert_executemany_returning_sort_by_parameter_order = True update_executemany_returning = True delete_executemany_returning = True @@ -974,6 +997,7 @@ class OracleDialect_cx_oracle(OracleDialect): oracle.RAW: _OracleRaw, sqltypes.Unicode: _OracleUnicodeStringCHAR, sqltypes.NVARCHAR: _OracleUnicodeStringNCHAR, + sqltypes.Uuid: _OracleUUID, oracle.NCLOB: _OracleUnicodeTextNCLOB, oracle.ROWID: _OracleRowid, } diff --git a/lib/sqlalchemy/dialects/postgresql/asyncpg.py b/lib/sqlalchemy/dialects/postgresql/asyncpg.py index 2acc5fea3..d1a52afd6 100644 --- a/lib/sqlalchemy/dialects/postgresql/asyncpg.py +++ b/lib/sqlalchemy/dialects/postgresql/asyncpg.py @@ -131,6 +131,7 @@ from typing import TYPE_CHECKING from . import json from . 
import ranges +from .array import ARRAY as PGARRAY from .base import _DECIMAL_TYPES from .base import _FLOAT_TYPES from .base import _INT_TYPES @@ -157,6 +158,10 @@ if TYPE_CHECKING: from typing import Iterable +class AsyncpgARRAY(PGARRAY): + render_bind_cast = True + + class AsyncpgString(sqltypes.String): render_bind_cast = True @@ -904,6 +909,7 @@ class PGDialect_asyncpg(PGDialect): PGDialect.colspecs, { sqltypes.String: AsyncpgString, + sqltypes.ARRAY: AsyncpgARRAY, REGCONFIG: AsyncpgREGCONFIG, sqltypes.Time: AsyncpgTime, sqltypes.Date: AsyncpgDate, diff --git a/lib/sqlalchemy/dialects/postgresql/base.py b/lib/sqlalchemy/dialects/postgresql/base.py index 1ce5600e1..ad5e346b7 100644 --- a/lib/sqlalchemy/dialects/postgresql/base.py +++ b/lib/sqlalchemy/dialects/postgresql/base.py @@ -1468,6 +1468,7 @@ from ...sql import expression from ...sql import roles from ...sql import sqltypes from ...sql import util as sql_util +from ...sql.compiler import InsertmanyvaluesSentinelOpts from ...sql.visitors import InternalTraversal from ...types import BIGINT from ...types import BOOLEAN @@ -2911,6 +2912,12 @@ class PGDialect(default.DefaultDialect): postfetch_lastrowid = False use_insertmanyvalues = True + insertmanyvalues_implicit_sentinel = ( + InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + | InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT + | InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS + ) + supports_comments = True supports_constraint_comments = True supports_default_values = True diff --git a/lib/sqlalchemy/dialects/postgresql/provision.py b/lib/sqlalchemy/dialects/postgresql/provision.py index 582157604..87f1c9a4c 100644 --- a/lib/sqlalchemy/dialects/postgresql/provision.py +++ b/lib/sqlalchemy/dialects/postgresql/provision.py @@ -130,7 +130,9 @@ def prepare_for_drop_tables(config, connection): @upsert.for_db("postgresql") -def _upsert(cfg, table, returning, set_lambda=None): +def _upsert( + cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False +): from sqlalchemy.dialects.postgresql import insert stmt = insert(table) @@ -144,7 +146,9 @@ def _upsert(cfg, table, returning, set_lambda=None): else: stmt = stmt.on_conflict_do_nothing() - stmt = stmt.returning(*returning) + stmt = stmt.returning( + *returning, sort_by_parameter_order=sort_by_parameter_order + ) return stmt diff --git a/lib/sqlalchemy/dialects/sqlite/provision.py b/lib/sqlalchemy/dialects/sqlite/provision.py index 3f86d5a60..2ed8253ab 100644 --- a/lib/sqlalchemy/dialects/sqlite/provision.py +++ b/lib/sqlalchemy/dialects/sqlite/provision.py @@ -174,7 +174,9 @@ def _reap_sqlite_dbs(url, idents): @upsert.for_db("sqlite") -def _upsert(cfg, table, returning, set_lambda=None): +def _upsert( + cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False +): from sqlalchemy.dialects.sqlite import insert stmt = insert(table) @@ -184,5 +186,7 @@ def _upsert(cfg, table, returning, set_lambda=None): else: stmt = stmt.on_conflict_do_nothing() - stmt = stmt.returning(*returning) + stmt = stmt.returning( + *returning, sort_by_parameter_order=sort_by_parameter_order + ) return stmt diff --git a/lib/sqlalchemy/engine/base.py b/lib/sqlalchemy/engine/base.py index ba2c44ed7..dac7c9473 100644 --- a/lib/sqlalchemy/engine/base.py +++ b/lib/sqlalchemy/engine/base.py @@ -2020,13 +2020,8 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]): if self._echo: stats = context._get_cache_stats() + " (insertmanyvalues)" - for ( - sub_stmt, - sub_params, - setinputsizes, - batchnum, - 
totalbatches, - ) in dialect._deliver_insertmanyvalues_batches( + + for imv_batch in dialect._deliver_insertmanyvalues_batches( cursor, str_statement, effective_parameters, @@ -2034,20 +2029,25 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]): context, ): - if setinputsizes: + if imv_batch.processed_setinputsizes: try: dialect.do_set_input_sizes( - context.cursor, setinputsizes, context + context.cursor, + imv_batch.processed_setinputsizes, + context, ) except BaseException as e: self._handle_dbapi_exception( e, - sql_util._long_statement(sub_stmt), - sub_params, + sql_util._long_statement(imv_batch.replaced_statement), + imv_batch.replaced_parameters, None, context, ) + sub_stmt = imv_batch.replaced_statement + sub_params = imv_batch.replaced_parameters + if engine_events: for fn in self.dispatch.before_cursor_execute: sub_stmt, sub_params = fn( @@ -2063,11 +2063,20 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]): self._log_info(sql_util._long_statement(sub_stmt)) - if batchnum > 1: - stats = ( - f"insertmanyvalues batch {batchnum} " - f"of {totalbatches}" - ) + imv_stats = f""" { + imv_batch.batchnum}/{imv_batch.total_batches} ({ + 'ordered' + if imv_batch.rows_sorted else 'unordered' + }{ + '; batch not supported' + if imv_batch.is_downgraded + else '' + })""" + + if imv_batch.batchnum == 1: + stats += imv_stats + else: + stats = f"insertmanyvalues{imv_stats}" if not self.engine.hide_parameters: self._log_info( @@ -2096,7 +2105,12 @@ class Connection(ConnectionEventsTarget, inspection.Inspectable["Inspector"]): ): break else: - dialect.do_execute(cursor, sub_stmt, sub_params, context) + dialect.do_execute( + cursor, + sub_stmt, + sub_params, + context, + ) except BaseException as e: self._handle_dbapi_exception( diff --git a/lib/sqlalchemy/engine/cursor.py b/lib/sqlalchemy/engine/cursor.py index 1f171ddb0..aaf2c1918 100644 --- a/lib/sqlalchemy/engine/cursor.py +++ b/lib/sqlalchemy/engine/cursor.py @@ -1748,13 +1748,18 @@ class CursorResult(Result[_T]): position in the result. The expected use case here is so that multiple INSERT..RETURNING - statements against different tables can produce a single result - that looks like a JOIN of those two tables. + statements (which definitely need to be sorted) against different + tables can produce a single result that looks like a JOIN of those two + tables. E.g.:: r1 = connection.execute( - users.insert().returning(users.c.user_name, users.c.user_id), + users.insert().returning( + users.c.user_name, + users.c.user_id, + sort_by_parameter_order=True + ), user_values ) @@ -1763,6 +1768,7 @@ class CursorResult(Result[_T]): addresses.c.address_id, addresses.c.address, addresses.c.user_id, + sort_by_parameter_order=True ), address_values ) diff --git a/lib/sqlalchemy/engine/default.py b/lib/sqlalchemy/engine/default.py index 462473de2..8992334ee 100644 --- a/lib/sqlalchemy/engine/default.py +++ b/lib/sqlalchemy/engine/default.py @@ -17,6 +17,7 @@ as the base class for their own corresponding classes. 
from __future__ import annotations

import functools
+import operator
import random
import re
from time import perf_counter
@@ -60,6 +61,7 @@ from ..sql import type_api
from ..sql._typing import is_tuple_type
from ..sql.base import _NoArg
from ..sql.compiler import DDLCompiler
+from ..sql.compiler import InsertmanyvaluesSentinelOpts
from ..sql.compiler import SQLCompiler
from ..sql.elements import quoted_name
from ..util.typing import Final
@@ -223,6 +225,10 @@ class DefaultDialect(Dialect):

    use_insertmanyvalues_wo_returning: bool = False

+    insertmanyvalues_implicit_sentinel: InsertmanyvaluesSentinelOpts = (
+        InsertmanyvaluesSentinelOpts.NOT_SUPPORTED
+    )
+
    insertmanyvalues_page_size: int = 1000
    insertmanyvalues_max_parameters = 32700
@@ -369,13 +375,42 @@ class DefaultDialect(Dialect):
            and self.delete_returning
        )

-    @property
+    @util.memoized_property
    def insert_executemany_returning(self):
-        return (
-            self.insert_returning
-            and self.supports_multivalues_insert
-            and self.use_insertmanyvalues
-        )
+        """Default implementation for insert_executemany_returning, if not
+        otherwise overridden by the specific dialect.
+
+        The default dialect determines that "insert_executemany_returning"
+        is available when the dialect in use has opted into using the
+        "use_insertmanyvalues" feature. If it hasn't opted in, this
+        attribute is False, unless the dialect in question overrides this
+        and provides some other implementation (as the Oracle dialect
+        does).
+
+        """
+        return self.insert_returning and self.use_insertmanyvalues
+
+    @util.memoized_property
+    def insert_executemany_returning_sort_by_parameter_order(self):
+        """Default implementation for
+        insert_executemany_returning_sort_by_parameter_order, if not
+        otherwise overridden by the specific dialect.
+
+        The default dialect determines that "insert_executemany_returning"
+        can have deterministic order only when the dialect in use has
+        opted into using the "use_insertmanyvalues" feature, which by
+        default implements deterministic ordering using client side
+        sentinel columns only. The "insertmanyvalues" feature also
+        provides alternate forms that can use server-generated PK values
+        as "sentinels", but those are only used if the
+        :attr:`.Dialect.insertmanyvalues_implicit_sentinel` bitflag
+        enables those alternate SQL forms, which are disabled by default.
+
+        If the dialect in use hasn't opted into that, then this attribute
+        is False, unless the dialect in question overrides this and
+        provides some other implementation (as the Oracle dialect does).
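+
+        As an illustration (a sketch; ``conn`` and ``my_table`` are
+        assumed), a dialect for which this attribute is True supports::
+
+            result = conn.execute(
+                my_table.insert().returning(
+                    my_table.c.id, sort_by_parameter_order=True
+                ),
+                [{"x": 1}, {"x": 2}, {"x": 3}],
+            )
+            # rows are delivered in the same order as the parameter
+            # dictionaries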
+ + """ + return self.insert_returning and self.use_insertmanyvalues update_executemany_returning = False delete_executemany_returning = False @@ -725,20 +760,156 @@ class DefaultDialect(Dialect): context = cast(DefaultExecutionContext, context) compiled = cast(SQLCompiler, context.compiled) + imv = compiled._insertmanyvalues + assert imv is not None + is_returning: Final[bool] = bool(compiled.effective_returning) batch_size = context.execution_options.get( "insertmanyvalues_page_size", self.insertmanyvalues_page_size ) + sentinel_value_resolvers = None + if is_returning: - context._insertmanyvalues_rows = result = [] + result: Optional[List[Any]] = [] + context._insertmanyvalues_rows = result + + sort_by_parameter_order = imv.sort_by_parameter_order - for batch_rec in compiled._deliver_insertmanyvalues_batches( - statement, parameters, generic_setinputsizes, batch_size + if imv.num_sentinel_columns: + sentinel_value_resolvers = ( + compiled._imv_sentinel_value_resolvers + ) + else: + sort_by_parameter_order = False + result = None + + for imv_batch in compiled._deliver_insertmanyvalues_batches( + statement, + parameters, + generic_setinputsizes, + batch_size, + sort_by_parameter_order, ): - yield batch_rec + yield imv_batch + if is_returning: - result.extend(cursor.fetchall()) + rows = context.fetchall_for_returning(cursor) + + # I would have thought "is_returning: Final[bool]" + # would have assured this but pylance thinks not + assert result is not None + + if imv.num_sentinel_columns and not imv_batch.is_downgraded: + composite_sentinel = imv.num_sentinel_columns > 1 + if imv.implicit_sentinel: + # for implicit sentinel, which is currently single-col + # integer autoincrement, do a simple sort. + assert not composite_sentinel + result.extend( + sorted(rows, key=operator.itemgetter(-1)) + ) + continue + + # otherwise, create dictionaries to match up batches + # with parameters + assert imv.sentinel_param_keys + + if composite_sentinel: + _nsc = imv.num_sentinel_columns + rows_by_sentinel = { + tuple(row[-_nsc:]): row for row in rows + } + else: + rows_by_sentinel = {row[-1]: row for row in rows} + + if len(rows_by_sentinel) != len(imv_batch.batch): + # see test_insert_exec.py:: + # IMVSentinelTest::test_sentinel_incorrect_rowcount + # for coverage / demonstration + raise exc.InvalidRequestError( + f"Sentinel-keyed result set did not produce " + f"correct number of rows {len(imv_batch.batch)}; " + "produced " + f"{len(rows_by_sentinel)}. Please ensure the " + "sentinel column is fully unique and populated in " + "all cases." 
+ ) + + try: + if composite_sentinel: + if sentinel_value_resolvers: + # composite sentinel (PK) with value resolvers + ordered_rows = [ + rows_by_sentinel[ + tuple( + _resolver(parameters[_spk]) # type: ignore # noqa: E501 + if _resolver + else parameters[_spk] # type: ignore # noqa: E501 + for _resolver, _spk in zip( + sentinel_value_resolvers, + imv.sentinel_param_keys, + ) + ) + ] + for parameters in imv_batch.batch + ] + else: + # composite sentinel (PK) with no value + # resolvers + ordered_rows = [ + rows_by_sentinel[ + tuple( + parameters[_spk] # type: ignore + for _spk in imv.sentinel_param_keys + ) + ] + for parameters in imv_batch.batch + ] + else: + _sentinel_param_key = imv.sentinel_param_keys[0] + if ( + sentinel_value_resolvers + and sentinel_value_resolvers[0] + ): + # single-column sentinel with value resolver + _sentinel_value_resolver = ( + sentinel_value_resolvers[0] + ) + ordered_rows = [ + rows_by_sentinel[ + _sentinel_value_resolver( + parameters[_sentinel_param_key] # type: ignore # noqa: E501 + ) + ] + for parameters in imv_batch.batch + ] + else: + # single-column sentinel with no value resolver + ordered_rows = [ + rows_by_sentinel[ + parameters[_sentinel_param_key] # type: ignore # noqa: E501 + ] + for parameters in imv_batch.batch + ] + except KeyError as ke: + # see test_insert_exec.py:: + # IMVSentinelTest::test_sentinel_cant_match_keys + # for coverage / demonstration + raise exc.InvalidRequestError( + f"Can't match sentinel values in result set to " + f"parameter sets; key {ke.args[0]!r} was not " + "found. " + "There may be a mismatch between the datatype " + "passed to the DBAPI driver vs. that which it " + "returns in a result row. Try using a different " + "datatype, such as integer" + ) from ke + + result.extend(ordered_rows) + + else: + result.extend(rows) def do_executemany(self, cursor, statement, parameters, context=None): cursor.executemany(statement, parameters) @@ -1043,6 +1214,7 @@ class DefaultExecutionContext(ExecutionContext): _empty_dict_params = cast("Mapping[str, Any]", util.EMPTY_DICT) _insertmanyvalues_rows: Optional[List[Tuple[Any, ...]]] = None + _num_sentinel_cols: int = 0 @classmethod def _init_ddl( @@ -1152,6 +1324,17 @@ class DefaultExecutionContext(ExecutionContext): ) elif ( ii + and dml_statement._sort_by_parameter_order + and not self.dialect.insert_executemany_returning_sort_by_parameter_order # noqa: E501 + ): + raise exc.InvalidRequestError( + f"Dialect {self.dialect.dialect_description} with " + f"current server capabilities does not support " + "INSERT..RETURNING with deterministic row ordering " + "when executemany is used" + ) + elif ( + ii and self.dialect.use_insertmanyvalues and not compiled._insertmanyvalues ): @@ -1194,6 +1377,10 @@ class DefaultExecutionContext(ExecutionContext): if len(parameters) > 1: if self.isinsert and compiled._insertmanyvalues: self.execute_style = ExecuteStyle.INSERTMANYVALUES + + imv = compiled._insertmanyvalues + if imv.sentinel_columns is not None: + self._num_sentinel_cols = imv.num_sentinel_columns else: self.execute_style = ExecuteStyle.EXECUTEMANY @@ -1525,6 +1712,9 @@ class DefaultExecutionContext(ExecutionContext): self._is_server_side = False return self.create_default_cursor() + def fetchall_for_returning(self, cursor): + return cursor.fetchall() + def create_default_cursor(self): return self._dbapi_connection.cursor() @@ -1689,6 +1879,13 @@ class DefaultExecutionContext(ExecutionContext): ) if cursor_description is None: strategy = _cursor._NO_CURSOR_DML + elif 
self._num_sentinel_cols: + assert self.execute_style is ExecuteStyle.INSERTMANYVALUES + if cursor_description: + # strip out the sentinel columns from cursor description + cursor_description = cursor_description[ + 0 : -(self._num_sentinel_cols) + ] result: _cursor.CursorResult[Any] = _cursor.CursorResult( self, strategy, cursor_description @@ -2059,21 +2256,14 @@ class DefaultExecutionContext(ExecutionContext): key_getter = compiled._within_exec_param_key_getter - # pre-determine scalar Python-side defaults - # to avoid many calls of get_insert_default()/ - # get_update_default() + sentinel_counter = 0 + if compiled.insert_prefetch: prefetch_recs = [ ( c, key_getter(c), - ( - c.default.arg, # type: ignore - c.default.is_scalar, - c.default.is_callable, - ) - if c.default and c.default.has_arg - else (None, None, None), + c._default_description_tuple, self.get_insert_default, ) for c in compiled.insert_prefetch @@ -2083,13 +2273,7 @@ class DefaultExecutionContext(ExecutionContext): ( c, key_getter(c), - ( - c.onupdate.arg, # type: ignore - c.onupdate.is_scalar, - c.onupdate.is_callable, - ) - if c.onupdate and c.onupdate.has_arg - else (None, None, None), + c._onupdate_description_tuple, self.get_update_default, ) for c in compiled.update_prefetch @@ -2103,10 +2287,13 @@ class DefaultExecutionContext(ExecutionContext): for ( c, param_key, - (arg, is_scalar, is_callable), + (arg, is_scalar, is_callable, is_sentinel), fallback, ) in prefetch_recs: - if is_scalar: + if is_sentinel: + param[param_key] = sentinel_counter + sentinel_counter += 1 + elif is_scalar: param[param_key] = arg elif is_callable: self.current_column = c diff --git a/lib/sqlalchemy/engine/interfaces.py b/lib/sqlalchemy/engine/interfaces.py index 254aba4bc..0216c155d 100644 --- a/lib/sqlalchemy/engine/interfaces.py +++ b/lib/sqlalchemy/engine/interfaces.py @@ -55,8 +55,10 @@ if TYPE_CHECKING: from ..event import dispatcher from ..exc import StatementError from ..sql import Executable + from ..sql.compiler import _InsertManyValuesBatch from ..sql.compiler import DDLCompiler from ..sql.compiler import IdentifierPreparer + from ..sql.compiler import InsertmanyvaluesSentinelOpts from ..sql.compiler import Linting from ..sql.compiler import SQLCompiler from ..sql.elements import BindParameter @@ -236,14 +238,16 @@ _DBAPIMultiExecuteParams = Union[ _DBAPIAnyExecuteParams = Union[ _DBAPIMultiExecuteParams, _DBAPISingleExecuteParams ] -_DBAPICursorDescription = Tuple[ - str, - "DBAPIType", - Optional[int], - Optional[int], - Optional[int], - Optional[int], - Optional[bool], +_DBAPICursorDescription = Sequence[ + Tuple[ + str, + "DBAPIType", + Optional[int], + Optional[int], + Optional[int], + Optional[int], + Optional[bool], + ] ] _AnySingleExecuteParams = _DBAPISingleExecuteParams @@ -609,9 +613,21 @@ class BindTyping(Enum): aren't. When RENDER_CASTS is used, the compiler will invoke the - :meth:`.SQLCompiler.render_bind_cast` method for each - :class:`.BindParameter` object whose dialect-level type sets the - :attr:`.TypeEngine.render_bind_cast` attribute. + :meth:`.SQLCompiler.render_bind_cast` method for the rendered + string representation of each :class:`.BindParameter` object whose + dialect-level type sets the :attr:`.TypeEngine.render_bind_cast` attribute. 
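+
+        For example (illustrative only; the exact rendering is
+        dialect-specific), a bound parameter of type ``INTEGER`` on such a
+        dialect may render as ``%(data_1)s::INTEGER`` rather than plain
+        ``%(data_1)s``.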
+ + The :meth:`.SQLCompiler.render_bind_cast` is also used to render casts + for one form of "insertmanyvalues" query, when both + :attr:`.InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT` and + :attr:`.InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS` are set, + where the casts are applied to the intermediary columns e.g. + "INSERT INTO t (a, b, c) SELECT p0::TYP, p1::TYP, p2::TYP " + "FROM (VALUES (?, ?), (?, ?), ...)". + + .. versionadded:: 2.0.10 - :meth:`.SQLCompiler.render_bind_cast` is now + used within some elements of the "insertmanyvalues" implementation. + """ @@ -838,6 +854,14 @@ class Dialect(EventTarget): """ + insert_executemany_returning_sort_by_parameter_order: bool + """dialect / driver / database supports some means of providing + INSERT...RETURNING support when dialect.do_executemany() is used + along with the :paramref:`_dml.Insert.returning.sort_by_parameter_order` + parameter being set. + + """ + update_executemany_returning: bool """dialect supports UPDATE..RETURNING with executemany.""" @@ -881,6 +905,23 @@ class Dialect(EventTarget): .. versionadded:: 2.0 + .. seealso:: + + :ref:`engine_insertmanyvalues` + + """ + + insertmanyvalues_implicit_sentinel: InsertmanyvaluesSentinelOpts + """Options indicating the database supports a form of bulk INSERT where + the autoincrement integer primary key can be reliably used as an ordering + for INSERTed rows. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` + """ insertmanyvalues_page_size: int @@ -2116,15 +2157,7 @@ class Dialect(EventTarget): parameters: _DBAPIMultiExecuteParams, generic_setinputsizes: Optional[_GenericSetInputSizesType], context: ExecutionContext, - ) -> Iterator[ - Tuple[ - str, - _DBAPISingleExecuteParams, - _GenericSetInputSizesType, - int, - int, - ] - ]: + ) -> Iterator[_InsertManyValuesBatch]: """convert executemany parameters for an INSERT into an iterator of statement/single execute values, used by the insertmanyvalues feature. @@ -3112,6 +3145,24 @@ class ExecutionContext: raise NotImplementedError() + def fetchall_for_returning(self, cursor: DBAPICursor) -> Sequence[Any]: + """For a RETURNING result, deliver cursor.fetchall() from the + DBAPI cursor. + + This is a dialect-specific hook for dialects that have special + considerations when calling upon the rows delivered for a + "RETURNING" statement. Default implementation is + ``cursor.fetchall()``. + + This hook is currently used only by the :term:`insertmanyvalues` + feature. Dialects that don't set ``use_insertmanyvalues=True`` + don't need to consider this hook. + + .. versionadded:: 2.0.10 + + """ + raise NotImplementedError() + class ConnectionEventsTarget(EventTarget): """An object which can accept events from :class:`.ConnectionEvents`. 
diff --git a/lib/sqlalchemy/exc.py b/lib/sqlalchemy/exc.py index 45a7e280e..a5a66de87 100644 --- a/lib/sqlalchemy/exc.py +++ b/lib/sqlalchemy/exc.py @@ -51,17 +51,15 @@ class HasDescriptionCode: self.code = code super().__init__(*arg, **kw) + _what_are_we = "error" + def _code_str(self) -> str: if not self.code: return "" else: return ( - "(Background on this error at: " - "https://sqlalche.me/e/%s/%s)" - % ( - _version_token, - self.code, - ) + f"(Background on this {self._what_are_we} at: " + f"https://sqlalche.me/e/{_version_token}/{self.code})" ) def __str__(self) -> str: @@ -831,3 +829,5 @@ class SAPendingDeprecationWarning(PendingDeprecationWarning): class SAWarning(HasDescriptionCode, RuntimeWarning): """Issued at runtime.""" + + _what_are_we = "warning" diff --git a/lib/sqlalchemy/orm/__init__.py b/lib/sqlalchemy/orm/__init__.py index 69cd7f598..7d70d3c7f 100644 --- a/lib/sqlalchemy/orm/__init__.py +++ b/lib/sqlalchemy/orm/__init__.py @@ -32,6 +32,7 @@ from ._orm_constructors import deferred as deferred from ._orm_constructors import dynamic_loader as dynamic_loader from ._orm_constructors import join as join from ._orm_constructors import mapped_column as mapped_column +from ._orm_constructors import orm_insert_sentinel as orm_insert_sentinel from ._orm_constructors import outerjoin as outerjoin from ._orm_constructors import query_expression as query_expression from ._orm_constructors import relationship as relationship diff --git a/lib/sqlalchemy/orm/_orm_constructors.py b/lib/sqlalchemy/orm/_orm_constructors.py index a0fa4da79..563fef3c5 100644 --- a/lib/sqlalchemy/orm/_orm_constructors.py +++ b/lib/sqlalchemy/orm/_orm_constructors.py @@ -42,6 +42,7 @@ from ..exc import InvalidRequestError from ..sql._typing import _no_kw from ..sql.base import _NoArg from ..sql.base import SchemaEventTarget +from ..sql.schema import _InsertSentinelColumnDefault from ..sql.schema import SchemaConst from ..sql.selectable import FromClause from ..util.typing import Annotated @@ -68,7 +69,7 @@ if TYPE_CHECKING: from ..sql._typing import _OnClauseArgument from ..sql._typing import _TypeEngineArgument from ..sql.elements import ColumnElement - from ..sql.schema import _ServerDefaultType + from ..sql.schema import _ServerDefaultArgument from ..sql.schema import FetchedValue from ..sql.selectable import Alias from ..sql.selectable import Subquery @@ -124,7 +125,7 @@ def mapped_column( info: Optional[_InfoType] = None, onupdate: Optional[Any] = None, insert_default: Optional[Any] = _NoArg.NO_ARG, - server_default: Optional[_ServerDefaultType] = None, + server_default: Optional[_ServerDefaultArgument] = None, server_onupdate: Optional[FetchedValue] = None, active_history: bool = False, quote: Optional[bool] = None, @@ -334,6 +335,56 @@ def mapped_column( ) +def orm_insert_sentinel( + name: Optional[str] = None, + type_: Optional[_TypeEngineArgument[Any]] = None, + *, + default: Optional[Any] = None, + omit_from_statements: bool = True, +) -> MappedColumn[Any]: + """Provides a surrogate :func:`_orm.mapped_column` that generates + a so-called :term:`sentinel` column, allowing efficient bulk + inserts with deterministic RETURNING sorting for tables that don't + otherwise have qualifying primary key configurations. + + Use of :func:`_orm.orm_insert_sentinel` is analogous to the use of the + :func:`_schema.insert_sentinel` construct within a Core + :class:`_schema.Table` construct. 
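+
+    For example, within a Declarative mapping (a sketch; ``Base`` and the
+    class and table names are assumed)::
+
+        class MyClass(Base):
+            __tablename__ = "my_table"
+
+            id: Mapped[str] = mapped_column(primary_key=True)
+            _sentinel: Mapped[int] = orm_insert_sentinel()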
+ + Guidelines for adding this construct to a Declarative mapped class + are the same as that of the :func:`_schema.insert_sentinel` construct; + the database table itself also needs to have a column with this name + present. + + For background on how this object is used, see the section + :ref:`engine_insertmanyvalues_sentinel_columns` as part of the + section :ref:`engine_insertmanyvalues`. + + .. seealso:: + + :func:`_schema.insert_sentinel` + + :ref:`engine_insertmanyvalues` + + :ref:`engine_insertmanyvalues_sentinel_columns` + + + .. versionadded:: 2.0.10 + + """ + + return mapped_column( + name=name, + default=default + if default is not None + else _InsertSentinelColumnDefault(), + _omit_from_statements=omit_from_statements, + insert_sentinel=True, + use_existing_column=True, + nullable=True, + ) + + @util.deprecated_params( **{ arg: ( diff --git a/lib/sqlalchemy/orm/bulk_persistence.py b/lib/sqlalchemy/orm/bulk_persistence.py index c096dc3e5..8388d3980 100644 --- a/lib/sqlalchemy/orm/bulk_persistence.py +++ b/lib/sqlalchemy/orm/bulk_persistence.py @@ -131,19 +131,24 @@ def _bulk_insert( return_result: Optional[cursor.CursorResult[Any]] = None - for table, super_mapper in base_mapper._sorted_tables.items(): - if not mapper.isa(super_mapper) or table not in mapper._pks_by_table: - continue + mappers_to_run = [ + (table, mp) + for table, mp in base_mapper._sorted_tables.items() + if table in mapper._pks_by_table + ] + + if return_defaults: + # not used by new-style bulk inserts, only used for legacy + bookkeeping = True + elif len(mappers_to_run) > 1: + # if we have more than one table, mapper to run where we will be + # either horizontally splicing, or copying values between tables, + # we need the "bookkeeping" / deterministic returning order + bookkeeping = True + else: + bookkeeping = False - is_joined_inh_supertable = super_mapper is not mapper - bookkeeping = ( - is_joined_inh_supertable - or return_defaults - or ( - use_orm_insert_stmt is not None - and bool(use_orm_insert_stmt._returning) - ) - ) + for table, super_mapper in mappers_to_run: records = ( ( @@ -173,6 +178,7 @@ def _bulk_insert( render_nulls=render_nulls, ) ) + result = persistence._emit_insert_statements( base_mapper, None, @@ -187,6 +193,7 @@ def _bulk_insert( if not use_orm_insert_stmt._returning or return_result is None: return_result = result elif result.returns_rows: + assert bookkeeping return_result = return_result.splice_horizontally(result) if return_defaults and isstates: @@ -507,9 +514,11 @@ class ORMDMLState(AbstractORMCompileState): dml_level_statement = dml_level_statement.return_defaults( # this is a little weird looking, but by passing # primary key as the main list of cols, this tells - # return_defaults to omit server-default cols. Since - # we have cols_to_return, just return what we asked for - # (plus primary key, which ORM persistence needs since + # return_defaults to omit server-default cols (and + # actually all cols, due to some weird thing we should + # clean up in crud.py). + # Since we have cols_to_return, just return what we asked + # for (plus primary key, which ORM persistence needs since # we likely set bookkeeping=True here, which is another # whole thing...). 
We dont want to clutter the # statement up with lots of other cols the user didn't diff --git a/lib/sqlalchemy/orm/decl_api.py b/lib/sqlalchemy/orm/decl_api.py index ed001023b..2f8289acf 100644 --- a/lib/sqlalchemy/orm/decl_api.py +++ b/lib/sqlalchemy/orm/decl_api.py @@ -241,6 +241,7 @@ class _declared_attr_common: self, fn: Callable[..., Any], cascading: bool = False, + quiet: bool = False, ): # suppport # @declared_attr @@ -254,6 +255,7 @@ class _declared_attr_common: self.fget = fn self._cascading = cascading + self._quiet = quiet self.__doc__ = fn.__doc__ def _collect_return_annotation(self) -> Optional[Type[Any]]: diff --git a/lib/sqlalchemy/orm/decl_base.py b/lib/sqlalchemy/orm/decl_base.py index beede0ddb..b7d6dd8cf 100644 --- a/lib/sqlalchemy/orm/decl_base.py +++ b/lib/sqlalchemy/orm/decl_base.py @@ -459,6 +459,7 @@ class _ClassScanMapperConfig(_MapperConfig): "mapper_args", "mapper_args_fn", "inherits", + "single", "allow_dataclass_fields", "dataclass_setup_arguments", "is_dataclass_prior_to_mapping", @@ -483,6 +484,7 @@ class _ClassScanMapperConfig(_MapperConfig): table_args: Optional[_TableArgsType] mapper_args_fn: Optional[Callable[[], Dict[str, Any]]] inherits: Optional[Type[Any]] + single: bool is_dataclass_prior_to_mapping: bool allow_unmapped_annotations: bool @@ -527,7 +529,7 @@ class _ClassScanMapperConfig(_MapperConfig): self.declared_columns = util.OrderedSet() self.column_ordering = {} self.column_copies = {} - + self.single = False self.dataclass_setup_arguments = dca = getattr( self.cls, "_sa_apply_dc_transforms", None ) @@ -866,7 +868,7 @@ class _ClassScanMapperConfig(_MapperConfig): # should only be __table__ continue elif class_mapped: - if _is_declarative_props(obj): + if _is_declarative_props(obj) and not obj._quiet: util.warn( "Regular (i.e. not __special__) " "attribute '%s.%s' uses @declared_attr, " @@ -1783,6 +1785,10 @@ class _ClassScanMapperConfig(_MapperConfig): self.inherits = inherits + clsdict_view = self.clsdict_view + if "__table__" not in clsdict_view and self.tablename is None: + self.single = True + def _setup_inheriting_columns(self, mapper_kw: _MapperKwArgs) -> None: table = self.local_table cls = self.cls @@ -1809,6 +1815,7 @@ class _ClassScanMapperConfig(_MapperConfig): ) if table is None: + # single table inheritance. # ensure no table args if table_args: diff --git a/lib/sqlalchemy/orm/mapper.py b/lib/sqlalchemy/orm/mapper.py index 81b66de03..731983ff4 100644 --- a/lib/sqlalchemy/orm/mapper.py +++ b/lib/sqlalchemy/orm/mapper.py @@ -3273,6 +3273,9 @@ class Mapper( """ + if column is not None and sql_base._never_select_column(column): + return True + # check for class-bound attributes and/or descriptors, # either local or from an inherited class # ignore dataclass field default values diff --git a/lib/sqlalchemy/orm/persistence.py b/lib/sqlalchemy/orm/persistence.py index a12156eb5..1af55df00 100644 --- a/lib/sqlalchemy/orm/persistence.py +++ b/lib/sqlalchemy/orm/persistence.py @@ -955,8 +955,13 @@ def _emit_insert_statements( # if a user query with RETURNING was passed, we definitely need # to use RETURNING. 
returning_is_required_anyway = bool(use_orm_insert_stmt._returning) + deterministic_results_reqd = ( + returning_is_required_anyway + and use_orm_insert_stmt._sort_by_parameter_order + ) or bookkeeping else: returning_is_required_anyway = False + deterministic_results_reqd = bookkeeping cached_stmt = base_mapper._memo(("insert", table), table.insert) exec_opt = {"compiled_cache": base_mapper._compiled_cache} @@ -1057,22 +1062,31 @@ def _emit_insert_statements( # know that we are using RETURNING in any case records = list(records) - if ( - not hasvalue - and connection.dialect.insert_executemany_returning - and len(records) > 1 + + if returning_is_required_anyway or ( + not hasvalue and len(records) > 1 ): - do_executemany = True - elif returning_is_required_anyway: - if connection.dialect.insert_executemany_returning: + if ( + deterministic_results_reqd + and connection.dialect.insert_executemany_returning_sort_by_parameter_order # noqa: E501 + ) or ( + not deterministic_results_reqd + and connection.dialect.insert_executemany_returning + ): do_executemany = True - else: + elif returning_is_required_anyway: + if deterministic_results_reqd: + dt = " with RETURNING and sort by parameter order" + else: + dt = " with RETURNING" raise sa_exc.InvalidRequestError( f"Can't use explicit RETURNING for bulk INSERT " f"operation with " f"{connection.dialect.dialect_description} backend; " - f"executemany is not supported with RETURNING" + f"executemany{dt} is not enabled for this dialect." ) + else: + do_executemany = False else: do_executemany = False @@ -1084,13 +1098,19 @@ def _emit_insert_statements( ) ): statement = statement.return_defaults( - *mapper._server_default_cols[table] + *mapper._server_default_cols[table], + sort_by_parameter_order=bookkeeping, ) if mapper.version_id_col is not None: - statement = statement.return_defaults(mapper.version_id_col) + statement = statement.return_defaults( + mapper.version_id_col, + sort_by_parameter_order=bookkeeping, + ) elif do_executemany: - statement = statement.return_defaults(*table.primary_key) + statement = statement.return_defaults( + *table.primary_key, sort_by_parameter_order=bookkeeping + ) if do_executemany: multiparams = [rec[2] for rec in records] diff --git a/lib/sqlalchemy/orm/properties.py b/lib/sqlalchemy/orm/properties.py index f00775874..916b9d901 100644 --- a/lib/sqlalchemy/orm/properties.py +++ b/lib/sqlalchemy/orm/properties.py @@ -665,7 +665,11 @@ class MappedColumn( ) -> None: column = self.column - if self._use_existing_column and decl_scan.inherits: + if ( + self._use_existing_column + and decl_scan.inherits + and decl_scan.single + ): if decl_scan.is_deferred: raise sa_exc.ArgumentError( "Can't use use_existing_column with deferred mappers" diff --git a/lib/sqlalchemy/schema.py b/lib/sqlalchemy/schema.py index 392487282..19782bd7c 100644 --- a/lib/sqlalchemy/schema.py +++ b/lib/sqlalchemy/schema.py @@ -60,6 +60,7 @@ from .sql.schema import ForeignKeyConstraint as ForeignKeyConstraint from .sql.schema import HasConditionalDDL as HasConditionalDDL from .sql.schema import Identity as Identity from .sql.schema import Index as Index +from .sql.schema import insert_sentinel as insert_sentinel from .sql.schema import MetaData as MetaData from .sql.schema import PrimaryKeyConstraint as PrimaryKeyConstraint from .sql.schema import SchemaConst as SchemaConst diff --git a/lib/sqlalchemy/sql/_typing.py b/lib/sqlalchemy/sql/_typing.py index 14b1b9594..596493b7c 100644 --- a/lib/sqlalchemy/sql/_typing.py +++ b/lib/sqlalchemy/sql/_typing.py 
@@ -12,6 +12,7 @@ from typing import Any from typing import Callable from typing import Dict from typing import Mapping +from typing import NoReturn from typing import Set from typing import Tuple from typing import Type @@ -364,3 +365,8 @@ def _no_kw() -> exc.ArgumentError: "Additional keyword arguments are not accepted by this " "function/method. The presence of **kw is for pep-484 typing purposes" ) + + +def _unexpected_kw(methname: str, kw: Dict[str, Any]) -> NoReturn: + k = list(kw)[0] + raise TypeError(f"{methname} got an unexpected keyword argument '{k}'") diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py index 6267fd814..253927770 100644 --- a/lib/sqlalchemy/sql/base.py +++ b/lib/sqlalchemy/sql/base.py @@ -30,6 +30,7 @@ from typing import Iterator from typing import List from typing import Mapping from typing import MutableMapping +from typing import NamedTuple from typing import NoReturn from typing import Optional from typing import overload @@ -75,6 +76,8 @@ if TYPE_CHECKING: from .elements import NamedColumn from .elements import SQLCoreOperations from .elements import TextClause + from .schema import Column + from .schema import DefaultGenerator from .selectable import _JoinTargetElement from .selectable import _SelectIterable from .selectable import FromClause @@ -122,6 +125,35 @@ _Fn = TypeVar("_Fn", bound=Callable[..., Any]) _AmbiguousTableNameMap = MutableMapping[str, str] +class _DefaultDescriptionTuple(NamedTuple): + arg: Any + is_scalar: Optional[bool] + is_callable: Optional[bool] + is_sentinel: Optional[bool] + + @classmethod + def _from_column_default( + cls, default: Optional[DefaultGenerator] + ) -> _DefaultDescriptionTuple: + return ( + _DefaultDescriptionTuple( + default.arg, # type: ignore + default.is_scalar, + default.is_callable, + default.is_sentinel, + ) + if default + and ( + default.has_arg + or (not default.for_update and default.is_sentinel) + ) + else _DefaultDescriptionTuple(None, None, None, None) + ) + + +_never_select_column = operator.attrgetter("_omit_from_statements") + + class _EntityNamespace(Protocol): def __getattr__(self, key: str) -> SQLCoreOperations[Any]: ... 
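# illustrative note (hypothetical column): for Column("x", Integer,
# default=5), _DefaultDescriptionTuple._from_column_default(col.default)
# yields _DefaultDescriptionTuple(arg=5, is_scalar=True,
# is_callable=False, is_sentinel=False)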
@@ -1303,6 +1335,25 @@ class SchemaVisitor(ClauseVisitor): __traverse_options__ = {"schema_visitor": True} +class _SentinelDefaultCharacterization(Enum): + NONE = "none" + UNKNOWN = "unknown" + CLIENTSIDE = "clientside" + SENTINEL_DEFAULT = "sentinel_default" + SERVERSIDE = "serverside" + IDENTITY = "identity" + SEQUENCE = "sequence" + + +class _SentinelColumnCharacterization(NamedTuple): + columns: Optional[Sequence[Column[Any]]] = None + is_explicit: bool = False + is_autoinc: bool = False + default_characterization: _SentinelDefaultCharacterization = ( + _SentinelDefaultCharacterization.NONE + ) + + _COLKEY = TypeVar("_COLKEY", Union[None, str], str) _COL_co = TypeVar("_COL_co", bound="ColumnElement[Any]", covariant=True) diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index 9c68b311a..554a84112 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -41,6 +41,7 @@ from typing import ClassVar from typing import Dict from typing import FrozenSet from typing import Iterable +from typing import Iterator from typing import List from typing import Mapping from typing import MutableMapping @@ -70,6 +71,7 @@ from ._typing import is_column_element from ._typing import is_dml from .base import _from_objects from .base import _NONE_NAME +from .base import _SentinelDefaultCharacterization from .base import Executable from .base import NO_ARG from .elements import ClauseElement @@ -81,6 +83,7 @@ from .visitors import prefix_anon_map from .visitors import Visitable from .. import exc from .. import util +from ..util import FastIntFlag from ..util.typing import Literal from ..util.typing import Protocol from ..util.typing import TypedDict @@ -100,6 +103,7 @@ if typing.TYPE_CHECKING: from .elements import ColumnElement from .elements import Label from .functions import Function + from .schema import Table from .selectable import AliasedReturnsRows from .selectable import CompoundSelectState from .selectable import CTE @@ -109,9 +113,14 @@ if typing.TYPE_CHECKING: from .selectable import Select from .selectable import SelectState from .type_api import _BindProcessorType + from .type_api import _SentinelProcessorType from ..engine.cursor import CursorResultMetaData from ..engine.interfaces import _CoreSingleExecuteParams + from ..engine.interfaces import _DBAPIAnyExecuteParams + from ..engine.interfaces import _DBAPIMultiExecuteParams + from ..engine.interfaces import _DBAPISingleExecuteParams from ..engine.interfaces import _ExecuteOptions + from ..engine.interfaces import _GenericSetInputSizesType from ..engine.interfaces import _MutableCoreSingleExecuteParams from ..engine.interfaces import Dialect from ..engine.interfaces import SchemaTranslateMapType @@ -460,12 +469,160 @@ class ExpandedState(NamedTuple): class _InsertManyValues(NamedTuple): - """represents state to use for executing an "insertmanyvalues" statement""" + """represents state to use for executing an "insertmanyvalues" statement. + + The primary consumers of this object are the + :meth:`.SQLCompiler._deliver_insertmanyvalues_batches` and + :meth:`.DefaultDialect._deliver_insertmanyvalues_batches` methods. + + .. versionadded:: 2.0 + + """ is_default_expr: bool + """if True, the statement is of the form + ``INSERT INTO TABLE DEFAULT VALUES``, and can't be rewritten as a "batch" + + """ + single_values_expr: str + """The rendered "values" clause of the INSERT statement. + + This is typically the parenthesized section e.g. "(?, ?, ?)" or similar. 
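+    For example (illustrative): for a two-column INSERT on a positional
+    dialect, a batch of three parameter sets expands ``VALUES (?, ?)``
+    into ``VALUES (?, ?), (?, ?), (?, ?)``.
+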
+    The insertmanyvalues logic uses this string as a search and replace
+    target.
+
+    """
+
+    insert_crud_params: List[crud._CrudParamElementStr]
+    """List of Column / bind names etc. used while rewriting the statement"""
+
+    num_positional_params_counted: int
+    """the number of bound parameters in a single-row statement.
+
+    This count may be larger or smaller than the actual number of columns
+    targeted in the INSERT, as it accommodates SQL expressions
+    in the values list that may have zero or more parameters embedded
+    within them.
+
+    This count is part of what's used to organize rewritten parameter lists
+    when batching.
+
+    """
+
+    sort_by_parameter_order: bool = False
+    """True if the
+    :paramref:`_dml.Insert.returning.sort_by_parameter_order` parameter
+    was used on the INSERT.
+
+    All of the attributes following this will only be used if this is True.
+
+    """
+
+    includes_upsert_behaviors: bool = False
+    """if True, we have to accommodate upsert behaviors.
+
+    This will in some cases downgrade an "insertmanyvalues" operation
+    that requests deterministic ordering.
+
+    """
+
+    sentinel_columns: Optional[Sequence[Column[Any]]] = None
+    """List of sentinel columns that were located.
+
+    This list is only here if the INSERT asked for
+    sort_by_parameter_order=True,
+    and dialect-appropriate sentinel columns were located.
+
+    .. versionadded:: 2.0.10
+
+    """
+
+    num_sentinel_columns: int = 0
+    """how many sentinel columns are in the above list, if any.
+
+    This is the same as
+    ``len(sentinel_columns) if sentinel_columns is not None else 0``
+
+    """
+
+    sentinel_param_keys: Optional[Sequence[Union[str, int]]] = None
+    """parameter str keys / int indexes in each param dictionary / tuple
+    that would link to the client side "sentinel" values for that row, which
+    we can use to match up parameter sets to result rows.
+
+    This is only present if sentinel_columns is present and the INSERT
+    statement actually refers to client side values for these sentinel
+    columns.
+
+    .. versionadded:: 2.0.10
+
+    """
+
+    implicit_sentinel: bool = False
+    """if True, we have exactly one sentinel column and it uses a server
+    side value, which currently must be an incrementing integer value.
+
+    The dialect in question would have asserted that it supports receiving
+    these values back and sorting on that value as a means of guaranteeing
+    correlation with the incoming parameter list.
+
+    .. versionadded:: 2.0.10
+
+    """
+
+    embed_values_counter: bool = False
+    """Whether to embed an incrementing integer counter in each parameter
+    set within the VALUES clause as parameters are batched over.
+
+    This is only used for a specific INSERT..SELECT..VALUES..RETURNING
+    syntax where a subquery is used to produce value tuples. Current
+    support includes PostgreSQL and Microsoft SQL Server.
+
+    .. versionadded:: 2.0.10
+
+    """
+
+
+class _InsertManyValuesBatch(NamedTuple):
+    """represents an individual batch SQL statement for insertmanyvalues.
+
+    This is passed through the
+    :meth:`.SQLCompiler._deliver_insertmanyvalues_batches` and
+    :meth:`.DefaultDialect._deliver_insertmanyvalues_batches` methods out
+    to the :class:`.Connection` within the
+    :meth:`.Connection._exec_insertmany_context` method.
+
+    ..
versionadded:: 2.0.10 + + """ + + replaced_statement: str + replaced_parameters: _DBAPIAnyExecuteParams + processed_setinputsizes: Optional[_GenericSetInputSizesType] + batch: Sequence[_DBAPISingleExecuteParams] + batch_size: int + batchnum: int + total_batches: int + rows_sorted: bool + is_downgraded: bool + + +class InsertmanyvaluesSentinelOpts(FastIntFlag): + """bitflag enum indicating styles of PK defaults + which can work as implicit sentinel columns + + """ + + NOT_SUPPORTED = 1 + AUTOINCREMENT = 2 + IDENTITY = 4 + SEQUENCE = 8 + + ANY_AUTOINCREMENT = AUTOINCREMENT | IDENTITY | SEQUENCE + _SUPPORTED_OR_NOT = NOT_SUPPORTED | ANY_AUTOINCREMENT + + USE_INSERT_FROM_SELECT = 16 + RENDER_SELECT_COL_CASTS = 64 class CompilerState(IntEnum): @@ -1484,6 +1641,7 @@ class SQLCompiler(Compiled): if self._insertmanyvalues: positions = [] + single_values_expr = re.sub( self._positional_pattern, find_position, @@ -1499,13 +1657,19 @@ class SQLCompiler(Compiled): for v in self._insertmanyvalues.insert_crud_params ] - self._insertmanyvalues = _InsertManyValues( - is_default_expr=self._insertmanyvalues.is_default_expr, + sentinel_param_int_idxs = ( + [ + self.positiontup.index(cast(str, _param_key)) + for _param_key in self._insertmanyvalues.sentinel_param_keys # noqa: E501 + ] + if self._insertmanyvalues.sentinel_param_keys is not None + else None + ) + + self._insertmanyvalues = self._insertmanyvalues._replace( single_values_expr=single_values_expr, insert_crud_params=insert_crud_params, - num_positional_params_counted=( - self._insertmanyvalues.num_positional_params_counted - ), + sentinel_param_keys=sentinel_param_int_idxs, ) def _process_numeric(self): @@ -1574,15 +1738,21 @@ class SQLCompiler(Compiled): for v in self._insertmanyvalues.insert_crud_params ] - self._insertmanyvalues = _InsertManyValues( - is_default_expr=self._insertmanyvalues.is_default_expr, + sentinel_param_int_idxs = ( + [ + self.positiontup.index(cast(str, _param_key)) + for _param_key in self._insertmanyvalues.sentinel_param_keys # noqa: E501 + ] + if self._insertmanyvalues.sentinel_param_keys is not None + else None + ) + + self._insertmanyvalues = self._insertmanyvalues._replace( # This has the numbers (:1, :2) single_values_expr=single_values_expr, # The single binds are instead %s so they can be formatted insert_crud_params=insert_crud_params, - num_positional_params_counted=( - self._insertmanyvalues.num_positional_params_counted - ), + sentinel_param_keys=sentinel_param_int_idxs, ) @util.memoized_property @@ -1611,6 +1781,23 @@ class SQLCompiler(Compiled): if value is not None } + @util.memoized_property + def _imv_sentinel_value_resolvers( + self, + ) -> Optional[Sequence[Optional[_SentinelProcessorType[Any]]]]: + imv = self._insertmanyvalues + if imv is None or imv.sentinel_columns is None: + return None + + sentinel_value_resolvers = [ + _scol.type._cached_sentinel_value_processor(self.dialect) + for _scol in imv.sentinel_columns + ] + if util.NONE_SET.issuperset(sentinel_value_resolvers): + return None + else: + return sentinel_value_resolvers + def is_subquery(self): return len(self.stack) > 1 @@ -5023,27 +5210,111 @@ class SQLCompiler(Compiled): ) return dialect_hints, table_text - def _insert_stmt_should_use_insertmanyvalues(self, statement): - return ( - self.dialect.supports_multivalues_insert - and self.dialect.use_insertmanyvalues - # note self.implicit_returning or self._result_columns - # implies self.dialect.insert_returning capability - and ( - self.dialect.use_insertmanyvalues_wo_returning - or 
self.implicit_returning - or self._result_columns + # within the realm of "insertmanyvalues sentinel columns", + # these lookups match different kinds of Column() configurations + # to specific backend capabilities. they are broken into two + # lookups, one for autoincrement columns and the other for non + # autoincrement columns + _sentinel_col_non_autoinc_lookup = util.immutabledict( + { + _SentinelDefaultCharacterization.CLIENTSIDE: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.SENTINEL_DEFAULT: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.NONE: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.IDENTITY: ( + InsertmanyvaluesSentinelOpts.IDENTITY + ), + _SentinelDefaultCharacterization.SEQUENCE: ( + InsertmanyvaluesSentinelOpts.SEQUENCE + ), + } + ) + _sentinel_col_autoinc_lookup = _sentinel_col_non_autoinc_lookup.union( + { + _SentinelDefaultCharacterization.NONE: ( + InsertmanyvaluesSentinelOpts.AUTOINCREMENT + ), + } + ) + + def _get_sentinel_column_for_table( + self, table: Table + ) -> Optional[Sequence[Column[Any]]]: + """given a :class:`.Table`, return a usable sentinel column or + columns for this dialect if any. + + Return None if no sentinel columns could be identified, or raise an + error if a column was marked as a sentinel explicitly but isn't + compatible with this dialect. + + """ + + sentinel_opts = self.dialect.insertmanyvalues_implicit_sentinel + sentinel_characteristics = table._sentinel_column_characteristics + + sent_cols = sentinel_characteristics.columns + + if sent_cols is None: + return None + + if sentinel_characteristics.is_autoinc: + bitmask = self._sentinel_col_autoinc_lookup.get( + sentinel_characteristics.default_characterization, 0 ) - ) + else: + bitmask = self._sentinel_col_non_autoinc_lookup.get( + sentinel_characteristics.default_characterization, 0 + ) + + if sentinel_opts & bitmask: + return sent_cols + + if sentinel_characteristics.is_explicit: + # a column was explicitly marked as insert_sentinel=True, + # however it is not compatible with this dialect. they should + # not indicate this column as a sentinel if they need to include + # this dialect. + + # TODO: do we want non-primary key explicit sentinel cols + # that can gracefully degrade for some backends? + # insert_sentinel="degrade" perhaps. not for the initial release. + # I am hoping people are generally not dealing with this sentinel + # business at all. + + # if is_explicit is True, there will be only one sentinel column. + + raise exc.InvalidRequestError( + f"Column {sent_cols[0]} can't be explicitly " + "marked as a sentinel column when using the " + f"{self.dialect.name} dialect, as the " + "particular type of default generation on this column is " + "not currently compatible with this dialect's specific " + f"INSERT..RETURNING syntax which can receive the " + "server-generated value in " + "a deterministic way. To remove this error, remove " + "insert_sentinel=True from primary key autoincrement " + "columns; these columns are automatically used as " + "sentinels for supported dialects in any case." 
+ ) + + return None def _deliver_insertmanyvalues_batches( - self, statement, parameters, generic_setinputsizes, batch_size - ): + self, + statement: str, + parameters: _DBAPIMultiExecuteParams, + generic_setinputsizes: Optional[_GenericSetInputSizesType], + batch_size: int, + sort_by_parameter_order: bool, + ) -> Iterator[_InsertManyValuesBatch]: imv = self._insertmanyvalues assert imv is not None - executemany_values = f"({imv.single_values_expr})" - lenparams = len(parameters) if imv.is_default_expr and not self.dialect.supports_default_metavalue: # backend doesn't support @@ -5058,19 +5329,41 @@ class SQLCompiler(Compiled): # cursor.lastrowid etc. still goes through the more heavyweight # "ExecutionContext per statement" system as it isn't usable # as a generic "RETURNING" approach - for batchnum, param in enumerate(parameters, 1): - yield ( + use_row_at_a_time = True + downgraded = False + elif not self.dialect.supports_multivalues_insert or ( + sort_by_parameter_order + and self._result_columns + and (imv.sentinel_columns is None or imv.includes_upsert_behaviors) + ): + # deterministic order was requested and the compiler could + # not organize sentinel columns for this dialect/statement. + # use row at a time + use_row_at_a_time = True + downgraded = True + else: + use_row_at_a_time = False + downgraded = False + + if use_row_at_a_time: + for batchnum, param in enumerate( + cast("Sequence[_DBAPISingleExecuteParams]", parameters), 1 + ): + yield _InsertManyValuesBatch( statement, param, generic_setinputsizes, + [param], + batch_size, batchnum, lenparams, + sort_by_parameter_order, + downgraded, ) return - else: - statement = statement.replace( - executemany_values, "__EXECMANY_TOKEN__" - ) + + executemany_values = f"({imv.single_values_expr})" + statement = statement.replace(executemany_values, "__EXECMANY_TOKEN__") # Use optional insertmanyvalues_max_parameters # to further shrink the batch size so that there are no more than @@ -5094,7 +5387,7 @@ class SQLCompiler(Compiled): batches = list(parameters) - processed_setinputsizes = None + processed_setinputsizes: Optional[_GenericSetInputSizesType] = None batchnum = 1 total_batches = lenparams // batch_size + ( 1 if lenparams % batch_size else 0 @@ -5124,10 +5417,14 @@ class SQLCompiler(Compiled): ) return formatted + if imv.embed_values_counter: + imv_values_counter = ", _IMV_VALUES_COUNTER" + else: + imv_values_counter = "" formatted_values_clause = f"""({', '.join( apply_placeholders(bind_keys, formatted) for _, _, formatted, bind_keys in insert_crud_params - )})""" + )}{imv_values_counter})""" keys_to_replace = all_keys.intersection( escaped_bind_names.get(key, key) @@ -5143,7 +5440,13 @@ class SQLCompiler(Compiled): formatted_values_clause = "" keys_to_replace = set() base_parameters = {} - executemany_values_w_comma = f"({imv.single_values_expr}), " + + if imv.embed_values_counter: + executemany_values_w_comma = ( + f"({imv.single_values_expr}, _IMV_VALUES_COUNTER), " + ) + else: + executemany_values_w_comma = f"({imv.single_values_expr}), " all_names_we_will_expand: Set[str] = set() for elem in imv.insert_crud_params: @@ -5176,7 +5479,7 @@ class SQLCompiler(Compiled): ) while batches: - batch = batches[0:batch_size] + batch = cast("Sequence[Any]", batches[0:batch_size]) batches[0:batch_size] = [] if generic_setinputsizes: @@ -5196,7 +5499,7 @@ class SQLCompiler(Compiled): if self.positional: num_ins_params = imv.num_positional_params_counted - batch_iterator: Iterable[Tuple[Any, ...]] + batch_iterator: Iterable[Sequence[Any]] 
if num_ins_params == len(batch[0]):
    extra_params_left = extra_params_right = ()
    batch_iterator = batch
@@ -5208,9 +5511,19 @@
                    for b in batch
                )

-                expanded_values_string = (
-                    executemany_values_w_comma * len(batch)
-                )[:-2]
+                if imv.embed_values_counter:
+                    expanded_values_string = (
+                        "".join(
+                            executemany_values_w_comma.replace(
+                                "_IMV_VALUES_COUNTER", str(i)
+                            )
+                            for i, _ in enumerate(batch)
+                        )
+                    )[:-2]
+                else:
+                    expanded_values_string = (
+                        (executemany_values_w_comma * len(batch))
+                    )[:-2]

            if self._numeric_binds and num_ins_params > 0:
                # numeric will always number the parameters inside of
@@ -5254,12 +5567,14 @@
                replaced_parameters = base_parameters.copy()

                for i, param in enumerate(batch):
-                    replaced_values_clauses.append(
-                        formatted_values_clause.replace(
-                            "EXECMANY_INDEX__", str(i)
-                        )
+
+                    fmv = formatted_values_clause.replace(
+                        "EXECMANY_INDEX__", str(i)
                    )
+                    if imv.embed_values_counter:
+                        fmv = fmv.replace("_IMV_VALUES_COUNTER", str(i))
+                    replaced_values_clauses.append(fmv)
                    replaced_parameters.update(
                        {f"{key}__{i}": param[key] for key in keys_to_replace}
                    )
@@ -5269,12 +5584,16 @@
                ", ".join(replaced_values_clauses),
            )

-            yield (
+            yield _InsertManyValuesBatch(
                replaced_statement,
                replaced_parameters,
                processed_setinputsizes,
+                batch,
+                batch_size,
                batchnum,
                total_batches,
+                sort_by_parameter_order,
+                False,
            )
            batchnum += 1
@@ -5360,6 +5679,13 @@
                    "version settings does not support "
                    "in-place multirow inserts." % self.dialect.name
                )
+            elif (
+                self.implicit_returning or insert_stmt._returning
+            ) and insert_stmt._sort_by_parameter_order:
+                raise exc.CompileError(
+                    "RETURNING cannot be deterministically sorted when "
+                    "using an INSERT which includes multi-row values()."
+                )
            crud_params_single = crud_params_struct.single_params
        else:
            crud_params_single = crud_params_struct.single_params
@@ -5390,11 +5716,82 @@
            [expr for _, expr, _, _ in crud_params_single]
        )

-        if self.implicit_returning or insert_stmt._returning:
+        # look for insertmanyvalues attributes that would have been configured
+        # by crud.py as it scanned through the columns to be part of the
+        # INSERT
+        use_insertmanyvalues = crud_params_struct.use_insertmanyvalues
+        named_sentinel_params: Optional[Sequence[str]] = None
+        add_sentinel_cols = None
+        implicit_sentinel = False
+
+        returning_cols = self.implicit_returning or insert_stmt._returning
+        if returning_cols:
+
+            add_sentinel_cols = crud_params_struct.use_sentinel_columns
+
+            if add_sentinel_cols is not None:
+                assert use_insertmanyvalues
+
+                # search for the sentinel column explicitly present
+                # in the INSERT columns list, and additionally check that
+                # this column has a bound parameter name set up that's in the
+                # parameter list. If both of these cases are present, it means
+                # we will have a client side value for the sentinel in each
+                # parameter set.
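+                # illustrative note: e.g. for INSERT INTO t (id, data)
+                # where each parameter set supplies a client-generated
+                # "id" value (such as a uuid), that value can key each
+                # RETURNING row back to its originating parameter set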
+ + _params_by_col = { + col: param_names + for col, _, _, param_names in crud_params_single + } + named_sentinel_params = [] + for _add_sentinel_col in add_sentinel_cols: + if _add_sentinel_col not in _params_by_col: + named_sentinel_params = None + break + param_name = self._within_exec_param_key_getter( + _add_sentinel_col + ) + if param_name not in _params_by_col[_add_sentinel_col]: + named_sentinel_params = None + break + named_sentinel_params.append(param_name) + + if named_sentinel_params is None: + # if we are not going to have a client side value for + # the sentinel in the parameter set, that means it's + # an autoincrement, an IDENTITY, or a server-side SQL + # expression like nextval('seqname'). So this is + # an "implicit" sentinel; we will look for it in + # RETURNING + # only, and then sort on it. For this case on PG, + # SQL Server we have to use a special INSERT form + # that guarantees the server side function lines up with + # the entries in the VALUES. + if ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + ): + implicit_sentinel = True + else: + # here, we are not using a sentinel at all + # and we are likely the SQLite dialect. + # The first add_sentinel_col that we have should not + # be marked as "insert_sentinel=True". if it was, + # an error should have been raised in + # _get_sentinel_column_for_table. + assert not add_sentinel_cols[0]._insert_sentinel, ( + "sentinel selection rules should have prevented " + "us from getting here for this dialect" + ) + + # always put the sentinel columns last. even if they are + # in the returning list already, they will be there twice + # then. + returning_cols = list(returning_cols) + list(add_sentinel_cols) returning_clause = self.returning_clause( insert_stmt, - self.implicit_returning or insert_stmt._returning, + returning_cols, populate_result_map=toplevel, ) @@ -5423,9 +5820,8 @@ class SQLCompiler(Compiled): text += " %s" % select_text elif not crud_params_single and supports_default_values: text += " DEFAULT VALUES" - if toplevel and self._insert_stmt_should_use_insertmanyvalues( - insert_stmt - ): + if use_insertmanyvalues: + self._insertmanyvalues = _InsertManyValues( True, self.dialect.default_metavalue_token, @@ -5433,6 +5829,17 @@ class SQLCompiler(Compiled): "List[crud._CrudParamElementStr]", crud_params_single ), counted_bindparam, + sort_by_parameter_order=( + insert_stmt._sort_by_parameter_order + ), + includes_upsert_behaviors=( + insert_stmt._post_values_clause is not None + ), + sentinel_columns=add_sentinel_cols, + num_sentinel_columns=len(add_sentinel_cols) + if add_sentinel_cols + else 0, + implicit_sentinel=implicit_sentinel, ) elif compile_state._has_multi_parameters: text += " VALUES %s" % ( @@ -5440,11 +5847,9 @@ class SQLCompiler(Compiled): "(%s)" % (", ".join(value for _, _, value, _ in crud_param_set)) for crud_param_set in crud_params_struct.all_multi_params - ) + ), ) else: - # TODO: why is third element of crud_params_single not str - # already? insert_single_values_expr = ", ".join( [ value @@ -5455,20 +5860,90 @@ class SQLCompiler(Compiled): ] ) - text += " VALUES (%s)" % insert_single_values_expr - if toplevel and self._insert_stmt_should_use_insertmanyvalues( - insert_stmt - ): + if use_insertmanyvalues: + + if ( + implicit_sentinel + and ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT + ) + # this is checking if we have + # INSERT INTO table (id) VALUES (DEFAULT). 
+ and not (crud_params_struct.is_default_metavalue_only) + ): + # if we have a sentinel column that is server generated, + # then for selected backends render the VALUES list as a + # subquery. This is the orderable form supported by + # PostgreSQL and SQL Server. + embed_sentinel_value = True + + render_bind_casts = ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS + ) + + colnames = ", ".join( + f"p{i}" for i, _ in enumerate(crud_params_single) + ) + + if render_bind_casts: + # render casts for the SELECT list. For PG, we are + # already rendering bind casts in the parameter list, + # selectively for the more "tricky" types like ARRAY. + # however, even for the "easy" types, if the parameter + # is NULL for every entry, PG gives up and says + # "it must be TEXT", which fails for other easy types + # like ints. So we cast on this side too. + colnames_w_cast = ", ".join( + self.render_bind_cast( + col.type, + col.type._unwrapped_dialect_impl(self.dialect), + f"p{i}", + ) + for i, (col, *_) in enumerate(crud_params_single) + ) + else: + colnames_w_cast = colnames + + text += ( + f" SELECT {colnames_w_cast} FROM " + f"(VALUES ({insert_single_values_expr})) " + f"AS imp_sen({colnames}, sen_counter) " + "ORDER BY sen_counter" + ) + else: + # otherwise, if no sentinel or backend doesn't support + # orderable subquery form, use a plain VALUES list + embed_sentinel_value = False + text += f" VALUES ({insert_single_values_expr})" + self._insertmanyvalues = _InsertManyValues( - False, - insert_single_values_expr, - cast( + is_default_expr=False, + single_values_expr=insert_single_values_expr, + insert_crud_params=cast( "List[crud._CrudParamElementStr]", crud_params_single, ), - counted_bindparam, + num_positional_params_counted=counted_bindparam, + sort_by_parameter_order=( + insert_stmt._sort_by_parameter_order + ), + includes_upsert_behaviors=( + insert_stmt._post_values_clause is not None + ), + sentinel_columns=add_sentinel_cols, + num_sentinel_columns=len(add_sentinel_cols) + if add_sentinel_cols + else 0, + sentinel_param_keys=named_sentinel_params, + implicit_sentinel=implicit_sentinel, + embed_values_counter=embed_sentinel_value, ) + else: + text += f" VALUES ({insert_single_values_expr})" + if insert_stmt._post_values_clause is not None: post_values_clause = self.process( insert_stmt._post_values_clause, **kw diff --git a/lib/sqlalchemy/sql/crud.py b/lib/sqlalchemy/sql/crud.py index 04b62d1ff..563f61c04 100644 --- a/lib/sqlalchemy/sql/crud.py +++ b/lib/sqlalchemy/sql/crud.py @@ -34,6 +34,7 @@ from . import coercions from . import dml from . import elements from . 
import roles +from .base import _DefaultDescriptionTuple from .dml import isinsert as _compile_state_isinsert from .elements import ColumnClause from .schema import default_is_clause_element @@ -53,6 +54,7 @@ if TYPE_CHECKING: from .elements import ColumnElement from .elements import KeyedColumnElement from .schema import _SQLExprDefault + from .schema import Column REQUIRED = util.symbol( "REQUIRED", @@ -79,20 +81,22 @@ def _as_dml_column(c: ColumnElement[Any]) -> ColumnClause[Any]: _CrudParamElement = Tuple[ "ColumnElement[Any]", - str, - Optional[Union[str, "_SQLExprDefault"]], + str, # column name + Optional[ + Union[str, "_SQLExprDefault"] + ], # bound parameter string or SQL expression to apply Iterable[str], ] _CrudParamElementStr = Tuple[ "KeyedColumnElement[Any]", str, # column name - str, # placeholder + str, # bound parameter string Iterable[str], ] _CrudParamElementSQLExpr = Tuple[ "ColumnClause[Any]", str, - "_SQLExprDefault", + "_SQLExprDefault", # SQL expression to apply Iterable[str], ] @@ -101,8 +105,10 @@ _CrudParamSequence = List[_CrudParamElement] class _CrudParams(NamedTuple): single_params: _CrudParamSequence - all_multi_params: List[Sequence[_CrudParamElementStr]] + is_default_metavalue_only: bool = False + use_insertmanyvalues: bool = False + use_sentinel_columns: Optional[Sequence[Column[Any]]] = None def _get_crud_params( @@ -206,6 +212,7 @@ def _get_crud_params( (c.key,), ) for c in stmt.table.columns + if not c._omit_from_statements ], [], ) @@ -301,8 +308,10 @@ def _get_crud_params( toplevel, kw, ) + use_insertmanyvalues = False + use_sentinel_columns = None else: - _scan_cols( + use_insertmanyvalues, use_sentinel_columns = _scan_cols( compiler, stmt, compile_state, @@ -328,6 +337,8 @@ def _get_crud_params( % (", ".join("%s" % (c,) for c in check)) ) + is_default_metavalue_only = False + if ( _compile_state_isinsert(compile_state) and compile_state._has_multi_parameters @@ -363,8 +374,15 @@ def _get_crud_params( (), ) ] - - return _CrudParams(values, []) + is_default_metavalue_only = True + + return _CrudParams( + values, + [], + is_default_metavalue_only=is_default_metavalue_only, + use_insertmanyvalues=use_insertmanyvalues, + use_sentinel_columns=use_sentinel_columns, + ) @overload @@ -527,7 +545,19 @@ def _scan_insert_from_select_cols( if stmt.include_insert_from_select_defaults: col_set = set(cols) for col in stmt.table.columns: - if col not in col_set and col.default: + # omit columns that were not in the SELECT statement. + # this will omit columns marked as omit_from_statements naturally, + # as long as that col was not explicit in the SELECT. + # if an omit_from_statements col has a "default" on it, then + # we need to include it, as these defaults should still fire off. + # but, if it has that default and it's the "sentinel" default, + # we don't do sentinel default operations for insert_from_select + # here so we again omit it. + if ( + col not in col_set + and col.default + and not col.default.is_sentinel + ): cols.append(col) for c in cols: @@ -579,6 +609,8 @@ def _scan_cols( implicit_returning, implicit_return_defaults, postfetch_lastrowid, + use_insertmanyvalues, + use_sentinel_columns, ) = _get_returning_modifiers(compiler, stmt, compile_state, toplevel) assert compile_state.isupdate or compile_state.isinsert @@ -672,9 +704,12 @@ def _scan_cols( elif c.default is not None: # column has a default, but it's not a pk column, or it is but # we don't need to get the pk back. 
-            _append_param_insert_hasdefault(
-                compiler, stmt, c, implicit_return_defaults, values, kw
-            )
+            if not c.default.is_sentinel or (
+                use_sentinel_columns is not None
+            ):
+                _append_param_insert_hasdefault(
+                    compiler, stmt, c, implicit_return_defaults, values, kw
+                )

        elif c.server_default is not None:
            # column has a DDL-level default, and is either not a pk
@@ -730,6 +765,8 @@
            if c in remaining_supplemental
        )

+    return (use_insertmanyvalues, use_sentinel_columns)
+

 def _setup_delete_return_defaults(
     compiler,
@@ -744,7 +781,7 @@
     toplevel,
     kw,
 ):
-    (_, _, implicit_return_defaults, _) = _get_returning_modifiers(
+    (_, _, implicit_return_defaults, *_) = _get_returning_modifiers(
         compiler, stmt, compile_state, toplevel
     )

@@ -1248,6 +1285,18 @@ class _multiparam_column(elements.ColumnElement[Any]):
            and other.original == self.original
        )

+    @util.memoized_property
+    def _default_description_tuple(self) -> _DefaultDescriptionTuple:
+        """used by default.py -> _process_execute_defaults()"""
+
+        return _DefaultDescriptionTuple._from_column_default(self.default)
+
+    @util.memoized_property
+    def _onupdate_description_tuple(self) -> _DefaultDescriptionTuple:
+        """used by default.py -> _process_execute_defaults()"""
+
+        return _DefaultDescriptionTuple._from_column_default(self.onupdate)
+

 def _process_multiparam_default_bind(
     compiler: SQLCompiler,
@@ -1459,16 +1508,15 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel):

    """

+    dialect = compiler.dialect
+
    need_pks = (
        toplevel
        and _compile_state_isinsert(compile_state)
        and not stmt._inline
        and (
            not compiler.for_executemany
-            or (
-                compiler.dialect.insert_executemany_returning
-                and stmt._return_defaults
-            )
+            or (dialect.insert_executemany_returning and stmt._return_defaults)
        )
        and not stmt._returning
        # and (not stmt._returning or stmt._return_defaults)
@@ -1479,7 +1527,7 @@
    # after the INSERT if that's all we need.
    postfetch_lastrowid = (
        need_pks
-        and compiler.dialect.postfetch_lastrowid
+        and dialect.postfetch_lastrowid
        and stmt.table._autoincrement_column is not None
    )

@@ -1491,7 +1539,7 @@
        need_pks
        # the dialect can veto it if it just doesn't support RETURNING
        # with INSERT
-        and compiler.dialect.insert_returning
+        and dialect.insert_returning
        # user-defined implicit_returning on Table can veto it
        and compile_state._primary_table.implicit_returning
        # the compile_state can veto it (SQLite uses this to disable
@@ -1506,10 +1554,7 @@
        # and a lot of weird use cases are supported by it.
# SQLite lastrowid times 3x faster than returning, # Mariadb lastrowid 2x faster than returning - ( - not postfetch_lastrowid - or compiler.dialect.favor_returning_over_lastrowid - ) + (not postfetch_lastrowid or dialect.favor_returning_over_lastrowid) or compile_state._has_multi_parameters or stmt._return_defaults ) @@ -1521,25 +1566,57 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): should_implicit_return_defaults = ( implicit_returning and stmt._return_defaults ) + explicit_returning = should_implicit_return_defaults or stmt._returning + use_insertmanyvalues = ( + toplevel + and compiler.for_executemany + and dialect.use_insertmanyvalues + and ( + explicit_returning or dialect.use_insertmanyvalues_wo_returning + ) + ) + + use_sentinel_columns = None + if ( + use_insertmanyvalues + and explicit_returning + and stmt._sort_by_parameter_order + ): + use_sentinel_columns = compiler._get_sentinel_column_for_table( + stmt.table + ) + elif compile_state.isupdate: should_implicit_return_defaults = ( stmt._return_defaults and compile_state._primary_table.implicit_returning and compile_state._supports_implicit_returning - and compiler.dialect.update_returning + and dialect.update_returning ) + use_insertmanyvalues = False + use_sentinel_columns = None elif compile_state.isdelete: should_implicit_return_defaults = ( stmt._return_defaults and compile_state._primary_table.implicit_returning and compile_state._supports_implicit_returning - and compiler.dialect.delete_returning + and dialect.delete_returning ) + use_insertmanyvalues = False + use_sentinel_columns = None else: should_implicit_return_defaults = False # pragma: no cover + use_insertmanyvalues = False + use_sentinel_columns = None if should_implicit_return_defaults: if not stmt._return_defaults_columns: + # TODO: this is weird. See #9685 where we have to + # take an extra step to prevent this from happening. why + # would this ever be *all* columns? but if we set to blank, then + # that seems to break things also in the ORM. So we should + # try to clean this up and figure out what return_defaults + # needs to do w/ the ORM etc. here implicit_return_defaults = set(stmt.table.c) else: implicit_return_defaults = set(stmt._return_defaults_columns) @@ -1551,6 +1628,8 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): implicit_returning or should_implicit_return_defaults, implicit_return_defaults, postfetch_lastrowid, + use_insertmanyvalues, + use_sentinel_columns, ) diff --git a/lib/sqlalchemy/sql/dml.py b/lib/sqlalchemy/sql/dml.py index 830f845b4..911061640 100644 --- a/lib/sqlalchemy/sql/dml.py +++ b/lib/sqlalchemy/sql/dml.py @@ -32,8 +32,8 @@ from typing import Union from . import coercions from . import roles from . import util as sql_util -from ._typing import _no_kw from ._typing import _TP +from ._typing import _unexpected_kw from ._typing import is_column_element from ._typing import is_named_from_clause from .base import _entity_namespace_key @@ -455,6 +455,7 @@ class UpdateBase( self, *cols: _DMLColumnArgument, supplemental_cols: Optional[Iterable[_DMLColumnArgument]] = None, + sort_by_parameter_order: bool = False, ) -> Self: """Make use of a :term:`RETURNING` clause for the purpose of fetching server-side expressions and defaults, for supporting @@ -603,6 +604,20 @@ class UpdateBase( .. 
versionadded:: 2.0 + :param sort_by_parameter_order: for a batch INSERT that is being + executed against multiple parameter sets, organize the results of + RETURNING so that the returned rows correspond to the order of + parameter sets passed in. This applies only to an :term:`executemany` + execution for supporting dialects and typically makes use of the + :term:`insertmanyvalues` feature. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` - background on + sorting of RETURNING rows for bulk INSERT + .. seealso:: :meth:`.UpdateBase.returning` @@ -636,7 +651,13 @@ class UpdateBase( coercions.expect(roles.ColumnsClauseRole, c) for c in cols ) self._return_defaults = True - + if sort_by_parameter_order: + if not self.is_insert: + raise exc.ArgumentError( + "The 'sort_by_parameter_order' argument to " + "return_defaults() only applies to INSERT statements" + ) + self._sort_by_parameter_order = True if supplemental_cols: # uniquifying while also maintaining order (the maintain of order # is for test suites but also for vertical splicing @@ -661,7 +682,10 @@ class UpdateBase( @_generative def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> UpdateBase: r"""Add a :term:`RETURNING` or equivalent clause to this statement. @@ -723,6 +747,25 @@ class UpdateBase( read the documentation notes for the database in use in order to determine the availability of RETURNING. + :param \*cols: series of columns, SQL expressions, or whole tables + entities to be returned. + :param sort_by_parameter_order: for a batch INSERT that is being + executed against multiple parameter sets, organize the results of + RETURNING so that the returned rows correspond to the order of + parameter sets passed in. This applies only to an :term:`executemany` + execution for supporting dialects and typically makes use of the + :term:`insertmanyvalues` feature. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` - background on + sorting of RETURNING rows for bulk INSERT (Core level discussion) + + :ref:`orm_queryguide_bulk_insert_returning_ordered` - example of + use with :ref:`orm_queryguide_bulk_insert` (ORM level discussion) + .. 
seealso:: :meth:`.UpdateBase.return_defaults` - an alternative method tailored @@ -733,7 +776,7 @@ class UpdateBase( """ # noqa: E501 if __kw: - raise _no_kw() + raise _unexpected_kw("UpdateBase.returning()", __kw) if self._return_defaults: raise exc.InvalidRequestError( "return_defaults() is already configured on this statement" @@ -741,6 +784,13 @@ class UpdateBase( self._returning += tuple( coercions.expect(roles.ColumnsClauseRole, c) for c in cols ) + if sort_by_parameter_order: + if not self.is_insert: + raise exc.ArgumentError( + "The 'sort_by_parameter_order' argument to returning() " + "only applies to INSERT statements" + ) + self._sort_by_parameter_order = True return self def corresponding_column( @@ -1123,6 +1173,8 @@ class Insert(ValuesBase): select = None include_insert_from_select_defaults = False + _sort_by_parameter_order: bool = False + is_insert = True table: TableClause @@ -1143,6 +1195,7 @@ class Insert(ValuesBase): "_return_defaults_columns", InternalTraversal.dp_clauseelement_tuple, ), + ("_sort_by_parameter_order", InternalTraversal.dp_boolean), ] + HasPrefixes._has_prefixes_traverse_internals + DialectKWArgs._dialect_kwargs_traverse_internals @@ -1231,24 +1284,35 @@ class Insert(ValuesBase): if TYPE_CHECKING: - # START OVERLOADED FUNCTIONS self.returning ReturningInsert 1-8 + # START OVERLOADED FUNCTIONS self.returning ReturningInsert 1-8 ", *, sort_by_parameter_order: bool = False" # noqa: E501 # code within this block is **programmatically, # statically generated** by tools/generate_tuple_map_overloads.py @overload - def returning(self, __ent0: _TCCA[_T0]) -> ReturningInsert[Tuple[_T0]]: + def returning( + self, __ent0: _TCCA[_T0], *, sort_by_parameter_order: bool = False + ) -> ReturningInsert[Tuple[_T0]]: ... @overload def returning( - self, __ent0: _TCCA[_T0], __ent1: _TCCA[_T1] + self, + __ent0: _TCCA[_T0], + __ent1: _TCCA[_T1], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1]]: ... @overload def returning( - self, __ent0: _TCCA[_T0], __ent1: _TCCA[_T1], __ent2: _TCCA[_T2] + self, + __ent0: _TCCA[_T0], + __ent1: _TCCA[_T1], + __ent2: _TCCA[_T2], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2]]: ... @@ -1259,6 +1323,8 @@ class Insert(ValuesBase): __ent1: _TCCA[_T1], __ent2: _TCCA[_T2], __ent3: _TCCA[_T3], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3]]: ... @@ -1270,6 +1336,8 @@ class Insert(ValuesBase): __ent2: _TCCA[_T2], __ent3: _TCCA[_T3], __ent4: _TCCA[_T4], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4]]: ... @@ -1282,6 +1350,8 @@ class Insert(ValuesBase): __ent3: _TCCA[_T3], __ent4: _TCCA[_T4], __ent5: _TCCA[_T5], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5]]: ... @@ -1295,6 +1365,8 @@ class Insert(ValuesBase): __ent4: _TCCA[_T4], __ent5: _TCCA[_T5], __ent6: _TCCA[_T6], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5, _T6]]: ... @@ -1309,6 +1381,8 @@ class Insert(ValuesBase): __ent5: _TCCA[_T5], __ent6: _TCCA[_T6], __ent7: _TCCA[_T7], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5, _T6, _T7]]: ... 
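Each of these generated overloads threads the same keyword through to the runtime ``returning()`` implementation. As a minimal usage sketch (the table, the SQLite URL, and the data here are illustrative, not part of this patch)::

    from sqlalchemy import (
        Column,
        Integer,
        MetaData,
        String,
        Table,
        create_engine,
        insert,
    )

    metadata = MetaData()
    user = Table(
        "user_account",
        metadata,
        Column("id", Integer, primary_key=True),
        Column("name", String(50)),
    )

    engine = create_engine("sqlite://")
    metadata.create_all(engine)

    with engine.begin() as conn:
        result = conn.execute(
            insert(user).returning(
                user.c.id, user.c.name, sort_by_parameter_order=True
            ),
            [{"name": "n1"}, {"name": "n2"}, {"name": "n3"}],
        )
        # with sort_by_parameter_order=True, the returned rows are
        # guaranteed to line up with the parameter dictionaries above
        assert [row.name for row in result] == ["n1", "n2", "n3"]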
@@ -1316,12 +1390,18 @@ class Insert(ValuesBase): @overload def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> ReturningInsert[Any]: ... def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> ReturningInsert[Any]: ... diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index b32201c7f..ff47ec79d 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -91,7 +91,6 @@ if typing.TYPE_CHECKING: from .compiler import SQLCompiler from .functions import FunctionElement from .operators import OperatorType - from .schema import _ServerDefaultType from .schema import Column from .schema import DefaultGenerator from .schema import FetchedValue @@ -1306,6 +1305,8 @@ class ColumnElement( primary_key: bool = False _is_clone_of: Optional[ColumnElement[_T]] _is_column_element = True + _insert_sentinel: bool = False + _omit_from_statements = False foreign_keys: AbstractSet[ForeignKey] = frozenset() @@ -2255,6 +2256,8 @@ class TextClause( _render_label_in_columns_clause = False + _omit_from_statements = False + @property def _hide_froms(self) -> Iterable[FromClause]: return () @@ -4710,7 +4713,7 @@ class ColumnClause( onupdate: Optional[DefaultGenerator] = None default: Optional[DefaultGenerator] = None - server_default: Optional[_ServerDefaultType] = None + server_default: Optional[FetchedValue] = None server_onupdate: Optional[FetchedValue] = None _is_multiparam_column = False diff --git a/lib/sqlalchemy/sql/schema.py b/lib/sqlalchemy/sql/schema.py index 7d964ad05..192096469 100644 --- a/lib/sqlalchemy/sql/schema.py +++ b/lib/sqlalchemy/sql/schema.py @@ -58,7 +58,10 @@ from . import ddl from . import roles from . import type_api from . import visitors +from .base import _DefaultDescriptionTuple from .base import _NoneName +from .base import _SentinelColumnCharacterization +from .base import _SentinelDefaultCharacterization from .base import DedupeColumnCollection from .base import DialectKWArgs from .base import Executable @@ -77,6 +80,7 @@ from .. import event from .. import exc from .. import inspection from .. import util +from ..util import HasMemoized from ..util.typing import Final from ..util.typing import Literal from ..util.typing import Protocol @@ -107,7 +111,6 @@ if typing.TYPE_CHECKING: _T = TypeVar("_T", bound="Any") _SI = TypeVar("_SI", bound="SchemaItem") -_ServerDefaultType = Union["FetchedValue", str, TextClause, ColumnElement[Any]] _TAB = TypeVar("_TAB", bound="Table") @@ -115,6 +118,10 @@ _CreateDropBind = Union["Engine", "Connection", "MockConnection"] _ConstraintNameArgument = Optional[Union[str, _NoneName]] +_ServerDefaultArgument = Union[ + "FetchedValue", str, TextClause, ColumnElement[Any] +] + class SchemaConst(Enum): @@ -345,6 +352,8 @@ class Table( _columns: DedupeColumnCollection[Column[Any]] + _sentinel_column: Optional[Column[Any]] + constraints: Set[Constraint] """A collection of all :class:`_schema.Constraint` objects associated with this :class:`_schema.Table`. 
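The ``_sentinel_column`` slot declared above is populated by the ``Column._set_parent()`` changes later in this diff. As a hedged sketch of the configuration it supports (table and column names are illustrative, and ``gen_random_uuid()`` assumes PostgreSQL): a table whose primary key is generated by a server-side function is neither an autoincrement nor correlatable to client parameters, so a dedicated sentinel column can be declared via the ``insert_sentinel()`` helper added further down::

    from sqlalchemy import (
        Column,
        MetaData,
        String,
        Table,
        Uuid,
        insert_sentinel,
        text,
    )

    metadata = MetaData()
    document = Table(
        "document",
        metadata,
        # server-generated, non-autoincrement primary key; by itself it
        # gives RETURNING nothing to correlate against input parameters
        Column(
            "id",
            Uuid(),
            server_default=text("gen_random_uuid()"),
            primary_key=True,
        ),
        Column("data", String(50)),
        # a nullable INTEGER column with a sentinel-specific default,
        # omitted from ordinary statements; sets Table._sentinel_column
        insert_sentinel("sentinel"),
    )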
@@ -819,6 +828,8 @@ class Table( assert isinstance(schema, str) self.schema = quoted_name(schema, quote_schema) + self._sentinel_column = None + self.indexes = set() self.constraints = set() PrimaryKeyConstraint( @@ -1005,6 +1016,140 @@ class Table( def _autoincrement_column(self) -> Optional[Column[int]]: return self.primary_key._autoincrement_column + @util.ro_memoized_property + def _sentinel_column_characteristics( + self, + ) -> _SentinelColumnCharacterization: + """determine a candidate column (or columns, in case of a client + generated composite primary key) which can be used as an + "insert sentinel" for an INSERT statement. + + The returned structure, :class:`_SentinelColumnCharacterization`, + includes all the details needed by :class:`.Dialect` and + :class:`.SQLCompiler` to determine if these column(s) can be used + as an INSERT..RETURNING sentinel for a particular database + dialect. + + .. versionadded:: 2.0.10 + + """ + + sentinel_is_explicit = False + sentinel_is_autoinc = False + the_sentinel: Optional[_typing_Sequence[Column[Any]]] = None + + # see if a column was explicitly marked "insert_sentinel=True". + explicit_sentinel_col = self._sentinel_column + + if explicit_sentinel_col is not None: + the_sentinel = (explicit_sentinel_col,) + sentinel_is_explicit = True + + autoinc_col = self._autoincrement_column + if sentinel_is_explicit and explicit_sentinel_col is autoinc_col: + assert autoinc_col is not None + sentinel_is_autoinc = True + elif explicit_sentinel_col is None and autoinc_col is not None: + the_sentinel = (autoinc_col,) + sentinel_is_autoinc = True + + default_characterization = _SentinelDefaultCharacterization.UNKNOWN + + if the_sentinel: + the_sentinel_zero = the_sentinel[0] + if the_sentinel_zero.identity: + + if the_sentinel_zero.identity._increment_is_negative: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + "Can't use IDENTITY default with negative " + "increment as an explicit sentinel column" + ) + else: + if sentinel_is_autoinc: + autoinc_col = None + sentinel_is_autoinc = False + the_sentinel = None + else: + default_characterization = ( + _SentinelDefaultCharacterization.IDENTITY + ) + elif ( + the_sentinel_zero.default is None + and the_sentinel_zero.server_default is None + ): + if the_sentinel_zero.nullable: + raise exc.InvalidRequestError( + f"Column {the_sentinel_zero} has been marked as a " + "sentinel " + "column with no default generation function; it " + "at least needs to be marked nullable=False assuming " + "user-populated sentinel values will be used." 
+ ) + default_characterization = ( + _SentinelDefaultCharacterization.NONE + ) + elif the_sentinel_zero.default is not None: + if the_sentinel_zero.default.is_sentinel: + default_characterization = ( + _SentinelDefaultCharacterization.SENTINEL_DEFAULT + ) + elif default_is_sequence(the_sentinel_zero.default): + + if the_sentinel_zero.default._increment_is_negative: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + "Can't use SEQUENCE default with negative " + "increment as an explicit sentinel column" + ) + else: + if sentinel_is_autoinc: + autoinc_col = None + sentinel_is_autoinc = False + the_sentinel = None + + default_characterization = ( + _SentinelDefaultCharacterization.SEQUENCE + ) + elif the_sentinel_zero.default.is_callable: + default_characterization = ( + _SentinelDefaultCharacterization.CLIENTSIDE + ) + elif the_sentinel_zero.server_default is not None: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + f"Column {the_sentinel[0]} can't be a sentinel column " + "because it uses an explicit server side default " + "that's not the Identity() default." + ) + + default_characterization = ( + _SentinelDefaultCharacterization.SERVERSIDE + ) + + if the_sentinel is None and self.primary_key: + assert autoinc_col is None + + # determine for non-autoincrement pk if all elements are + # client side + for _pkc in self.primary_key: + if _pkc.server_default is not None or ( + _pkc.default and not _pkc.default.is_callable + ): + break + else: + the_sentinel = tuple(self.primary_key) + default_characterization = ( + _SentinelDefaultCharacterization.CLIENTSIDE + ) + + return _SentinelColumnCharacterization( + the_sentinel, + sentinel_is_explicit, + sentinel_is_autoinc, + default_characterization, + ) + @property def autoincrement_column(self) -> Optional[Column[int]]: """Returns the :class:`.Column` object which currently represents @@ -1361,6 +1506,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): inherit_cache = True key: str + server_default: Optional[FetchedValue] + def __init__( self, __name_pos: Optional[ @@ -1384,11 +1531,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): ] = SchemaConst.NULL_UNSPECIFIED, onupdate: Optional[Any] = None, primary_key: bool = False, - server_default: Optional[_ServerDefaultType] = None, + server_default: Optional[_ServerDefaultArgument] = None, server_onupdate: Optional[FetchedValue] = None, quote: Optional[bool] = None, system: bool = False, comment: Optional[str] = None, + insert_sentinel: bool = False, + _omit_from_statements: bool = False, _proxies: Optional[Any] = None, **dialect_kwargs: Any, ): @@ -1873,6 +2022,22 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): :paramref:`_schema.Column.comment` parameter to :class:`_schema.Column`. + :param insert_sentinel: Marks this :class:`_schema.Column` as an + :term:`insert sentinel` used for optimizing the performance of the + :term:`insertmanyvalues` feature for tables that don't + otherwise have qualifying primary key configurations. + + .. versionadded:: 2.0.10 + + .. 
seealso::
+
+            :func:`_schema.insert_sentinel` - all in one helper for declaring
+            sentinel columns
+
+            :ref:`engine_insertmanyvalues`
+
+            :ref:`engine_insertmanyvalues_sentinel_columns`
+
        """  # noqa: E501, RST201, RST202

@@ -1914,7 +2079,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
        self.key = key if key is not None else name  # type: ignore

        self.primary_key = primary_key
-
+        self._insert_sentinel = insert_sentinel
+        self._omit_from_statements = _omit_from_statements
        self._user_defined_nullable = udn = nullable
        if udn is not NULL_UNSPECIFIED:
            self.nullable = udn
@@ -1962,22 +2128,26 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
        else:
            self.onupdate = None

-        self.server_default = server_default
-        self.server_onupdate = server_onupdate
-
-        if self.server_default is not None:
-            if isinstance(self.server_default, FetchedValue):
-                l_args.append(self.server_default._as_for_update(False))
+        if server_default is not None:
+            if isinstance(server_default, FetchedValue):
+                server_default = server_default._as_for_update(False)
+                l_args.append(server_default)
            else:
-                l_args.append(DefaultClause(self.server_default))
+                server_default = DefaultClause(server_default)
+                l_args.append(server_default)
+        self.server_default = server_default

-        if self.server_onupdate is not None:
-            if isinstance(self.server_onupdate, FetchedValue):
-                l_args.append(self.server_onupdate._as_for_update(True))
+        if server_onupdate is not None:
+            if isinstance(server_onupdate, FetchedValue):
+                server_onupdate = server_onupdate._as_for_update(True)
+                l_args.append(server_onupdate)
            else:
-                l_args.append(
-                    DefaultClause(self.server_onupdate, for_update=True)
+                server_onupdate = DefaultClause(
+                    server_onupdate, for_update=True
                )
+                l_args.append(server_onupdate)
+        self.server_onupdate = server_onupdate
+
        self._init_items(*cast(_typing_Sequence[SchemaItem], l_args))

        util.set_creation_order(self)
@@ -2042,6 +2212,17 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
        if isinstance(impl, SchemaEventTarget):
            impl._set_parent_with_dispatch(self)

+    @HasMemoized.memoized_attribute
+    def _default_description_tuple(self) -> _DefaultDescriptionTuple:
+        """used by default.py -> _process_execute_defaults()"""
+
+        return _DefaultDescriptionTuple._from_column_default(self.default)
+
+    @HasMemoized.memoized_attribute
+    def _onupdate_description_tuple(self) -> _DefaultDescriptionTuple:
+        """used by default.py -> _process_execute_defaults()"""
+        return _DefaultDescriptionTuple._from_column_default(self.onupdate)
+
    @util.memoized_property
    def _gen_static_annotations_cache_key(self) -> bool:  # type: ignore
        """special attribute used by cache key gen, if true, we will
@@ -2185,6 +2366,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
            all_names[self.name] = self
        self.table = table

+        if self._insert_sentinel:
+            if self.table._sentinel_column is not None:
+                raise exc.ArgumentError(
+                    "a Table may have only one explicit sentinel column"
+                )
+            self.table._sentinel_column = self
+
        if self.primary_key:
            table.primary_key._replace(self)
        elif self.key in table.primary_key:
@@ -2316,6 +2504,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
            server_onupdate=server_onupdate,
            doc=self.doc,
            comment=self.comment,
+            _omit_from_statements=self._omit_from_statements,
+            insert_sentinel=self._insert_sentinel,
            *args,
            **column_kwargs,
        )
@@ -2472,6 +2662,56 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
        return c.key, c


+def insert_sentinel(
+    name: Optional[str] = None,
+    type_: 
Optional[_TypeEngineArgument[_T]] = None, + *, + default: Optional[Any] = None, + omit_from_statements: bool = True, +) -> Column[Any]: + """Provides a surrogate :class:`_schema.Column` that will act as a + dedicated insert :term:`sentinel` column, allowing efficient bulk + inserts with deterministic RETURNING sorting for tables that + don't otherwise have qualifying primary key configurations. + + Adding this column to a :class:`.Table` object requires that a + corresponding database table actually has this column present, so if adding + it to an existing model, existing database tables would need to be migrated + (e.g. using ALTER TABLE or similar) to include this column. + + For background on how this object is used, see the section + :ref:`engine_insertmanyvalues_sentinel_columns` as part of the + section :ref:`engine_insertmanyvalues`. + + The :class:`_schema.Column` returned will be a nullable integer column by + default and make use of a sentinel-specific default generator used only in + "insertmanyvalues" operations. + + .. seealso:: + + :func:`_orm.orm_insert_sentinel` + + :paramref:`_schema.Column.insert_sentinel` + + :ref:`engine_insertmanyvalues` + + :ref:`engine_insertmanyvalues_sentinel_columns` + + + .. versionadded:: 2.0.10 + + """ + return Column( + name=name, + type_=type_api.INTEGERTYPE if type_ is None else type_, + default=default + if default is not None + else _InsertSentinelColumnDefault(), + _omit_from_statements=omit_from_statements, + insert_sentinel=True, + ) + + class ForeignKey(DialectKWArgs, SchemaItem): """Defines a dependency between two columns. @@ -3058,17 +3298,24 @@ else: class DefaultGenerator(Executable, SchemaItem): - """Base class for column *default* values.""" + """Base class for column *default* values. + + This object is only present on column.default or column.onupdate. + It's not valid as a server default. + + """ __visit_name__ = "default_generator" _is_default_generator = True is_sequence = False + is_identity = False is_server_default = False is_clause_element = False is_callable = False is_scalar = False has_arg = False + is_sentinel = False column: Optional[Column[Any]] def __init__(self, for_update: bool = False) -> None: @@ -3222,6 +3469,44 @@ class ScalarElementColumnDefault(ColumnDefault): ) +class _InsertSentinelColumnDefault(ColumnDefault): + """Default generator that's specific to the use of a "sentinel" column + when using the insertmanyvalues feature. + + This default is used as part of the :func:`_schema.insert_sentinel` + construct. 
+ + """ + + is_sentinel = True + for_update = False + arg = None + + def __new__(cls) -> _InsertSentinelColumnDefault: + return object.__new__(cls) + + def __init__(self) -> None: + pass + + def _set_parent(self, parent: SchemaEventTarget, **kw: Any) -> None: + col = cast("Column[Any]", parent) + if not col._insert_sentinel: + raise exc.ArgumentError( + "The _InsertSentinelColumnDefault may only be applied to a " + "Column marked as insert_sentinel=True" + ) + elif not col.nullable: + raise exc.ArgumentError( + "The _InsertSentinelColumnDefault may only be applied to a " + "Column that is nullable" + ) + + super()._set_parent(parent, **kw) + + def _copy(self) -> _InsertSentinelColumnDefault: + return _InsertSentinelColumnDefault() + + _SQLExprDefault = Union["ColumnElement[Any]", "TextClause"] @@ -3366,6 +3651,10 @@ class IdentityOptions: self.cache = cache self.order = order + @property + def _increment_is_negative(self) -> bool: + return self.increment is not None and self.increment < 0 + class Sequence(HasSchemaAttr, IdentityOptions, DefaultGenerator): """Represents a named database sequence. @@ -3674,6 +3963,7 @@ class FetchedValue(SchemaEventTarget): reflected = False has_argument = False is_clause_element = False + is_identity = False column: Optional[Column[Any]] @@ -5668,6 +5958,8 @@ class Identity(IdentityOptions, FetchedValue, SchemaItem): __visit_name__ = "identity_column" + is_identity = True + def __init__( self, always: bool = False, diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index 5f530ba7d..8a371951e 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -63,6 +63,7 @@ from .base import _EntityNamespace from .base import _expand_cloned from .base import _from_objects from .base import _generative +from .base import _never_select_column from .base import _NoArg from .base import _select_iterables from .base import CacheableOptions @@ -930,7 +931,7 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable): @util.ro_non_memoized_property def _select_iterable(self) -> _SelectIterable: - return self.c + return (c for c in self.c if not _never_select_column(c)) def _init_collections(self) -> None: assert "_columns" not in self.__dict__ diff --git a/lib/sqlalchemy/sql/type_api.py b/lib/sqlalchemy/sql/type_api.py index ddbdb49a0..5af12cb93 100644 --- a/lib/sqlalchemy/sql/type_api.py +++ b/lib/sqlalchemy/sql/type_api.py @@ -94,6 +94,11 @@ class _ResultProcessorType(Protocol[_T_co]): ... +class _SentinelProcessorType(Protocol[_T_co]): + def __call__(self, value: Any) -> Optional[_T_co]: + ... + + class _BaseTypeMemoDict(TypedDict): impl: TypeEngine[Any] result: Dict[Any, Optional[_ResultProcessorType[Any]]] @@ -102,6 +107,7 @@ class _BaseTypeMemoDict(TypedDict): class _TypeMemoDict(_BaseTypeMemoDict, total=False): literal: Optional[_LiteralProcessorType[Any]] bind: Optional[_BindProcessorType[Any]] + sentinel: Optional[_SentinelProcessorType[Any]] custom: Dict[Any, object] @@ -598,6 +604,18 @@ class TypeEngine(Visitable, Generic[_T]): """ return None + def _sentinel_value_resolver( + self, dialect: Dialect + ) -> Optional[_SentinelProcessorType[_T]]: + """Return an optional callable that will match parameter values + (post-bind processing) to result values + (pre-result-processing), for use in the "sentinel" feature. + + .. versionadded:: 2.0.10 + + """ + return None + @util.memoized_property def _has_bind_expression(self) -> bool: """memoized boolean, check if bind_expression is implemented. 
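The ``_sentinel_value_resolver()`` hook above is memoized per dialect by the ``_cached_sentinel_value_processor()`` addition that follows. Purely to illustrate the ``_SentinelProcessorType`` contract (this is private API, so the function below is a hypothetical sketch rather than a supported extension point): a resolver converts a post-bind parameter value into the form the database will hand back in RETURNING, so each returned row can be matched to its parameter set::

    import uuid
    from typing import Any, Optional

    def uuid_as_text_resolver(value: Any) -> Optional[Any]:
        # e.g. for a UUID bound as a uuid.UUID object but returned by
        # the database as text; other values pass through unchanged
        if isinstance(value, uuid.UUID):
            return str(value)
        return value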
@@ -945,6 +963,19 @@ class TypeEngine(Visitable, Generic[_T]): d["result"][coltype] = rp return rp + def _cached_sentinel_value_processor( + self, dialect: Dialect + ) -> Optional[_SentinelProcessorType[_T]]: + + try: + return dialect._type_memos[self]["sentinel"] + except KeyError: + pass + + d = self._dialect_info(dialect) + d["sentinel"] = bp = d["impl"]._sentinel_value_resolver(dialect) + return bp + def _cached_custom_processor( self, dialect: Dialect, key: str, fn: Callable[[TypeEngine[_T]], _O] ) -> _O: diff --git a/lib/sqlalchemy/testing/config.py b/lib/sqlalchemy/testing/config.py index 6c5ebd3dd..855918365 100644 --- a/lib/sqlalchemy/testing/config.py +++ b/lib/sqlalchemy/testing/config.py @@ -10,6 +10,7 @@ from __future__ import annotations import collections +import inspect import typing from typing import Any from typing import Callable @@ -185,7 +186,7 @@ class Variation: return [typ(casename, argname, case_names) for casename in case_names] -def variation(argname, cases): +def variation(argname_or_fn, cases=None): """a helper around testing.combinations that provides a single namespace that can be used as a switch. @@ -221,6 +222,17 @@ def variation(argname, cases): """ + if inspect.isfunction(argname_or_fn): + argname = argname_or_fn.__name__ + cases = argname_or_fn(None) + + @variation_fixture(argname, cases) + def go(self, request): + yield request.param + + return go + else: + argname = argname_or_fn cases_plus_limitations = [ entry if (isinstance(entry, tuple) and len(entry) == 2) diff --git a/lib/sqlalchemy/testing/fixtures.py b/lib/sqlalchemy/testing/fixtures.py index a8bc6c50a..fc1fa1483 100644 --- a/lib/sqlalchemy/testing/fixtures.py +++ b/lib/sqlalchemy/testing/fixtures.py @@ -10,12 +10,14 @@ from __future__ import annotations import itertools +import random import re import sys import sqlalchemy as sa from . import assertions from . import config +from . import mock from . import schema from .assertions import eq_ from .assertions import ne_ @@ -1003,3 +1005,54 @@ class CacheKeyFixture: range(len(case_a)), 2 ): self._compare_equal(case_a[a], case_b[b], compare_values) + + +def insertmanyvalues_fixture( + connection, randomize_rows=False, warn_on_downgraded=False +): + + dialect = connection.dialect + orig_dialect = dialect._deliver_insertmanyvalues_batches + orig_conn = connection._exec_insertmany_context + + class RandomCursor: + __slots__ = ("cursor",) + + def __init__(self, cursor): + self.cursor = cursor + + # only this method is called by the deliver method. 
+ # by not having the other methods we assert that those aren't being + # used + + def fetchall(self): + rows = self.cursor.fetchall() + rows = list(rows) + random.shuffle(rows) + return rows + + def _deliver_insertmanyvalues_batches( + cursor, statement, parameters, generic_setinputsizes, context + ): + if randomize_rows: + cursor = RandomCursor(cursor) + for batch in orig_dialect( + cursor, statement, parameters, generic_setinputsizes, context + ): + if warn_on_downgraded and batch.is_downgraded: + util.warn("Batches were downgraded for sorted INSERT") + + yield batch + + def _exec_insertmany_context( + dialect, + context, + ): + with mock.patch.object( + dialect, + "_deliver_insertmanyvalues_batches", + new=_deliver_insertmanyvalues_batches, + ): + return orig_conn(dialect, context) + + connection._exec_insertmany_context = _exec_insertmany_context diff --git a/lib/sqlalchemy/testing/provision.py b/lib/sqlalchemy/testing/provision.py index 43821854c..66ccb3ee8 100644 --- a/lib/sqlalchemy/testing/provision.py +++ b/lib/sqlalchemy/testing/provision.py @@ -43,7 +43,7 @@ class register: return decorate - def __call__(self, cfg, *arg): + def __call__(self, cfg, *arg, **kw): if isinstance(cfg, str): url = sa_url.make_url(cfg) elif isinstance(cfg, sa_url.URL): @@ -52,9 +52,9 @@ class register: url = cfg.db.url backend = url.get_backend_name() if backend in self.fns: - return self.fns[backend](cfg, *arg) + return self.fns[backend](cfg, *arg, **kw) else: - return self.fns["*"](cfg, *arg) + return self.fns["*"](cfg, *arg, **kw) def create_follower_db(follower_ident): @@ -462,7 +462,9 @@ def set_default_schema_on_connection(cfg, dbapi_connection, schema_name): @register.init -def upsert(cfg, table, returning, set_lambda=None): +def upsert( + cfg, table, returning, *, set_lambda=None, sort_by_parameter_order=False +): """return the backends insert..on conflict / on dupe etc. construct. 
while we should add a backend-neutral upsert construct as well, such as diff --git a/lib/sqlalchemy/testing/suite/test_types.py b/lib/sqlalchemy/testing/suite/test_types.py index 44d174086..3ba7f8e89 100644 --- a/lib/sqlalchemy/testing/suite/test_types.py +++ b/lib/sqlalchemy/testing/suite/test_types.py @@ -1891,6 +1891,31 @@ class UuidTest(_LiteralRoundTripFixture, fixtures.TablesTest): filter_=lambda x: x.lower(), ) + @testing.requires.insert_returning + def test_uuid_returning(self, connection): + data = uuid.uuid4() + str_data = str(data) + uuid_table = self.tables.uuid_table + + result = connection.execute( + uuid_table.insert().returning( + uuid_table.c.uuid_data, + uuid_table.c.uuid_text_data, + uuid_table.c.uuid_data_nonnative, + uuid_table.c.uuid_text_data_nonnative, + ), + { + "id": 1, + "uuid_data": data, + "uuid_text_data": str_data, + "uuid_data_nonnative": data, + "uuid_text_data_nonnative": str_data, + }, + ) + row = result.first() + + eq_(row, (data, str_data, data, str_data)) + class NativeUUIDTest(UuidTest): __requires__ = ("uuid_data_type",) diff --git a/lib/sqlalchemy/util/__init__.py b/lib/sqlalchemy/util/__init__.py index d443cf031..6f409c9e2 100644 --- a/lib/sqlalchemy/util/__init__.py +++ b/lib/sqlalchemy/util/__init__.py @@ -26,6 +26,7 @@ from ._collections import IdentitySet as IdentitySet from ._collections import immutabledict as immutabledict from ._collections import LRUCache as LRUCache from ._collections import merge_lists_w_ordering as merge_lists_w_ordering +from ._collections import NONE_SET as NONE_SET from ._collections import ordered_column_set as ordered_column_set from ._collections import OrderedDict as OrderedDict from ._collections import OrderedIdentitySet as OrderedIdentitySet diff --git a/lib/sqlalchemy/util/_collections.py b/lib/sqlalchemy/util/_collections.py index af117dc92..2e793e862 100644 --- a/lib/sqlalchemy/util/_collections.py +++ b/lib/sqlalchemy/util/_collections.py @@ -69,6 +69,7 @@ _VT = TypeVar("_VT", bound=Any) _T_co = TypeVar("_T_co", covariant=True) EMPTY_SET: FrozenSet[Any] = frozenset() +NONE_SET: FrozenSet[Any] = frozenset([None]) def merge_lists_w_ordering(a: List[Any], b: List[Any]) -> List[Any]: diff --git a/test/dialect/mssql/test_engine.py b/test/dialect/mssql/test_engine.py index 095df2eaf..799452ade 100644 --- a/test/dialect/mssql/test_engine.py +++ b/test/dialect/mssql/test_engine.py @@ -3,7 +3,6 @@ import re from unittest.mock import Mock from sqlalchemy import Column -from sqlalchemy import create_engine from sqlalchemy import event from sqlalchemy import exc from sqlalchemy import inspect @@ -629,14 +628,6 @@ class MiscTest(fixtures.TestBase): __only_on__ = "mssql" __backend__ = True - def test_no_insertmanyvalues(self): - with expect_raises_message( - exc.ArgumentError, - "The use_insertmanyvalues feature on SQL Server is " - "currently not safe to use", - ): - create_engine("mssql+pyodbc://", use_insertmanyvalues=True) - @testing.variation("enable_comments", [True, False]) def test_comments_enabled_disabled( self, testing_engine, metadata, enable_comments diff --git a/test/engine/test_logging.py b/test/engine/test_logging.py index 19c26f43c..a498ec85c 100644 --- a/test/engine/test_logging.py +++ b/test/engine/test_logging.py @@ -283,7 +283,8 @@ class LogParamsTest(fixtures.TestBase): eq_regex( self.buf.buffer[4].message, - r"\[generated in .* \(insertmanyvalues\)\] \('d0', 'd1', " + r"\[generated in .* \(insertmanyvalues\) 1/3 " + r"\(unordered\)\] \('d0', 'd1', " r"'d2', 'd3', 'd4', 'd5', 'd6', 'd7', " r"'d8', 
'd9', 'd10', 'd11', 'd12', 'd13', 'd14', 'd15', " r"'d16', 'd17', 'd18', 'd19', 'd20', 'd21', 'd22', 'd23', " @@ -304,7 +305,7 @@ class LogParamsTest(fixtures.TestBase): eq_(self.buf.buffer[5].message, full_insert) eq_( self.buf.buffer[6].message, - "[insertmanyvalues batch 2 of 3] ('d150', 'd151', 'd152', " + "[insertmanyvalues 2/3 (unordered)] ('d150', 'd151', 'd152', " "'d153', 'd154', 'd155', 'd156', 'd157', 'd158', 'd159', " "'d160', 'd161', 'd162', 'd163', 'd164', 'd165', 'd166', " "'d167', 'd168', 'd169', 'd170', 'd171', 'd172', 'd173', " @@ -330,7 +331,7 @@ class LogParamsTest(fixtures.TestBase): ) eq_( self.buf.buffer[8].message, - "[insertmanyvalues batch 3 of 3] ('d300', 'd301', 'd302', " + "[insertmanyvalues 3/3 (unordered)] ('d300', 'd301', 'd302', " "'d303', 'd304', 'd305', 'd306', 'd307', 'd308', 'd309', " "'d310', 'd311', 'd312', 'd313', 'd314', 'd315', 'd316', " "'d317', 'd318', 'd319', 'd320', 'd321', 'd322', 'd323', " diff --git a/test/orm/declarative/test_basic.py b/test/orm/declarative/test_basic.py index 698b66db1..d0e56819c 100644 --- a/test/orm/declarative/test_basic.py +++ b/test/orm/declarative/test_basic.py @@ -1,4 +1,5 @@ import random +import uuid import sqlalchemy as sa from sqlalchemy import CheckConstraint @@ -13,6 +14,7 @@ from sqlalchemy import select from sqlalchemy import String from sqlalchemy import testing from sqlalchemy import UniqueConstraint +from sqlalchemy import Uuid from sqlalchemy.ext.hybrid import hybrid_property from sqlalchemy.orm import as_declarative from sqlalchemy.orm import backref @@ -209,6 +211,26 @@ class DeclarativeBaseSetupsTest(fixtures.TestBase): ): Base.__init__(fs, x=5) + def test_insert_sentinel_param_custom_type_maintained(self, decl_base): + class A(decl_base): + __tablename__ = "a" + id: Mapped[uuid.UUID] = mapped_column( + default=uuid.uuid4, primary_key=True, insert_sentinel=True + ) + data: Mapped[str] + + is_(A.id.expression.type._type_affinity, Uuid) + + def test_insert_sentinel_param_default_type(self, decl_base): + class A(decl_base): + __tablename__ = "a" + id: Mapped[int] = mapped_column( + primary_key=True, insert_sentinel=True + ) + data: Mapped[str] + + is_(A.id.expression.type._type_affinity, Integer) + @testing.variation("argument", ["version_id_col", "polymorphic_on"]) @testing.variation("column_type", ["anno", "non_anno", "plain_column"]) def test_mapped_column_version_poly_arg( diff --git a/test/orm/declarative/test_inheritance.py b/test/orm/declarative/test_inheritance.py index e8658926b..333d24230 100644 --- a/test/orm/declarative/test_inheritance.py +++ b/test/orm/declarative/test_inheritance.py @@ -1,3 +1,5 @@ +import contextlib + import sqlalchemy as sa from sqlalchemy import ForeignKey from sqlalchemy import Identity @@ -7,6 +9,7 @@ from sqlalchemy import String from sqlalchemy import testing from sqlalchemy.orm import class_mapper from sqlalchemy.orm import close_all_sessions +from sqlalchemy.orm import column_property from sqlalchemy.orm import configure_mappers from sqlalchemy.orm import declared_attr from sqlalchemy.orm import deferred @@ -20,6 +23,7 @@ from sqlalchemy.testing import assert_raises from sqlalchemy.testing import assert_raises_message from sqlalchemy.testing import eq_ from sqlalchemy.testing import expect_raises_message +from sqlalchemy.testing import expect_warnings from sqlalchemy.testing import fixtures from sqlalchemy.testing import is_ from sqlalchemy.testing import is_false @@ -987,6 +991,98 @@ class DeclarativeInheritanceTest( session.commit() 
eq_(session.query(Engineer).first().target, o1) + @testing.variation("omit_from_statements", [True, False]) + @testing.variation("combine_on_b", [True, False]) + @testing.variation("c_first", [True, False]) + def test_use_existing_column_other_inh_types( + self, decl_base, omit_from_statements, combine_on_b, c_first + ): + """test additional fixes to use_existing_column, adding + some new use cases with "omit_from_statements" which in this case + is essentially the same as adding it to the mapper exclude_cols + list. + + """ + + class A(decl_base): + __tablename__ = "a" + + id: Mapped[int] = mapped_column(primary_key=True) + data: Mapped[str] + extra: Mapped[int] = mapped_column( + use_existing_column=True, + _omit_from_statements=bool(omit_from_statements), + ) + + if c_first: + + class C(A): + foo: Mapped[str] + extra: Mapped[int] = mapped_column( + use_existing_column=True, + _omit_from_statements=bool(omit_from_statements), + ) + + if not combine_on_b and not omit_from_statements: + ctx = expect_warnings( + "Implicitly combining column a.extra with column b.extra", + raise_on_any_unexpected=True, + ) + else: + ctx = contextlib.nullcontext() + + with ctx: + + class B(A): + __tablename__ = "b" + id: Mapped[int] = mapped_column( + ForeignKey("a.id"), primary_key=True + ) + if combine_on_b: + extra: Mapped[int] = column_property( + mapped_column( + _omit_from_statements=bool(omit_from_statements) + ), + A.extra, + ) + else: + extra: Mapped[int] = mapped_column( + use_existing_column=True, + _omit_from_statements=bool(omit_from_statements), + ) + + if not c_first: + + class C(A): # noqa: F811 + foo: Mapped[str] + extra: Mapped[int] = mapped_column( + use_existing_column=True, + _omit_from_statements=bool(omit_from_statements), + ) + + if bool(omit_from_statements): + self.assert_compile(select(A), "SELECT a.id, a.data FROM a") + else: + self.assert_compile( + select(A), "SELECT a.id, a.data, a.extra FROM a" + ) + + if bool(omit_from_statements) and not combine_on_b: + self.assert_compile( + select(B), + "SELECT b.id, a.id AS id_1, a.data " + "FROM a JOIN b ON a.id = b.id", + ) + else: + # if we combine_on_b we made a column_property, which brought + # out "extra" even if it was omit_from_statements. 
this should be + # expected + self.assert_compile( + select(B), + "SELECT b.id, a.id AS id_1, a.data, b.extra, " + "a.extra AS extra_1 FROM a JOIN b ON a.id = b.id", + ) + @testing.variation("decl_type", ["legacy", "use_existing_column"]) def test_columns_single_inheritance_conflict_resolution_pk( self, decl_base, decl_type diff --git a/test/orm/dml/test_bulk_statements.py b/test/orm/dml/test_bulk_statements.py index 7a9f3324f..84ea7c82c 100644 --- a/test/orm/dml/test_bulk_statements.py +++ b/test/orm/dml/test_bulk_statements.py @@ -1,10 +1,13 @@ from __future__ import annotations +import contextlib from typing import Any from typing import List from typing import Optional +from typing import Set import uuid +from sqlalchemy import event from sqlalchemy import exc from sqlalchemy import ForeignKey from sqlalchemy import func @@ -22,45 +25,101 @@ from sqlalchemy.orm import column_property from sqlalchemy.orm import load_only from sqlalchemy.orm import Mapped from sqlalchemy.orm import mapped_column +from sqlalchemy.orm import orm_insert_sentinel +from sqlalchemy.orm import Session from sqlalchemy.testing import config from sqlalchemy.testing import eq_ from sqlalchemy.testing import expect_raises_message +from sqlalchemy.testing import expect_warnings from sqlalchemy.testing import fixtures from sqlalchemy.testing import mock from sqlalchemy.testing import provision from sqlalchemy.testing.assertsql import CompiledSQL +from sqlalchemy.testing.assertsql import Conditional from sqlalchemy.testing.entities import ComparableEntity from sqlalchemy.testing.fixtures import fixture_session class InsertStmtTest(testing.AssertsExecutionResults, fixtures.TestBase): - def test_no_returning_error(self, decl_base): + __backend__ = True + + @testing.variation( + "style", + [ + "no_executemany", + ("no_sort_by", testing.requires.insert_returning), + ("all_enabled", testing.requires.insert_returning), + ], + ) + @testing.variation("sort_by_parameter_order", [True, False]) + def test_no_returning_error( + self, + decl_base, + testing_engine, + style: testing.Variation, + sort_by_parameter_order, + ): class A(fixtures.ComparableEntity, decl_base): __tablename__ = "a" id: Mapped[int] = mapped_column(Identity(), primary_key=True) data: Mapped[str] x: Mapped[Optional[int]] = mapped_column("xcol") - decl_base.metadata.create_all(testing.db) - s = fixture_session() + engine = testing_engine() + + if style.no_executemany: + engine.dialect.use_insertmanyvalues = False + engine.dialect.insert_executemany_returning = False + engine.dialect.insert_executemany_returning_sort_by_parameter_order = ( # noqa: E501 + False + ) + elif style.no_sort_by: + engine.dialect.use_insertmanyvalues = True + engine.dialect.insert_executemany_returning = True + engine.dialect.insert_executemany_returning_sort_by_parameter_order = ( # noqa: E501 + False + ) + elif style.all_enabled: + engine.dialect.use_insertmanyvalues = True + engine.dialect.insert_executemany_returning = True + engine.dialect.insert_executemany_returning_sort_by_parameter_order = ( # noqa: E501 + True + ) + else: + style.fail() + + decl_base.metadata.create_all(engine) + s = Session(engine) - if testing.requires.insert_executemany_returning.enabled: + if style.all_enabled or ( + style.no_sort_by and not sort_by_parameter_order + ): result = s.scalars( - insert(A).returning(A), + insert(A).returning( + A, sort_by_parameter_order=bool(sort_by_parameter_order) + ), [ {"data": "d3", "x": 5}, {"data": "d4", "x": 6}, ], ) - eq_(result.all(), [A(data="d3", x=5), A(data="d4", 
x=6)]) + eq_(set(result.all()), {A(data="d3", x=5), A(data="d4", x=6)}) else: with expect_raises_message( exc.InvalidRequestError, - "Can't use explicit RETURNING for bulk INSERT operation", + r"Can't use explicit RETURNING for bulk INSERT operation.*" + rf"""executemany with RETURNING{ + ' and sort by parameter order' + if sort_by_parameter_order else '' + } is """ + r"not enabled for this dialect", ): s.scalars( - insert(A).returning(A), + insert(A).returning( + A, + sort_by_parameter_order=bool(sort_by_parameter_order), + ), [ {"data": "d3", "x": 5}, {"data": "d4", "x": 6}, @@ -132,6 +191,9 @@ class InsertStmtTest(testing.AssertsExecutionResults, fixtures.TestBase): ) @testing.requires.insert_returning + @testing.skip_if( + "oracle", "oracle doesn't like the no-FROM SELECT inside of an INSERT" + ) def test_insert_from_select_col_property(self, decl_base): """test #9273""" @@ -166,6 +228,40 @@ class InsertStmtTest(testing.AssertsExecutionResults, fixtures.TestBase): class BulkDMLReturningInhTest: + use_sentinel = False + randomize_returning = False + + def assert_for_downgrade(self, *, sort_by_parameter_order): + if ( + not sort_by_parameter_order + or not self.randomize_returning + or not testing.against(["postgresql", "mssql", "mariadb"]) + ): + return contextlib.nullcontext() + else: + return expect_warnings("Batches were downgraded") + + @classmethod + def setup_bind(cls): + if cls.randomize_returning: + new_eng = config.db.execution_options() + + @event.listens_for(new_eng, "engine_connect") + def eng_connect(connection): + fixtures.insertmanyvalues_fixture( + connection, + randomize_rows=True, + # there should be no sentinel downgrades for any of + # these three dbs. sqlite has downgrades + warn_on_downgraded=testing.against( + ["postgresql", "mssql", "mariadb"] + ), + ) + + return new_eng + else: + return config.db + def test_insert_col_key_also_works_currently(self): """using the column key, not mapped attr key. 
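Note that ``setup_bind()`` above may hand these tests an engine whose connections shuffle RETURNING rows via ``insertmanyvalues_fixture()``, so any ordering the tests observe must come from the sorting logic rather than from the driver. A condensed sketch of the ORM-level pattern being validated (``User`` and ``engine`` are illustrative stand-ins for the fixture classes)::

    from sqlalchemy import insert
    from sqlalchemy.orm import Session

    with Session(engine) as session:
        users = session.scalars(
            insert(User).returning(User, sort_by_parameter_order=True),
            [{"name": "u1"}, {"name": "u2"}, {"name": "u3"}],
        ).all()
        # even if the raw cursor rows came back shuffled, the ORM
        # objects are delivered in parameter order
        assert [u.name for u in users] == ["u1", "u2", "u3"]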
@@ -178,7 +274,7 @@ class BulkDMLReturningInhTest: """ A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) s.execute(insert(A).values(type="a", data="d", xcol=10)) eq_(s.scalars(select(A.x)).all(), [10]) @@ -186,7 +282,7 @@ class BulkDMLReturningInhTest: def test_autoflush(self, autoflush_option): A = self.classes.A - s = fixture_session() + s = fixture_session(bind=self.bind) a1 = A(data="x1") s.add(a1) @@ -211,8 +307,9 @@ class BulkDMLReturningInhTest: else: assert False - @testing.combinations(True, False, argnames="use_returning") - def test_heterogeneous_keys(self, use_returning): + @testing.variation("use_returning", [True, False]) + @testing.variation("sort_by_parameter_order", [True, False]) + def test_heterogeneous_keys(self, use_returning, sort_by_parameter_order): A, B = self.classes("A", "B") values = [ @@ -224,21 +321,31 @@ class BulkDMLReturningInhTest: {"data": "d8", "x": 7, "type": "a"}, ] - s = fixture_session() + s = fixture_session(bind=self.bind) stmt = insert(A) if use_returning: - stmt = stmt.returning(A) + stmt = stmt.returning( + A, sort_by_parameter_order=bool(sort_by_parameter_order) + ) with self.sql_execution_asserter() as asserter: result = s.execute(stmt, values) if use_returning: + if self.use_sentinel and sort_by_parameter_order: + _sentinel_col = ", _sentinel" + _sentinel_returning = ", a._sentinel" + _sentinel_param = ", :_sentinel" + else: + _sentinel_col = _sentinel_param = _sentinel_returning = "" + # note no sentinel col is used when there is only one row asserter.assert_( CompiledSQL( - "INSERT INTO a (type, data, xcol) VALUES " - "(:type, :data, :xcol) " - "RETURNING a.id, a.type, a.data, a.xcol, a.y", + f"INSERT INTO a (type, data, xcol{_sentinel_col}) VALUES " + f"(:type, :data, :xcol{_sentinel_param}) " + f"RETURNING a.id, a.type, a.data, a.xcol, a.y" + f"{_sentinel_returning}", [ {"type": "a", "data": "d3", "xcol": 5}, {"type": "a", "data": "d4", "xcol": 6}, @@ -250,9 +357,10 @@ class BulkDMLReturningInhTest: [{"type": "a", "data": "d5"}], ), CompiledSQL( - "INSERT INTO a (type, data, xcol, y) " - "VALUES (:type, :data, :xcol, :y) " - "RETURNING a.id, a.type, a.data, a.xcol, a.y", + f"INSERT INTO a (type, data, xcol, y{_sentinel_col}) " + f"VALUES (:type, :data, :xcol, :y{_sentinel_param}) " + f"RETURNING a.id, a.type, a.data, a.xcol, a.y" + f"{_sentinel_returning}", [ {"type": "a", "data": "d6", "xcol": 8, "y": 9}, {"type": "a", "data": "d7", "xcol": 12, "y": 12}, @@ -297,15 +405,15 @@ class BulkDMLReturningInhTest: if use_returning: with self.assert_statement_count(testing.db, 0): eq_( - result.scalars().all(), - [ + set(result.scalars().all()), + { A(data="d3", id=mock.ANY, type="a", x=5, y=None), A(data="d4", id=mock.ANY, type="a", x=6, y=None), A(data="d5", id=mock.ANY, type="a", x=None, y=None), A(data="d6", id=mock.ANY, type="a", x=8, y=9), A(data="d7", id=mock.ANY, type="a", x=12, y=12), A(data="d8", id=mock.ANY, type="a", x=7, y=None), - ], + }, ) @testing.combinations( @@ -315,10 +423,8 @@ class BulkDMLReturningInhTest: "cols_w_exprs", argnames="paramstyle", ) - @testing.combinations( - True, - (False, testing.requires.multivalues_inserts), - argnames="single_element", + @testing.variation( + "single_element", [True, (False, testing.requires.multivalues_inserts)] ) def test_single_values_returning_fn(self, paramstyle, single_element): """test using insert().values(). 
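The paramstyle variations in this test exercise roughly the following statement shapes (``A`` and ``session`` stand in for the fixture class and an active ``Session``; the data values are illustrative)::

    from sqlalchemy import String, func, insert

    # single parameter set via values(), with RETURNING of both the
    # entity and a SQL expression
    stmt = (
        insert(A)
        .values(data="d3", x=5)
        .returning(A, func.upper(A.data, type_=String))
    )

    # multi-VALUES form, used where the backend supports
    # multivalues_inserts
    stmt_multi = (
        insert(A)
        .values([{"data": "d3", "x": 5}, {"data": "d4", "x": 6}])
        .returning(A, func.upper(A.data, type_=String))
    )

    rows = session.execute(stmt).all()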
@@ -364,7 +470,7 @@ class BulkDMLReturningInhTest: else: assert False - s = fixture_session() + s = fixture_session(bind=self.bind) if single_element: if paramstyle.startswith("strings"): @@ -405,7 +511,7 @@ class BulkDMLReturningInhTest: }, ] - s = fixture_session() + s = fixture_session(bind=self.bind) stmt = ( insert(A) @@ -415,11 +521,11 @@ class BulkDMLReturningInhTest: for i in range(3): result = s.execute(stmt, data) - expected: List[Any] = [ + expected: Set[Any] = { (A(data="dd", x=5, y=9), "DD"), (A(data="dd", x=10, y=8), "DD"), - ] - eq_(result.all(), expected) + } + eq_(set(result.all()), expected) def test_bulk_w_sql_expressions_subclass(self): A, B = self.classes("A", "B") @@ -429,7 +535,7 @@ class BulkDMLReturningInhTest: {"bd": "bd2", "x": 5, "y": 6, "z": 7, "q": 8}, ] - s = fixture_session() + s = fixture_session(bind=self.bind) stmt = ( insert(B) @@ -439,17 +545,17 @@ class BulkDMLReturningInhTest: for i in range(3): result = s.execute(stmt, data) - expected: List[Any] = [ + expected: Set[Any] = { (B(bd="bd1", data="dd", q=4, type="b", x=1, y=2, z=3), "DD"), (B(bd="bd2", data="dd", q=8, type="b", x=5, y=6, z=7), "DD"), - ] - eq_(result.all(), expected) + } + eq_(set(result), expected) @testing.combinations(True, False, argnames="use_ordered") def test_bulk_upd_w_sql_expressions_no_ordered_values(self, use_ordered): A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) stmt = update(B).ordered_values( ("data", func.lower("DD_UPDATE")), @@ -471,13 +577,16 @@ class BulkDMLReturningInhTest: def test_bulk_upd_w_sql_expressions_subclass(self): A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) data = [ {"data": "d3", "bd": "bd1", "x": 1, "y": 2, "z": 3, "q": 4}, {"data": "d4", "bd": "bd2", "x": 5, "y": 6, "z": 7, "q": 8}, ] - ids = s.scalars(insert(B).returning(B.id), data).all() + ids = { + row.data: row.id + for row in s.execute(insert(B).returning(B.id, B.data), data) + } stmt = update(B).values( data=func.lower("DD_UPDATE"), z=literal_column("3 + 12") @@ -486,8 +595,8 @@ class BulkDMLReturningInhTest: result = s.execute( stmt, [ - {"id": ids[0], "bd": "bd1_updated"}, - {"id": ids[1], "bd": "bd2_updated"}, + {"id": ids["d3"], "bd": "bd1_updated"}, + {"id": ids["d4"], "bd": "bd2_updated"}, ], ) @@ -495,12 +604,12 @@ class BulkDMLReturningInhTest: assert result is not None eq_( - s.scalars(select(B)).all(), - [ + set(s.scalars(select(B))), + { B( bd="bd1_updated", data="dd_update", - id=ids[0], + id=ids["d3"], q=4, type="b", x=1, @@ -510,36 +619,32 @@ class BulkDMLReturningInhTest: B( bd="bd2_updated", data="dd_update", - id=ids[1], + id=ids["d4"], q=8, type="b", x=5, y=6, z=15, ), - ], + }, ) def test_single_returning_fn(self): A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) for i in range(3): result = s.execute( insert(A).returning(A, func.upper(A.data, type_=String)), [{"data": "d3"}, {"data": "d4"}], ) - eq_(result.all(), [(A(data="d3"), "D3"), (A(data="d4"), "D4")]) + eq_(set(result), {(A(data="d3"), "D3"), (A(data="d4"), "D4")}) - @testing.combinations( - True, - False, - argnames="single_element", - ) + @testing.variation("single_element", [True, False]) def test_subclass_no_returning(self, single_element): A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) if single_element: data = {"data": "d3", "bd": "bd1", "x": 1, "y": 2, "z": 3, "q": 4} @@ -552,19 +657,16 @@ class BulkDMLReturningInhTest: result = 
s.execute(insert(B), data) assert result._soft_closed - @testing.combinations( - True, - False, - argnames="single_element", - ) - def test_subclass_load_only(self, single_element): + @testing.variation("sort_by_parameter_order", [True, False]) + @testing.variation("single_element", [True, False]) + def test_subclass_load_only(self, single_element, sort_by_parameter_order): """test that load_only() prevents additional attributes from being populated. """ A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) if single_element: data = {"data": "d3", "bd": "bd1", "x": 1, "y": 2, "z": 3, "q": 4} @@ -578,7 +680,12 @@ class BulkDMLReturningInhTest: # tests both caching and that the data dictionaries aren't # mutated... result = s.execute( - insert(B).returning(B).options(load_only(B.data, B.y, B.q)), + insert(B) + .returning( + B, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + .options(load_only(B.data, B.y, B.q)), data, ) objects = result.scalars().all() @@ -593,13 +700,14 @@ class BulkDMLReturningInhTest: ] if not single_element: expected.append(B(data="d4", bd="bd2", x=5, y=6, z=7, q=8)) - eq_(objects, expected) - @testing.combinations( - True, - False, - argnames="single_element", - ) + if sort_by_parameter_order: + coll = list + else: + coll = set + eq_(coll(objects), coll(expected)) + + @testing.variation("single_element", [True, False]) def test_subclass_load_only_doesnt_fetch_cols(self, single_element): """test that when using load_only(), the actual INSERT statement does not include the deferred columns @@ -607,7 +715,7 @@ class BulkDMLReturningInhTest: """ A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) data = [ {"data": "d3", "bd": "bd1", "x": 1, "y": 2, "z": 3, "q": 4}, @@ -699,30 +807,60 @@ class BulkDMLReturningInhTest: # RETURNING only includes PK, discriminator, then the cols # we asked for data, y, q. xcol, z, bd are omitted. plus they # are broken out correctly in the two statements. 
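+        # when use_sentinel is set and more than one row is INSERTed, the
+        # _sentinel column is expected in the column list, VALUES and
+        # RETURNING of both statements below; without a sentinel, the
+        # multi-row b-table INSERT instead repeats the primary key
+        # (b.id AS id__1) to serve as the sort key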
+ asserter.assert_( - CompiledSQL( - "INSERT INTO a (type, data, xcol, y) VALUES " - "(:type, :data, :xcol, :y) " - "RETURNING a.id, a.type, a.data, a.y", - a_data, - ), - CompiledSQL( - "INSERT INTO b (id, bd, zcol, q) " - "VALUES (:id, :bd, :zcol, :q) " - "RETURNING b.id, b.q", - b_data, - ), + Conditional( + self.use_sentinel and not single_element, + [ + CompiledSQL( + "INSERT INTO a (type, data, xcol, y, _sentinel) " + "VALUES " + "(:type, :data, :xcol, :y, :_sentinel) " + "RETURNING a.id, a.type, a.data, a.y, a._sentinel", + a_data, + ), + CompiledSQL( + "INSERT INTO b (id, bd, zcol, q, _sentinel) " + "VALUES (:id, :bd, :zcol, :q, :_sentinel) " + "RETURNING b.id, b.q, b._sentinel", + b_data, + ), + ], + [ + CompiledSQL( + "INSERT INTO a (type, data, xcol, y) VALUES " + "(:type, :data, :xcol, :y) " + "RETURNING a.id, a.type, a.data, a.y", + a_data, + ), + Conditional( + single_element, + [ + CompiledSQL( + "INSERT INTO b (id, bd, zcol, q) " + "VALUES (:id, :bd, :zcol, :q) " + "RETURNING b.id, b.q", + b_data, + ), + ], + [ + CompiledSQL( + "INSERT INTO b (id, bd, zcol, q) " + "VALUES (:id, :bd, :zcol, :q) " + "RETURNING b.id, b.q, b.id AS id__1", + b_data, + ), + ], + ), + ], + ) ) - @testing.combinations( - True, - False, - argnames="single_element", - ) + @testing.variation("single_element", [True, False]) def test_subclass_returning_bind_expr(self, single_element): A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) if single_element: data = {"data": "d3", "bd": "bd1", "x": 1, "y": 2, "z": 3, "q": 4} @@ -740,24 +878,27 @@ class BulkDMLReturningInhTest: if single_element: eq_(result.all(), [("d3", 2, 9)]) else: - eq_(result.all(), [("d3", 2, 9), ("d4", 6, 13)]) + eq_(set(result), {("d3", 2, 9), ("d4", 6, 13)}) def test_subclass_bulk_update(self): A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) data = [ {"data": "d3", "bd": "bd1", "x": 1, "y": 2, "z": 3, "q": 4}, {"data": "d4", "bd": "bd2", "x": 5, "y": 6, "z": 7, "q": 8}, ] - ids = s.scalars(insert(B).returning(B.id), data).all() + ids = { + row.data: row.id + for row in s.execute(insert(B).returning(B.id, B.data), data).all() + } result = s.execute( update(B), [ - {"id": ids[0], "data": "d3_updated", "bd": "bd1_updated"}, - {"id": ids[1], "data": "d4_updated", "bd": "bd2_updated"}, + {"id": ids["d3"], "data": "d3_updated", "bd": "bd1_updated"}, + {"id": ids["d4"], "data": "d4_updated", "bd": "bd2_updated"}, ], ) @@ -765,12 +906,12 @@ class BulkDMLReturningInhTest: assert result is not None eq_( - s.scalars(select(B)).all(), - [ + set(s.scalars(select(B))), + { B( bd="bd1_updated", data="d3_updated", - id=ids[0], + id=ids["d3"], q=4, type="b", x=1, @@ -780,21 +921,24 @@ class BulkDMLReturningInhTest: B( bd="bd2_updated", data="d4_updated", - id=ids[1], + id=ids["d4"], q=8, type="b", x=5, y=6, z=7, ), - ], + }, ) - @testing.combinations(True, False, argnames="single_element") - def test_subclass_return_just_subclass_ids(self, single_element): + @testing.variation("single_element", [True, False]) + @testing.variation("sort_by_parameter_order", [True, False]) + def test_subclass_return_just_subclass_ids( + self, single_element, sort_by_parameter_order + ): A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) if single_element: data = {"data": "d3", "bd": "bd1", "x": 1, "y": 2, "z": 3, "q": 4} @@ -804,16 +948,24 @@ class BulkDMLReturningInhTest: {"data": "d4", "bd": "bd2", "x": 5, "y": 6, "z": 7, "q": 8}, ] - ids = 
s.scalars(insert(B).returning(B.id), data).all() - actual_ids = s.scalars(select(B.id).order_by(B.data)).all() + ids = s.execute( + insert(B).returning( + B.id, + B.data, + sort_by_parameter_order=bool(sort_by_parameter_order), + ), + data, + ) + actual_ids = s.execute(select(B.id, B.data).order_by(B.id)) - eq_(ids, actual_ids) + if sort_by_parameter_order: + coll = list + else: + coll = set - @testing.combinations( - "orm", - "bulk", - argnames="insert_strategy", - ) + eq_(coll(ids), coll(actual_ids)) + + @testing.variation("insert_strategy", ["orm", "bulk", "bulk_ordered"]) @testing.requires.provisioned_upsert def test_base_class_upsert(self, insert_strategy): """upsert is really tricky. if you dont have any data updated, @@ -825,17 +977,22 @@ class BulkDMLReturningInhTest: """ A = self.classes.A - s = fixture_session() + s = fixture_session(bind=self.bind) initial_data = [ {"data": "d3", "x": 1, "y": 2, "q": 4}, {"data": "d4", "x": 5, "y": 6, "q": 8}, ] - ids = s.scalars(insert(A).returning(A.id), initial_data).all() + ids = { + row.data: row.id + for row in s.execute( + insert(A).returning(A.id, A.data), initial_data + ) + } upsert_data = [ { - "id": ids[0], + "id": ids["d3"], "type": "a", "data": "d3", "x": 1, @@ -849,7 +1006,7 @@ class BulkDMLReturningInhTest: "y": 5, }, { - "id": ids[1], + "id": ids["d4"], "type": "a", "data": "d4", "x": 5, @@ -868,24 +1025,28 @@ class BulkDMLReturningInhTest: config, A, (A,), - lambda inserted: {"data": inserted.data + " upserted"}, + set_lambda=lambda inserted: {"data": inserted.data + " upserted"}, + sort_by_parameter_order=insert_strategy.bulk_ordered, ) - if insert_strategy == "orm": + if insert_strategy.orm: result = s.scalars(stmt.values(upsert_data)) - elif insert_strategy == "bulk": - result = s.scalars(stmt, upsert_data) + elif insert_strategy.bulk or insert_strategy.bulk_ordered: + with self.assert_for_downgrade( + sort_by_parameter_order=insert_strategy.bulk_ordered + ): + result = s.scalars(stmt, upsert_data) else: - assert False + insert_strategy.fail() eq_( - result.all(), - [ - A(data="d3 upserted", id=ids[0], type="a", x=1, y=2), + set(result.all()), + { + A(data="d3 upserted", id=ids["d3"], type="a", x=1, y=2), A(data="d32", id=32, type="a", x=19, y=5), - A(data="d4 upserted", id=ids[1], type="a", x=5, y=6), + A(data="d4 upserted", id=ids["d4"], type="a", x=5, y=6), A(data="d28", id=28, type="a", x=9, y=15), - ], + }, ) @testing.combinations( @@ -893,13 +1054,14 @@ class BulkDMLReturningInhTest: "bulk", argnames="insert_strategy", ) + @testing.variation("sort_by_parameter_order", [True, False]) @testing.requires.provisioned_upsert - def test_subclass_upsert(self, insert_strategy): + def test_subclass_upsert(self, insert_strategy, sort_by_parameter_order): """note this is overridden in the joined version to expect failure""" A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) idd3 = 1 idd4 = 2 @@ -926,11 +1088,19 @@ class BulkDMLReturningInhTest: "q": 8, }, ] - ids = s.scalars(insert(B).returning(B.id), initial_data).all() + ids = { + row.data: row.id + for row in s.execute( + insert(B).returning( + B.id, B.data, sort_by_parameter_order=True + ), + initial_data, + ) + } upsert_data = [ { - "id": ids[0], + "id": ids["d3"], "type": "b", "data": "d3", "bd": "bd1_upserted", @@ -950,7 +1120,7 @@ class BulkDMLReturningInhTest: "q": 21, }, { - "id": ids[1], + "id": ids["d4"], "type": "b", "bd": "bd2_upserted", "data": "d4", @@ -975,19 +1145,24 @@ class BulkDMLReturningInhTest: config, B, (B,), - 
lambda inserted: { + set_lambda=lambda inserted: { "data": inserted.data + " upserted", "bd": inserted.bd + " upserted", }, + sort_by_parameter_order=bool(sort_by_parameter_order), ) - result = s.scalars(stmt, upsert_data) + + with self.assert_for_downgrade( + sort_by_parameter_order=bool(sort_by_parameter_order) + ): + result = s.scalars(stmt, upsert_data) eq_( - result.all(), - [ + set(result), + { B( bd="bd1_upserted upserted", data="d3 upserted", - id=ids[0], + id=ids["d3"], q=4, type="b", x=1, @@ -1007,7 +1182,7 @@ class BulkDMLReturningInhTest: B( bd="bd2_upserted upserted", data="d4 upserted", - id=ids[1], + id=ids["d4"], q=8, type="b", x=5, @@ -1024,10 +1199,34 @@ class BulkDMLReturningInhTest: y=15, z=10, ), - ], + }, ) +@testing.combinations( + ( + "no_sentinel", + False, + ), + ( + "w_sentinel", + True, + ), + argnames="use_sentinel", + id_="ia", +) +@testing.combinations( + ( + "nonrandom", + False, + ), + ( + "random", + True, + ), + argnames="randomize_returning", + id_="ia", +) class BulkDMLReturningJoinedInhTest( BulkDMLReturningInhTest, fixtures.DeclarativeMappedTest ): @@ -1035,6 +1234,9 @@ class BulkDMLReturningJoinedInhTest( __requires__ = ("insert_returning", "insert_executemany_returning") __backend__ = True + use_sentinel = False + randomize_returning = False + @classmethod def setup_classes(cls): decl_base = cls.DeclarativeBasic @@ -1047,6 +1249,9 @@ class BulkDMLReturningJoinedInhTest( x: Mapped[Optional[int]] = mapped_column("xcol") y: Mapped[Optional[int]] + if cls.use_sentinel: + _sentinel: Mapped[int] = orm_insert_sentinel() + __mapper_args__ = { "polymorphic_identity": "a", "polymorphic_on": "type", @@ -1061,6 +1266,9 @@ class BulkDMLReturningJoinedInhTest( z: Mapped[Optional[int]] = mapped_column("zcol") q: Mapped[Optional[int]] + if cls.use_sentinel: + _sentinel: Mapped[int] = orm_insert_sentinel() + __mapper_args__ = {"polymorphic_identity": "b"} @testing.combinations( @@ -1073,17 +1281,26 @@ class BulkDMLReturningJoinedInhTest( False, argnames="single_param", ) + @testing.variation("sort_by_parameter_order", [True, False]) @testing.requires.provisioned_upsert - def test_subclass_upsert(self, insert_strategy, single_param): + def test_subclass_upsert( + self, + insert_strategy, + single_param, + sort_by_parameter_order, + ): A, B = self.classes("A", "B") - s = fixture_session() + s = fixture_session(bind=self.bind) initial_data = [ {"data": "d3", "bd": "bd1", "x": 1, "y": 2, "z": 3, "q": 4}, {"data": "d4", "bd": "bd2", "x": 5, "y": 6, "z": 7, "q": 8}, ] - ids = s.scalars(insert(B).returning(B.id), initial_data).all() + ids = s.scalars( + insert(B).returning(B.id, sort_by_parameter_order=True), + initial_data, + ).all() upsert_data = [ { @@ -1102,9 +1319,10 @@ class BulkDMLReturningJoinedInhTest( config, B, (B,), - lambda inserted: { + set_lambda=lambda inserted: { "bd": inserted.bd + " upserted", }, + sort_by_parameter_order=bool(sort_by_parameter_order), ) with expect_raises_message( @@ -1115,6 +1333,18 @@ class BulkDMLReturningJoinedInhTest( s.scalars(stmt, upsert_data) +@testing.combinations( + ( + "nonrandom", + False, + ), + ( + "random", + True, + ), + argnames="randomize_returning", + id_="ia", +) class BulkDMLReturningSingleInhTest( BulkDMLReturningInhTest, fixtures.DeclarativeMappedTest ): @@ -1146,6 +1376,18 @@ class BulkDMLReturningSingleInhTest( __mapper_args__ = {"polymorphic_identity": "b"} +@testing.combinations( + ( + "nonrandom", + False, + ), + ( + "random", + True, + ), + argnames="randomize_returning", + id_="ia", +) class 
BulkDMLReturningConcreteInhTest( BulkDMLReturningInhTest, fixtures.DeclarativeMappedTest ): @@ -1253,7 +1495,7 @@ class CTETest(fixtures.DeclarativeMappedTest): else: assert False - sess = fixture_session() + sess = fixture_session(bind=self.bind) with self.sql_execution_asserter() as asserter: if not expect_entity: diff --git a/test/orm/test_ac_relationships.py b/test/orm/test_ac_relationships.py index a5efd9930..b500c1e1a 100644 --- a/test/orm/test_ac_relationships.py +++ b/test/orm/test_ac_relationships.py @@ -3,6 +3,7 @@ from sqlalchemy import Column from sqlalchemy import exc from sqlalchemy import ForeignKey from sqlalchemy import func +from sqlalchemy import insert_sentinel from sqlalchemy import Integer from sqlalchemy import join from sqlalchemy import select @@ -42,6 +43,7 @@ class PartitionByFixture(fixtures.DeclarativeMappedTest): __tablename__ = "c" id = Column(Integer, primary_key=True) b_id = Column(ForeignKey("b.id")) + _sentinel = insert_sentinel() partition = select( B, diff --git a/test/orm/test_defaults.py b/test/orm/test_defaults.py index fb6fba704..562d9b9dc 100644 --- a/test/orm/test_defaults.py +++ b/test/orm/test_defaults.py @@ -10,6 +10,7 @@ from sqlalchemy.testing import fixtures from sqlalchemy.testing.assertsql import assert_engine from sqlalchemy.testing.assertsql import CompiledSQL from sqlalchemy.testing.assertsql import Conditional +from sqlalchemy.testing.assertsql import RegexSQL from sqlalchemy.testing.fixtures import fixture_session from sqlalchemy.testing.schema import Column from sqlalchemy.testing.schema import Table @@ -285,26 +286,26 @@ class ComputedDefaultsOnUpdateTest(fixtures.MappedTest): Conditional( testing.db.dialect.insert_executemany_returning, [ - CompiledSQL( - "INSERT INTO test (id, foo) " - "VALUES (%(id)s, %(foo)s) " - "RETURNING test.bar", + RegexSQL( + r"INSERT INTO test \(id, foo\) .*" + r"VALUES \(.*\) .*" + r"RETURNING test.bar, test.id", [{"foo": 5, "id": 1}, {"foo": 10, "id": 2}], dialect="postgresql", ), ], [ - CompiledSQL( - "INSERT INTO test (id, foo) " - "VALUES (%(id)s, %(foo)s) " - "RETURNING test.bar", + RegexSQL( + r"INSERT INTO test \(id, foo\) .*" + r"VALUES \(.*\) .*" + r"RETURNING test.bar, test.id", [{"foo": 5, "id": 1}], dialect="postgresql", ), - CompiledSQL( - "INSERT INTO test (id, foo) " - "VALUES (%(id)s, %(foo)s) " - "RETURNING test.bar", + RegexSQL( + r"INSERT INTO test \(id, foo\) .*" + r"VALUES \(.*\) .*" + r"RETURNING test.bar, test.id", [{"foo": 10, "id": 2}], dialect="postgresql", ), @@ -468,23 +469,23 @@ class IdentityDefaultsOnUpdateTest(fixtures.MappedTest): Conditional( testing.db.dialect.insert_executemany_returning, [ - CompiledSQL( - "INSERT INTO test (foo) VALUES (%(foo)s) " - "RETURNING test.id", + RegexSQL( + r"INSERT INTO test \(foo\).*VALUES (.*).* " + r"RETURNING test.id, test.id AS id__1", [{"foo": 5}, {"foo": 10}], dialect="postgresql", ), ], [ - CompiledSQL( - "INSERT INTO test (foo) VALUES (%(foo)s) " - "RETURNING test.id", + RegexSQL( + r"INSERT INTO test \(foo\).*VALUES (.*).* " + r"RETURNING test.id, test.id AS id__1", [{"foo": 5}], dialect="postgresql", ), - CompiledSQL( - "INSERT INTO test (foo) VALUES (%(foo)s) " - "RETURNING test.id", + RegexSQL( + r"INSERT INTO test \(foo\).*VALUES (.*).* " + r"RETURNING test.id, test.id AS id__1", [{"foo": 10}], dialect="postgresql", ), diff --git a/test/orm/test_expire.py b/test/orm/test_expire.py index f98cae922..906771f16 100644 --- a/test/orm/test_expire.py +++ b/test/orm/test_expire.py @@ -506,7 +506,9 @@ class 
ExpireTest(_fixtures.FixtureTest): users, properties={ "addresses": relationship( - Address, cascade="all, refresh-expire" + Address, + cascade="all, refresh-expire", + order_by=addresses.c.id, ) }, ) diff --git a/test/orm/test_unitofwork.py b/test/orm/test_unitofwork.py index 5835ef65a..f9c565c86 100644 --- a/test/orm/test_unitofwork.py +++ b/test/orm/test_unitofwork.py @@ -1,6 +1,7 @@ """Tests unitofwork operations.""" import datetime +import re import sqlalchemy as sa from sqlalchemy import Boolean @@ -3513,14 +3514,15 @@ class PartialNullPKTest(fixtures.MappedTest): class NoRowInsertedTest(fixtures.TestBase): """test #7594. - failure modes when INSERT doesnt actually insert a row. + failure modes when INSERT doesn't actually insert a row. + s """ - __backend__ = True - # the test manipulates INSERTS to become UPDATES to simulate - # "INSERT that returns no row" so both are needed - __requires__ = ("insert_returning", "update_returning") + # "INSERT that returns no row" so both are needed; the manipulations + # are currently postgresql or SQLite specific + __backend__ = True + __only_on__ = ("postgresql", "sqlite") @testing.fixture def null_server_default_fixture(self, registry, connection): @@ -3537,30 +3539,26 @@ class NoRowInsertedTest(fixtures.TestBase): def revert_insert( conn, cursor, statement, parameters, context, executemany ): - if statement.startswith("INSERT"): - if statement.endswith("RETURNING my_table.id"): - if executemany and isinstance(parameters, list): - # remove some rows, so the count is wrong - parameters = parameters[0:1] - else: - # statement should return no rows - statement = ( - "UPDATE my_table SET id=NULL WHERE 1!=1 " - "RETURNING my_table.id" - ) - parameters = {} + if re.match(r"INSERT.* RETURNING (?:my_table.)?id", statement): + if executemany and isinstance(parameters, list): + # remove some rows, so the count is wrong + parameters = parameters[0:1] else: - assert not testing.against( - "postgresql" - ), "this test has to at least run on PostgreSQL" - testing.config.skip_test( - "backend doesn't support the expected form of " - "RETURNING for this test to work" + # statement should return no rows + statement = ( + "UPDATE my_table SET id=NULL WHERE 1!=1 " + "RETURNING my_table.id" ) + parameters = {} return statement, parameters return MyClass + @testing.only_on( + "postgresql", + "only postgresql uses RETURNING for a single-row " + "INSERT among the DBs we are using in this test", + ) def test_insert_single_no_pk_correct_exception( self, null_server_default_fixture, connection ): diff --git a/test/orm/test_unitofworkv2.py b/test/orm/test_unitofworkv2.py index 968285148..4d04ce0a6 100644 --- a/test/orm/test_unitofworkv2.py +++ b/test/orm/test_unitofworkv2.py @@ -1,5 +1,6 @@ from unittest.mock import Mock from unittest.mock import patch +import uuid from sqlalchemy import cast from sqlalchemy import DateTime @@ -9,6 +10,8 @@ from sqlalchemy import FetchedValue from sqlalchemy import ForeignKey from sqlalchemy import func from sqlalchemy import Identity +from sqlalchemy import insert +from sqlalchemy import insert_sentinel from sqlalchemy import inspect from sqlalchemy import Integer from sqlalchemy import JSON @@ -19,6 +22,7 @@ from sqlalchemy import String from sqlalchemy import testing from sqlalchemy import text from sqlalchemy import util +from sqlalchemy import Uuid from sqlalchemy.orm import attributes from sqlalchemy.orm import backref from sqlalchemy.orm import clear_mappers @@ -32,12 +36,14 @@ from sqlalchemy.testing import assert_warns_message 
from sqlalchemy.testing import config from sqlalchemy.testing import engines from sqlalchemy.testing import eq_ +from sqlalchemy.testing import expect_warnings from sqlalchemy.testing import fixtures from sqlalchemy.testing import is_ from sqlalchemy.testing import variation_fixture from sqlalchemy.testing.assertsql import AllOf from sqlalchemy.testing.assertsql import CompiledSQL from sqlalchemy.testing.assertsql import Conditional +from sqlalchemy.testing.assertsql import RegexSQL from sqlalchemy.testing.fixtures import fixture_session from sqlalchemy.testing.provision import normalize_sequence from sqlalchemy.testing.schema import Column @@ -2536,23 +2542,26 @@ class EagerDefaultsTest(fixtures.MappedTest): Conditional( testing.db.dialect.insert_executemany_returning, [ - CompiledSQL( - "INSERT INTO test (id) VALUES (%(id)s) " - "RETURNING test.foo", + RegexSQL( + r"INSERT INTO test \(id\) .*" + r"VALUES \(.*\) .*" + r"RETURNING test.foo, test.id", [{"id": 1}, {"id": 2}], dialect="postgresql", ), ], [ - CompiledSQL( - "INSERT INTO test (id) VALUES (%(id)s) " - "RETURNING test.foo", + RegexSQL( + r"INSERT INTO test \(id\) .*" + r"VALUES \(.*\) .*" + r"RETURNING test.foo, test.id", [{"id": 1}], dialect="postgresql", ), - CompiledSQL( - "INSERT INTO test (id) VALUES (%(id)s) " - "RETURNING test.foo", + RegexSQL( + r"INSERT INTO test \(id\) .*" + r"VALUES \(.*\) .*" + r"RETURNING test.foo, test.id", [{"id": 2}], dialect="postgresql", ), @@ -2595,26 +2604,26 @@ class EagerDefaultsTest(fixtures.MappedTest): Conditional( testing.db.dialect.insert_executemany_returning, [ - CompiledSQL( - "INSERT INTO test3 (id, foo) " - "VALUES (%(id)s, lower(%(lower_1)s)) " - "RETURNING test3.foo", + RegexSQL( + r"INSERT INTO test3 \(id, foo\) .*" + r"VALUES \(.*\) .*" + r"RETURNING test3.foo, test3.id", [{"id": 1}, {"id": 2}], dialect="postgresql", ), ], [ - CompiledSQL( - "INSERT INTO test3 (id, foo) " - "VALUES (%(id)s, lower(%(lower_1)s)) " - "RETURNING test3.foo", + RegexSQL( + r"INSERT INTO test3 \(id, foo\) .*" + r"VALUES \(.*\) .*" + r"RETURNING test3.foo, test3.id", [{"id": 1}], dialect="postgresql", ), - CompiledSQL( - "INSERT INTO test3 (id, foo) " - "VALUES (%(id)s, lower(%(lower_1)s)) " - "RETURNING test3.foo", + RegexSQL( + r"INSERT INTO test3 \(id, foo\) .*" + r"VALUES \(.*\) .*" + r"RETURNING test3.foo, test3.id", [{"id": 2}], dialect="postgresql", ), @@ -3830,9 +3839,19 @@ class TryToFoolInsertManyValuesTest(fixtures.TestBase): ("identity", testing.requires.identity_columns), ], ) - def test_bulk_insert_maintains_correct_pks( - self, decl_base, connection, pk_type - ): + @testing.variation( + "sentinel", + [ + "none", # passes because we automatically downgrade + # for no sentinel col + "implicit_not_omitted", + "implicit_omitted", + "explicit", + "default_uuid", + "default_string_uuid", + ], + ) + def test_original_use_case(self, decl_base, connection, pk_type, sentinel): """test #9603. this uses the ORM to ensure the ORM is not using any kind of @@ -3840,75 +3859,221 @@ class TryToFoolInsertManyValuesTest(fixtures.TestBase): specific to SQL Server, however if we identify any other similar issues in other DBs we should add tests to this suite. + NOTE: Assuming the code is not doing the correct kind of INSERT + for SQL Server, the SQL Server failure here is still extremely + difficult to trip; any changes to the table structure and it no longer + fails, and it's likely this version of the test might not fail on SQL + Server in any case. 
The test_this_really_fails_on_mssql_wo_full_fix is + more optimized to producing the SQL Server failure as reliably as + possible, however this can change at any time as SQL Server's decisions + here are completely opaque. + """ class Datum(decl_base): __tablename__ = "datum" - id = Column(Integer, autoincrement=False, primary_key=True) - data = Column(String(10)) + datum_id = Column(Integer, Identity(), primary_key=True) class Result(decl_base): __tablename__ = "result" if pk_type.plain_autoinc: - id = Column(Integer, primary_key=True) # noqa: A001 + result_id = Column(Integer, primary_key=True) elif pk_type.sequence: - id = Column( # noqa: A001 - Integer, Sequence("rid_seq", start=1), primary_key=True + result_id = Column( + Integer, + Sequence("result_id_seq", start=1), + primary_key=True, ) elif pk_type.identity: - id = Column( # noqa: A001 - Integer, Identity(), primary_key=True - ) + result_id = Column(Integer, Identity(), primary_key=True) else: pk_type.fail() - thing = Column(Integer) - lft_datum_id = Column(Integer, ForeignKey(Datum.id)) + lft_datum_id = Column(ForeignKey(Datum.datum_id)) - decl_base.metadata.create_all(connection) - with Session(connection) as sess: + lft_datum = relationship(Datum) - size = 15 - datum_ids = list(range(1, size + 1)) + if sentinel.implicit_not_omitted or sentinel.implicit_omitted: + _sentinel = insert_sentinel( + omit_from_statements=bool(sentinel.implicit_omitted), + ) + elif sentinel.explicit: + some_uuid = Column( + Uuid(), insert_sentinel=True, nullable=False + ) + elif sentinel.default_uuid or sentinel.default_string_uuid: + _sentinel = Column( + Uuid(native_uuid=bool(sentinel.default_uuid)), + insert_sentinel=True, + default=uuid.uuid4, + ) - sess.add_all([Datum(id=id_, data=f"d{id_}") for id_ in datum_ids]) - sess.flush() + class ResultDatum(decl_base): - result_data = [ - Result(thing=num, lft_datum_id=datum_ids[num % size]) - for num in range(size * size) - ] - sess.add_all(result_data) + __tablename__ = "result_datum" + + result_id = Column(ForeignKey(Result.result_id), primary_key=True) + lft_datum_id = Column(ForeignKey(Datum.datum_id)) + + lft_datum = relationship(Datum) + result = relationship(Result) + + if sentinel.implicit_not_omitted or sentinel.implicit_omitted: + _sentinel = insert_sentinel( + omit_from_statements=bool(sentinel.implicit_omitted), + ) + elif sentinel.explicit: + some_uuid = Column( + Uuid(native_uuid=False), + insert_sentinel=True, + nullable=False, + ) + elif sentinel.default_uuid or sentinel.default_string_uuid: + _sentinel = Column( + Uuid(native_uuid=bool(sentinel.default_uuid)), + insert_sentinel=True, + default=uuid.uuid4, + ) + + decl_base.metadata.create_all(connection) + N = 13 + with Session(connection) as sess: + full_range = [num for num in range(N * N)] + + datum_idx = [Datum() for num in range(N)] + sess.add_all(datum_idx) sess.flush() - # this is what we expected we put in - the_data_in_order_should_be = [ - (num + 1, num, datum_ids[num % size]) - for num in range(size * size) - ] + if sentinel.explicit: + result_idx = [ + Result( + lft_datum=datum_idx[n % N], + some_uuid=uuid.uuid4(), + ) + for n in full_range + ] + else: + result_idx = [ + Result( + lft_datum=datum_idx[n % N], + ) + for n in full_range + ] + + sess.add_all(result_idx) + + if sentinel.explicit: + sess.add_all( + ResultDatum( + lft_datum=datum_idx[n % N], + result=result_idx[n], + some_uuid=uuid.uuid4(), + ) + for n in full_range + ) + else: + sess.add_all( + ResultDatum( + lft_datum=datum_idx[n % N], + result=result_idx[n], + ) + 
for n in full_range + ) - # and yes, that's what went in - eq_( - sess.execute( - select( - Result.id, Result.thing, Result.lft_datum_id - ).order_by(Result.id) - ).all(), - the_data_in_order_should_be, + fixtures.insertmanyvalues_fixture( + sess.connection(), warn_on_downgraded=True ) + if ( + sentinel.none + and testing.db.dialect.insert_returning + and testing.db.dialect.use_insertmanyvalues + and select() + .compile(dialect=testing.db.dialect) + ._get_sentinel_column_for_table(Result.__table__) + is None + ): + with expect_warnings( + "Batches were downgraded for sorted INSERT" + ): + sess.flush() + else: + sess.flush() - # however, if insertmanyvalues is turned on, OUTPUT inserted - # did not give us the rows in the order we sent, so ids were - # mis-applied. even if we sort the original records by the - # ids that were given - eq_( - [ - (r.id, r.thing, r.lft_datum_id) - for r in sorted(result_data, key=lambda r: r.id) - ], - the_data_in_order_should_be, + num_bad = ( + sess.query(ResultDatum) + .join(Result) + .filter( + Result.lft_datum_id != ResultDatum.lft_datum_id, + ) + .count() ) + + eq_(num_bad, 0) + + @testing.only_on("mssql") + def test_this_really_fails_on_mssql_wo_full_fix( + self, decl_base, connection + ): + """this test tries as hard as possible to simulate the SQL server + failure. + + """ + + class Datum(decl_base): + + __tablename__ = "datum" + + datum_id = Column(Integer, primary_key=True) + data = Column(String(10)) + + class Result(decl_base): + + __tablename__ = "result" + + result_id = Column(Integer, primary_key=True) + + lft_datum_id = Column(Integer, ForeignKey(Datum.datum_id)) + + # use this instead to resolve; FK constraint is what affects + # SQL server + # lft_datum_id = Column(Integer) + + decl_base.metadata.create_all(connection) + + size = 13 + + result = connection.execute( + insert(Datum).returning(Datum.datum_id), + [{"data": f"d{i}"} for i in range(size)], + ) + + datum_ids = [row[0] for row in result] + assert datum_ids == list(range(1, size + 1)) + + # the rows are not inserted in the order that the table valued + # expressions are given. 
SQL Server organizes the rows so that the + # "datum_id" values are grouped + result = connection.execute( + insert(Result).returning( + Result.result_id, + Result.lft_datum_id, + sort_by_parameter_order=True, + ), + [ + {"lft_datum_id": datum_ids[num % size]} + for num in range(size * size) + ], + ) + + we_expect_returning_is = [ + {"result_id": num + 1, "lft_datum_id": datum_ids[num % size]} + for num in range(size * size) + ] + what_we_got_is = [ + {"result_id": row[0], "lft_datum_id": row[1]} for row in result + ] + eq_(we_expect_returning_is, what_we_got_is) diff --git a/test/requirements.py b/test/requirements.py index b76e671e9..9a8500ac3 100644 --- a/test/requirements.py +++ b/test/requirements.py @@ -1904,6 +1904,10 @@ class DefaultRequirements(SuiteRequirements): return only_if(["postgresql >= 10", "oracle >= 12", "mssql"]) @property + def multiple_identity_columns(self): + return only_if(["postgresql >= 10"]) + + @property def identity_columns_standard(self): return self.identity_columns + skip_if("mssql") diff --git a/test/sql/test_compiler.py b/test/sql/test_compiler.py index e05fafbdf..4d0864af9 100644 --- a/test/sql/test_compiler.py +++ b/test/sql/test_compiler.py @@ -35,6 +35,7 @@ from sqlalchemy import ForeignKey from sqlalchemy import func from sqlalchemy import Index from sqlalchemy import insert +from sqlalchemy import insert_sentinel from sqlalchemy import Integer from sqlalchemy import intersect from sqlalchemy import join @@ -7705,3 +7706,243 @@ class ResultMapTest(fixtures.TestBase): for orig_obj, proxied_obj in zip(orig, proxied): is_(orig_obj, proxied_obj) + + +class OmitFromStatementsTest(fixtures.TestBase, AssertsCompiledSQL): + """test the _omit_from_statements parameter. + + this somewhat awkward parameter was added to suit the case of + "insert_sentinel" columns that would try very hard not to be noticed + when not needed, by being omitted from any SQL statement that does not + refer to them explicitly. If they are referred to explicitly or + are in a context where their client side default has to be fired off, + then they are present. + + If marked public, the feature could be used as a general "I don't want to + see this column unless I asked it to" use case. 
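+
+    As a rough sketch of what the fixtures below exercise: a column
+    declared as Column("b", Integer, _omit_from_statements=True) is left
+    out of the column list rendered for select(t1) or insert(t1), but
+    renders normally once referenced explicitly, e.g. select(t1, t1.c.b).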
+ + """ + + __dialect__ = "default_enhanced" + + @testing.fixture + def t1(self): + m1 = MetaData() + + t1 = Table( + "t1", + m1, + Column("id", Integer, primary_key=True), + Column("a", Integer), + Column( + "b", Integer, _omit_from_statements=True, insert_sentinel=True + ), + Column("c", Integer), + Column("d", Integer, _omit_from_statements=True), + Column("e", Integer), + ) + return t1 + + @testing.fixture + def t2(self): + m1 = MetaData() + + t2 = Table( + "t2", + m1, + Column("id", Integer, primary_key=True), + Column("a", Integer), + Column( + "b", + Integer, + _omit_from_statements=True, + insert_sentinel=True, + default="10", + onupdate="20", + ), + Column("c", Integer, default="14", onupdate="19"), + Column( + "d", + Integer, + _omit_from_statements=True, + default="5", + onupdate="15", + ), + Column("e", Integer), + ) + return t2 + + @testing.fixture + def t3(self): + m1 = MetaData() + + t3 = Table( + "t3", + m1, + Column("id", Integer, primary_key=True), + Column("a", Integer), + insert_sentinel("b"), + Column("c", Integer, default="14", onupdate="19"), + ) + return t3 + + def test_select_omitted(self, t1): + self.assert_compile( + select(t1), "SELECT t1.id, t1.a, t1.c, t1.e FROM t1" + ) + + def test_select_from_subquery_includes_hidden(self, t1): + s1 = select(t1.c.a, t1.c.b, t1.c.c, t1.c.d, t1.c.e).subquery() + eq_(s1.c.keys(), ["a", "b", "c", "d", "e"]) + + self.assert_compile( + select(s1), + "SELECT anon_1.a, anon_1.b, anon_1.c, anon_1.d, anon_1.e " + "FROM (SELECT t1.a AS a, t1.b AS b, t1.c AS c, t1.d AS d, " + "t1.e AS e FROM t1) AS anon_1", + ) + + def test_select_from_subquery_omitted(self, t1): + s1 = select(t1).subquery() + + eq_(s1.c.keys(), ["id", "a", "c", "e"]) + self.assert_compile( + select(s1), + "SELECT anon_1.id, anon_1.a, anon_1.c, anon_1.e FROM " + "(SELECT t1.id AS id, t1.a AS a, t1.c AS c, t1.e AS e FROM t1) " + "AS anon_1", + ) + + def test_insert_omitted(self, t1): + self.assert_compile( + insert(t1), "INSERT INTO t1 (id, a, c, e) VALUES (:id, :a, :c, :e)" + ) + + def test_insert_from_select_omitted(self, t1): + self.assert_compile( + insert(t1).from_select(["a", "c", "e"], select(t1)), + "INSERT INTO t1 (a, c, e) SELECT t1.id, t1.a, t1.c, t1.e FROM t1", + ) + + def test_insert_from_select_included(self, t1): + self.assert_compile( + insert(t1).from_select(["a", "b", "c", "d", "e"], select(t1)), + "INSERT INTO t1 (a, b, c, d, e) SELECT t1.id, t1.a, t1.c, t1.e " + "FROM t1", + ) + + def test_insert_from_select_defaults_included(self, t2): + self.assert_compile( + insert(t2).from_select(["a", "c", "e"], select(t2)), + "INSERT INTO t2 (a, c, e, b, d) SELECT t2.id, t2.a, t2.c, t2.e, " + ":b AS anon_1, :d AS anon_2 FROM t2", + # TODO: do we have a test in test_defaults for this, that the + # default values get set up as expected? 
+ ) + + def test_insert_from_select_sentinel_defaults_omitted(self, t3): + self.assert_compile( + # a pure SentinelDefault not included here, so there is no 'b' + insert(t3).from_select(["a", "c"], select(t3)), + "INSERT INTO t3 (a, c) SELECT t3.id, t3.a, t3.c FROM t3", + ) + + def test_insert_omitted_return_col_nonspecified(self, t1): + self.assert_compile( + insert(t1).returning(t1), + "INSERT INTO t1 (id, a, c, e) VALUES (:id, :a, :c, :e) " + "RETURNING t1.id, t1.a, t1.c, t1.e", + ) + + def test_insert_omitted_return_col_specified(self, t1): + self.assert_compile( + insert(t1).returning(t1.c.a, t1.c.b, t1.c.c, t1.c.d, t1.c.e), + "INSERT INTO t1 (id, a, c, e) VALUES (:id, :a, :c, :e) " + "RETURNING t1.a, t1.b, t1.c, t1.d, t1.e", + ) + + def test_insert_omitted_no_params(self, t1): + self.assert_compile( + insert(t1), "INSERT INTO t1 () VALUES ()", params={} + ) + + def test_insert_omitted_no_params_defaults(self, t2): + # omit columns that nonetheless have client-side defaults + # are included + self.assert_compile( + insert(t2), + "INSERT INTO t2 (b, c, d) VALUES (:b, :c, :d)", + params={}, + ) + + def test_insert_omitted_no_params_defaults_no_sentinel(self, t3): + # omit columns that nonetheless have client-side defaults + # are included + self.assert_compile( + insert(t3), + "INSERT INTO t3 (c) VALUES (:c)", + params={}, + ) + + def test_insert_omitted_defaults(self, t2): + self.assert_compile( + insert(t2), "INSERT INTO t2 (id, a, c, e) VALUES (:id, :a, :c, :e)" + ) + + def test_update_omitted(self, t1): + self.assert_compile( + update(t1), "UPDATE t1 SET id=:id, a=:a, c=:c, e=:e" + ) + + def test_update_omitted_defaults(self, t2): + self.assert_compile( + update(t2), "UPDATE t2 SET id=:id, a=:a, c=:c, e=:e" + ) + + def test_update_omitted_no_params_defaults(self, t2): + # omit columns that nonetheless have client-side defaults + # are included + self.assert_compile( + update(t2), "UPDATE t2 SET b=:b, c=:c, d=:d", params={} + ) + + def test_select_include_col(self, t1): + self.assert_compile( + select(t1, t1.c.b, t1.c.d), + "SELECT t1.id, t1.a, t1.c, t1.e, t1.b, t1.d FROM t1", + ) + + def test_update_include_col(self, t1): + self.assert_compile( + update(t1).values(a=5, b=10, c=15, d=20, e=25), + "UPDATE t1 SET a=:a, b=:b, c=:c, d=:d, e=:e", + checkparams={"a": 5, "b": 10, "c": 15, "d": 20, "e": 25}, + ) + + def test_insert_include_col(self, t1): + self.assert_compile( + insert(t1).values(a=5, b=10, c=15, d=20, e=25), + "INSERT INTO t1 (a, b, c, d, e) VALUES (:a, :b, :c, :d, :e)", + checkparams={"a": 5, "b": 10, "c": 15, "d": 20, "e": 25}, + ) + + def test_insert_include_col_via_keys(self, t1): + self.assert_compile( + insert(t1), + "INSERT INTO t1 (a, b, c, d, e) VALUES (:a, :b, :c, :d, :e)", + params={"a": 5, "b": 10, "c": 15, "d": 20, "e": 25}, + checkparams={"a": 5, "b": 10, "c": 15, "d": 20, "e": 25}, + ) + + def test_select_omitted_incl_whereclause(self, t1): + self.assert_compile( + select(t1).where(t1.c.d == 5), + "SELECT t1.id, t1.a, t1.c, t1.e FROM t1 WHERE t1.d = :d_1", + checkparams={"d_1": 5}, + ) + + def test_select_omitted_incl_order_by(self, t1): + self.assert_compile( + select(t1).order_by(t1.c.d), + "SELECT t1.id, t1.a, t1.c, t1.e FROM t1 ORDER BY t1.d", + ) diff --git a/test/sql/test_defaults.py b/test/sql/test_defaults.py index 633972b45..01f6b290c 100644 --- a/test/sql/test_defaults.py +++ b/test/sql/test_defaults.py @@ -1567,6 +1567,7 @@ class CurrentParametersTest(fixtures.TablesTest): some_table = self.tables.some_table some_table.c.x.default.arg = gen_default 
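+        # the column memoizes state derived from its default; reset the
+        # memoizations so the swapped-in gen_default takes effect when
+        # the statement is next compiled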
+ some_table.c.x._reset_memoizations() return fn @testing.combinations( diff --git a/test/sql/test_insert.py b/test/sql/test_insert.py index 308f654f7..904271fcb 100644 --- a/test/sql/test_insert.py +++ b/test/sql/test_insert.py @@ -68,7 +68,7 @@ class _InsertTestBase: class InsertTest(_InsertTestBase, fixtures.TablesTest, AssertsCompiledSQL): - __dialect__ = "default" + __dialect__ = "default_enhanced" @testing.combinations( ((), ("z",), ()), @@ -94,6 +94,51 @@ class InsertTest(_InsertTestBase, fixtures.TablesTest, AssertsCompiledSQL): assert isinstance(stmt._return_defaults_columns, tuple) eq_(set(stmt._return_defaults_columns), expected) + @testing.variation("add_values", ["before", "after"]) + @testing.variation("multi_values", [True, False]) + @testing.variation("sort_by_parameter_order", [True, False]) + def test_sort_by_parameter_ordering_parameter_no_multi_values( + self, add_values, multi_values, sort_by_parameter_order + ): + t = table("foo", column("x"), column("y"), column("z")) + stmt = insert(t) + + if add_values.before: + if multi_values: + stmt = stmt.values([{"y": 6}, {"y": 7}]) + else: + stmt = stmt.values(y=6) + + stmt = stmt.returning( + t.c.x, sort_by_parameter_order=bool(sort_by_parameter_order) + ) + + if add_values.after: + if multi_values: + stmt = stmt.values([{"y": 6}, {"y": 7}]) + else: + stmt = stmt.values(y=6) + + if multi_values: + if sort_by_parameter_order: + with expect_raises_message( + exc.CompileError, + "RETURNING cannot be determinstically sorted " + "when using an INSERT", + ): + stmt.compile() + else: + self.assert_compile( + stmt, + "INSERT INTO foo (y) VALUES (:y_m0), (:y_m1) " + "RETURNING foo.x", + ) + else: + self.assert_compile( + stmt, + "INSERT INTO foo (y) VALUES (:y) RETURNING foo.x", + ) + def test_binds_that_match_columns(self): """test bind params named after column names replace the normal SET/VALUES generation. 
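The compile-time rule asserted above can be shown outside the test
harness. A minimal sketch (the table and values are illustrative only):

    from sqlalchemy import column, exc, insert, table

    t = table("foo", column("x"), column("y"), column("z"))

    # executemany-style parameter lists can be deterministically sorted,
    # but an explicit multi-row VALUES clause cannot, so compilation fails
    stmt = (
        insert(t)
        .values([{"y": 6}, {"y": 7}])
        .returning(t.c.x, sort_by_parameter_order=True)
    )
    try:
        stmt.compile()
    except exc.CompileError as err:
        print(err)  # "RETURNING cannot be determinstically sorted ..."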
diff --git a/test/sql/test_insert_exec.py b/test/sql/test_insert_exec.py index 3b5a1856c..f545671e7 100644 --- a/test/sql/test_insert_exec.py +++ b/test/sql/test_insert_exec.py @@ -1,11 +1,19 @@ +import contextlib +import functools import itertools +import uuid from sqlalchemy import and_ +from sqlalchemy import ARRAY from sqlalchemy import bindparam +from sqlalchemy import DateTime from sqlalchemy import event from sqlalchemy import exc from sqlalchemy import ForeignKey from sqlalchemy import func +from sqlalchemy import Identity +from sqlalchemy import insert +from sqlalchemy import insert_sentinel from sqlalchemy import INT from sqlalchemy import Integer from sqlalchemy import literal @@ -14,16 +22,22 @@ from sqlalchemy import Sequence from sqlalchemy import sql from sqlalchemy import String from sqlalchemy import testing +from sqlalchemy import TypeDecorator +from sqlalchemy import Uuid from sqlalchemy import VARCHAR from sqlalchemy.engine import cursor as _cursor +from sqlalchemy.sql.compiler import InsertmanyvaluesSentinelOpts from sqlalchemy.testing import assert_raises_message from sqlalchemy.testing import config from sqlalchemy.testing import eq_ +from sqlalchemy.testing import expect_raises from sqlalchemy.testing import expect_raises_message +from sqlalchemy.testing import expect_warnings from sqlalchemy.testing import fixtures from sqlalchemy.testing import is_ from sqlalchemy.testing import mock from sqlalchemy.testing import provision +from sqlalchemy.testing.fixtures import insertmanyvalues_fixture from sqlalchemy.testing.provision import normalize_sequence from sqlalchemy.testing.schema import Column from sqlalchemy.testing.schema import Table @@ -924,7 +938,7 @@ class InsertManyValuesTest(fixtures.RemovesEvents, fixtures.TablesTest): config, data, (data,), - lambda inserted: {"x": inserted.x + " upserted"}, + set_lambda=lambda inserted: {"x": inserted.x + " upserted"}, ) result = connection.execute(stmt, upsert_data) @@ -1169,3 +1183,1612 @@ class InsertManyValuesTest(fixtures.RemovesEvents, fixtures.TablesTest): "INSERT..RETURNING when executemany", ): conn.execute(stmt.returning(t.c.id), data) + + +class IMVSentinelTest(fixtures.TestBase): + __backend__ = True + + __requires__ = ("insert_returning",) + + def _expect_downgrade_warnings( + self, + *, + warn_for_downgrades, + sort_by_parameter_order, + separate_sentinel=False, + server_autoincrement=False, + client_side_pk=False, + autoincrement_is_sequence=False, + connection=None, + ): + + if connection: + dialect = connection.dialect + else: + dialect = testing.db.dialect + + if ( + sort_by_parameter_order + and warn_for_downgrades + and dialect.use_insertmanyvalues + ): + + if ( + not separate_sentinel + and ( + server_autoincrement + and ( + not ( + dialect.insertmanyvalues_implicit_sentinel # noqa: E501 + & InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + ) + or ( + autoincrement_is_sequence + and not ( + dialect.insertmanyvalues_implicit_sentinel # noqa: E501 + & InsertmanyvaluesSentinelOpts.SEQUENCE + ) + ) + ) + ) + or ( + not separate_sentinel + and not server_autoincrement + and not client_side_pk + ) + ): + return expect_warnings( + "Batches were downgraded", + raise_on_any_unexpected=True, + ) + + return contextlib.nullcontext() + + @testing.variation + def sort_by_parameter_order(self): + return [True, False] + + @testing.variation + def warn_for_downgrades(self): + return [True, False] + + @testing.variation + def randomize_returning(self): + return [True, False] + + @testing.requires.insertmanyvalues + def 
test_fixture_randomizing(self, connection, metadata): + t = Table( + "t", + metadata, + Column("id", Integer, Identity(), primary_key=True), + Column("data", String(50)), + ) + metadata.create_all(connection) + + insertmanyvalues_fixture(connection, randomize_rows=True) + + results = set() + + for i in range(15): + result = connection.execute( + insert(t).returning(t.c.data, sort_by_parameter_order=False), + [{"data": "d1"}, {"data": "d2"}, {"data": "d3"}], + ) + + hashed_result = tuple(result.all()) + results.add(hashed_result) + if len(results) > 1: + return + else: + assert False, "got same order every time for 15 tries" + + @testing.only_on("postgresql>=13") + @testing.variation("downgrade", [True, False]) + def test_fixture_downgraded(self, connection, metadata, downgrade): + t = Table( + "t", + metadata, + Column( + "id", + Uuid(), + server_default=func.gen_random_uuid(), + primary_key=True, + ), + Column("data", String(50)), + ) + metadata.create_all(connection) + + r1 = connection.execute( + insert(t).returning(t.c.data, sort_by_parameter_order=True), + [{"data": "d1"}, {"data": "d2"}, {"data": "d3"}], + ) + eq_(r1.all(), [("d1",), ("d2",), ("d3",)]) + + if downgrade: + insertmanyvalues_fixture(connection, warn_on_downgraded=True) + + with self._expect_downgrade_warnings( + warn_for_downgrades=True, + sort_by_parameter_order=True, + ): + connection.execute( + insert(t).returning( + t.c.data, sort_by_parameter_order=True + ), + [{"data": "d1"}, {"data": "d2"}, {"data": "d3"}], + ) + else: + # run a plain test to help ensure the fixture doesn't leak to + # other tests + r1 = connection.execute( + insert(t).returning(t.c.data, sort_by_parameter_order=True), + [{"data": "d1"}, {"data": "d2"}, {"data": "d3"}], + ) + eq_(r1.all(), [("d1",), ("d2",), ("d3",)]) + + @testing.variation( + "sequence_type", + [ + ("sequence", testing.requires.sequences), + ("identity", testing.requires.identity_columns), + ], + ) + @testing.variation("increment", ["positive", "negative", "implicit"]) + @testing.variation("explicit_sentinel", [True, False]) + def test_invalid_identities( + self, + metadata, + connection, + warn_for_downgrades, + randomize_returning, + sort_by_parameter_order, + sequence_type: testing.Variation, + increment: testing.Variation, + explicit_sentinel, + ): + if sequence_type.sequence: + seq_cls = functools.partial(Sequence, name="t1_id_seq") + elif sequence_type.identity: + seq_cls = Identity + else: + sequence_type.fail() + + if increment.implicit: + sequence = seq_cls(start=1) + elif increment.positive: + sequence = seq_cls(start=1, increment=1) + elif increment.negative: + sequence = seq_cls(start=-1, increment=-1) + else: + increment.fail() + + t1 = Table( + "t1", + metadata, + Column( + "id", + Integer, + sequence, + primary_key=True, + insert_sentinel=bool(explicit_sentinel), + ), + Column("data", String(50)), + ) + metadata.create_all(connection) + + fixtures.insertmanyvalues_fixture( + connection, + randomize_rows=bool(randomize_returning), + warn_on_downgraded=bool(warn_for_downgrades), + ) + + stmt = insert(t1).returning( + t1.c.id, + t1.c.data, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + data = [{"data": f"d{i}"} for i in range(10)] + + use_imv = testing.db.dialect.use_insertmanyvalues + if ( + use_imv + and increment.negative + and explicit_sentinel + and sort_by_parameter_order + ): + with expect_raises_message( + exc.InvalidRequestError, + rf"Can't use " + rf"{'SEQUENCE' if sequence_type.sequence else 'IDENTITY'} " + rf"default with negative 
increment", + ): + connection.execute(stmt, data) + return + elif ( + use_imv + and explicit_sentinel + and sort_by_parameter_order + and sequence_type.sequence + and not ( + testing.db.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.SEQUENCE + ) + ): + with expect_raises_message( + exc.InvalidRequestError, + r"Column t1.id can't be explicitly marked as a sentinel " + r"column .* as the particular type of default generation", + ): + connection.execute(stmt, data) + return + + with self._expect_downgrade_warnings( + warn_for_downgrades=warn_for_downgrades, + sort_by_parameter_order=sort_by_parameter_order, + server_autoincrement=not increment.negative, + autoincrement_is_sequence=sequence_type.sequence, + ): + result = connection.execute(stmt, data) + + if sort_by_parameter_order: + coll = list + else: + coll = set + + if increment.negative: + expected_data = [(-1 - i, f"d{i}") for i in range(10)] + else: + expected_data = [(i + 1, f"d{i}") for i in range(10)] + + eq_( + coll(result), + coll(expected_data), + ) + + @testing.combinations( + Integer(), + String(50), + (ARRAY(Integer()), testing.requires.array_type), + DateTime(), + Uuid(), + argnames="datatype", + ) + def test_inserts_w_all_nulls( + self, connection, metadata, sort_by_parameter_order, datatype + ): + """this test is geared towards the INSERT..SELECT VALUES case, + where if the VALUES have all NULL for some column, PostgreSQL assumes + the datatype must be TEXT and throws for other table datatypes. So an + additional layer of casts is applied to the SELECT p0,p1, p2... part of + the statement for all datatypes unconditionally. Even though the VALUES + clause also has bind casts for selected datatypes, this NULL handling + is needed even for simple datatypes. We'd prefer not to render bind + casts for all possible datatypes as that affects other kinds of + statements as well and also is very verbose for insertmanyvalues. 
+ + + """ + t = Table( + "t", + metadata, + Column("id", Integer, Identity(), primary_key=True), + Column("data", datatype), + ) + metadata.create_all(connection) + result = connection.execute( + insert(t).returning( + t.c.id, + sort_by_parameter_order=bool(sort_by_parameter_order), + ), + [{"data": None}, {"data": None}, {"data": None}], + ) + eq_(set(result), {(1,), (2,), (3,)}) + + @testing.variation("pk_type", ["autoinc", "clientside"]) + @testing.variation("add_sentinel", ["none", "clientside", "sentinel"]) + def test_imv_w_additional_values( + self, + metadata, + connection, + sort_by_parameter_order, + pk_type: testing.Variation, + randomize_returning, + warn_for_downgrades, + add_sentinel, + ): + if pk_type.autoinc: + pk_col = Column("id", Integer(), Identity(), primary_key=True) + elif pk_type.clientside: + pk_col = Column("id", Uuid(), default=uuid.uuid4, primary_key=True) + else: + pk_type.fail() + + if add_sentinel.clientside: + extra_col = insert_sentinel( + "sentinel", type_=Uuid(), default=uuid.uuid4 + ) + elif add_sentinel.sentinel: + extra_col = insert_sentinel("sentinel") + else: + extra_col = Column("sentinel", Integer()) + + t1 = Table( + "t1", + metadata, + pk_col, + Column("data", String(30)), + Column("moredata", String(30)), + extra_col, + Column( + "has_server_default", + String(50), + server_default="some_server_default", + ), + ) + metadata.create_all(connection) + + fixtures.insertmanyvalues_fixture( + connection, + randomize_rows=bool(randomize_returning), + warn_on_downgraded=bool(warn_for_downgrades), + ) + + stmt = ( + insert(t1) + .values(moredata="more data") + .returning( + t1.c.data, + t1.c.moredata, + t1.c.has_server_default, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + ) + data = [{"data": f"d{i}"} for i in range(10)] + + with self._expect_downgrade_warnings( + warn_for_downgrades=warn_for_downgrades, + sort_by_parameter_order=sort_by_parameter_order, + separate_sentinel=not add_sentinel.none, + server_autoincrement=pk_type.autoinc, + client_side_pk=pk_type.clientside, + ): + result = connection.execute(stmt, data) + + if sort_by_parameter_order: + coll = list + else: + coll = set + + eq_( + coll(result), + coll( + [ + (f"d{i}", "more data", "some_server_default") + for i in range(10) + ] + ), + ) + + def test_sentinel_incorrect_rowcount( + self, metadata, connection, sort_by_parameter_order + ): + """test assertions to ensure sentinel values don't have duplicates""" + + uuids = [uuid.uuid4() for i in range(10)] + + # make some dupes + uuids[3] = uuids[5] + uuids[9] = uuids[5] + + t1 = Table( + "data", + metadata, + Column("id", Integer, Identity(), primary_key=True), + Column("data", String(50)), + insert_sentinel( + "uuids", + Uuid(), + default=functools.partial(next, iter(uuids)), + ), + ) + + metadata.create_all(connection) + + stmt = insert(t1).returning( + t1.c.data, + t1.c.uuids, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + data = [{"data": f"d{i}"} for i in range(10)] + + if testing.db.dialect.use_insertmanyvalues and sort_by_parameter_order: + with expect_raises_message( + exc.InvalidRequestError, + "Sentinel-keyed result set did not produce correct " + "number of rows 10; produced 8.", + ): + connection.execute(stmt, data) + else: + result = connection.execute(stmt, data) + eq_( + set(result.all()), + {(f"d{i}", uuids[i]) for i in range(10)}, + ) + + @testing.variation("resolve_sentinel_values", [True, False]) + def test_sentinel_cant_match_keys( + self, + metadata, + connection, + sort_by_parameter_order, 
+ resolve_sentinel_values, + ): + """test assertions to ensure sentinel values passed in parameter + structures can be identified when they come back in cursor.fetchall(). + + Values that are further modified by the database driver or by + SQL expressions (as in the case below) before being INSERTed + won't match coming back out, so datatypes need to implement + _sentinel_value_resolver() if this is the case. + + """ + + class UnsymmetricDataType(TypeDecorator): + cache_ok = True + impl = String + + def bind_expression(self, bindparam): + return func.lower(bindparam) + + if resolve_sentinel_values: + + def _sentinel_value_resolver(self, dialect): + def fix_sentinels(value): + return value.lower() + + return fix_sentinels + + t1 = Table( + "data", + metadata, + Column("id", Integer, Identity(), primary_key=True), + Column("data", String(50)), + insert_sentinel("unsym", UnsymmetricDataType(10)), + ) + + metadata.create_all(connection) + + stmt = insert(t1).returning( + t1.c.data, + t1.c.unsym, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + data = [{"data": f"d{i}", "unsym": f"UPPER_d{i}"} for i in range(10)] + + if ( + testing.db.dialect.use_insertmanyvalues + and sort_by_parameter_order + and not resolve_sentinel_values + ): + with expect_raises_message( + exc.InvalidRequestError, + r"Can't match sentinel values in result set to parameter " + r"sets; key 'UPPER_d.' was not found.", + ): + connection.execute(stmt, data) + else: + result = connection.execute(stmt, data) + eq_( + set(result.all()), + {(f"d{i}", f"upper_d{i}") for i in range(10)}, + ) + + @testing.variation("add_insert_sentinel", [True, False]) + def test_sentinel_insert_default_pk_only( + self, + metadata, + connection, + sort_by_parameter_order, + add_insert_sentinel, + ): + t1 = Table( + "data", + metadata, + Column( + "id", + Integer, + Identity(), + insert_sentinel=bool(add_insert_sentinel), + primary_key=True, + ), + Column("data", String(50)), + ) + + metadata.create_all(connection) + + fixtures.insertmanyvalues_fixture( + connection, randomize_rows=True, warn_on_downgraded=False + ) + + stmt = insert(t1).returning( + t1.c.id, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + data = [{} for i in range(3)] + + if ( + testing.db.dialect.use_insertmanyvalues + and add_insert_sentinel + and sort_by_parameter_order + and not ( + testing.db.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + ) + ): + with expect_raises_message( + exc.InvalidRequestError, + "Column data.id can't be explicitly marked as a " + f"sentinel column when using the {testing.db.dialect.name} " + "dialect", + ): + connection.execute(stmt, data) + return + else: + result = connection.execute(stmt, data) + + if sort_by_parameter_order: + # if we used a client side default function, or we had no sentinel + # at all, we're sorted + coll = list + else: + # otherwise we are not, we randomized the order in any case + coll = set + + eq_( + coll(result), + coll( + [ + (1,), + (2,), + (3,), + ] + ), + ) + + @testing.only_on("postgresql>=13") + @testing.variation("default_type", ["server_side", "client_side"]) + @testing.variation("add_insert_sentinel", [True, False]) + def test_no_sentinel_on_non_int_ss_function( + self, + metadata, + connection, + add_insert_sentinel, + default_type, + sort_by_parameter_order, + ): + + t1 = Table( + "data", + metadata, + Column( + "id", + Uuid(), + server_default=func.gen_random_uuid() + if default_type.server_side + else None, + default=uuid.uuid4 if 
default_type.client_side else None, + primary_key=True, + insert_sentinel=bool(add_insert_sentinel), + ), + Column("data", String(50)), + ) + + metadata.create_all(connection) + + fixtures.insertmanyvalues_fixture( + connection, randomize_rows=True, warn_on_downgraded=False + ) + + stmt = insert(t1).returning( + t1.c.data, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + data = [ + {"data": "d1"}, + {"data": "d2"}, + {"data": "d3"}, + ] + + if ( + default_type.server_side + and add_insert_sentinel + and sort_by_parameter_order + ): + with expect_raises_message( + exc.InvalidRequestError, + r"Column data.id can't be a sentinel column because it uses " + r"an explicit server side default that's not the Identity\(\)", + ): + connection.execute(stmt, data) + return + else: + result = connection.execute(stmt, data) + + if sort_by_parameter_order: + # if we used a client side default function, or we had no sentinel + # at all, we're sorted + coll = list + else: + # otherwise we are not, we randomized the order in any case + coll = set + + eq_( + coll(result), + coll( + [ + ("d1",), + ("d2",), + ("d3",), + ] + ), + ) + + @testing.variation( + "pk_type", + [ + ("plain_autoinc", testing.requires.autoincrement_without_sequence), + ("sequence", testing.requires.sequences), + ("identity", testing.requires.identity_columns), + ], + ) + @testing.variation( + "sentinel", + [ + "none", # passes because we automatically downgrade + # for no sentinel col + "implicit_not_omitted", + "implicit_omitted", + "explicit", + "explicit_but_nullable", + "default_uuid", + "default_string_uuid", + ("identity", testing.requires.multiple_identity_columns), + ("sequence", testing.requires.sequences), + ], + ) + def test_sentinel_col_configurations( + self, + pk_type: testing.Variation, + sentinel: testing.Variation, + sort_by_parameter_order, + randomize_returning, + metadata, + connection, + ): + + if pk_type.plain_autoinc: + pk_col = Column("id", Integer, primary_key=True) + elif pk_type.sequence: + pk_col = Column( + "id", + Integer, + Sequence("result_id_seq", start=1), + primary_key=True, + ) + elif pk_type.identity: + pk_col = Column("id", Integer, Identity(), primary_key=True) + else: + pk_type.fail() + + if sentinel.implicit_not_omitted or sentinel.implicit_omitted: + _sentinel = insert_sentinel( + "sentinel", + omit_from_statements=bool(sentinel.implicit_omitted), + ) + elif sentinel.explicit: + _sentinel = Column( + "some_uuid", Uuid(), nullable=False, insert_sentinel=True + ) + elif sentinel.explicit_but_nullable: + _sentinel = Column("some_uuid", Uuid(), insert_sentinel=True) + elif sentinel.default_uuid or sentinel.default_string_uuid: + _sentinel = Column( + "some_uuid", + Uuid(native_uuid=bool(sentinel.default_uuid)), + insert_sentinel=True, + default=uuid.uuid4, + ) + elif sentinel.identity: + _sentinel = Column( + "some_identity", + Integer, + Identity(), + insert_sentinel=True, + ) + elif sentinel.sequence: + _sentinel = Column( + "some_identity", + Integer, + Sequence("some_id_seq", start=1), + insert_sentinel=True, + ) + else: + _sentinel = Column("some_uuid", Uuid()) + + t = Table("t", metadata, pk_col, Column("data", String(50)), _sentinel) + + metadata.create_all(connection) + + fixtures.insertmanyvalues_fixture( + connection, + randomize_rows=bool(randomize_returning), + warn_on_downgraded=True, + ) + + stmt = insert(t).returning( + pk_col, + t.c.data, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + if sentinel.explicit: + data = [ + {"data": f"d{i}", "some_uuid": 
uuid.uuid4()} + for i in range(150) + ] + else: + data = [{"data": f"d{i}"} for i in range(150)] + + expect_sentinel_use = ( + sort_by_parameter_order + and testing.db.dialect.insert_returning + and testing.db.dialect.use_insertmanyvalues + ) + + if sentinel.explicit_but_nullable and expect_sentinel_use: + with expect_raises_message( + exc.InvalidRequestError, + "Column t.some_uuid has been marked as a sentinel column " + "with no default generation function; it at least needs to " + "be marked nullable=False", + ): + connection.execute(stmt, data) + return + + elif ( + expect_sentinel_use + and sentinel.sequence + and not ( + testing.db.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.SEQUENCE + ) + ): + with expect_raises_message( + exc.InvalidRequestError, + "Column t.some_identity can't be explicitly marked as a " + f"sentinel column when using the {testing.db.dialect.name} " + "dialect", + ): + connection.execute(stmt, data) + return + + elif ( + sentinel.none + and expect_sentinel_use + and stmt.compile( + dialect=testing.db.dialect + )._get_sentinel_column_for_table(t) + is None + ): + with expect_warnings( + "Batches were downgraded for sorted INSERT", + raise_on_any_unexpected=True, + ): + result = connection.execute(stmt, data) + else: + result = connection.execute(stmt, data) + + if sort_by_parameter_order: + eq_(list(result), [(i + 1, f"d{i}") for i in range(150)]) + else: + eq_(set(result), {(i + 1, f"d{i}") for i in range(150)}) + + @testing.variation( + "return_type", ["include_sentinel", "default_only", "return_defaults"] + ) + @testing.variation("add_sentinel_flag_to_col", [True, False]) + def test_sentinel_on_non_autoinc_primary_key( + self, + metadata, + connection, + return_type: testing.Variation, + sort_by_parameter_order, + randomize_returning, + add_sentinel_flag_to_col, + ): + uuids = [uuid.uuid4() for i in range(10)] + _some_uuids = iter(uuids) + + t1 = Table( + "data", + metadata, + Column( + "id", + Uuid(), + default=functools.partial(next, _some_uuids), + primary_key=True, + insert_sentinel=bool(add_sentinel_flag_to_col), + ), + Column("data", String(50)), + Column( + "has_server_default", + String(30), + server_default="some_server_default", + ), + ) + + fixtures.insertmanyvalues_fixture( + connection, + randomize_rows=bool(randomize_returning), + warn_on_downgraded=True, + ) + + if sort_by_parameter_order: + collection_cls = list + else: + collection_cls = set + + metadata.create_all(connection) + + if sort_by_parameter_order: + kw = {"sort_by_parameter_order": True} + else: + kw = {} + + if return_type.include_sentinel: + stmt = t1.insert().returning( + t1.c.id, t1.c.data, t1.c.has_server_default, **kw + ) + elif return_type.default_only: + stmt = t1.insert().returning( + t1.c.data, t1.c.has_server_default, **kw + ) + elif return_type.return_defaults: + stmt = t1.insert().return_defaults(**kw) + + else: + return_type.fail() + + r = connection.execute( + stmt, + [{"data": f"d{i}"} for i in range(1, 6)], + ) + + if return_type.include_sentinel: + eq_(r.keys(), ["id", "data", "has_server_default"]) + eq_( + collection_cls(r), + collection_cls( + [ + (uuids[i], f"d{i+1}", "some_server_default") + for i in range(5) + ] + ), + ) + elif return_type.default_only: + eq_(r.keys(), ["data", "has_server_default"]) + eq_( + collection_cls(r), + collection_cls( + [ + ( + f"d{i+1}", + "some_server_default", + ) + for i in range(5) + ] + ), + ) + elif return_type.return_defaults: + eq_(r.keys(), ["has_server_default"]) + 
eq_(r.inserted_primary_key_rows, [(uuids[i],) for i in range(5)]) + eq_( + r.returned_defaults_rows, + [ + ("some_server_default",), + ("some_server_default",), + ("some_server_default",), + ("some_server_default",), + ("some_server_default",), + ], + ) + eq_(r.all(), []) + else: + return_type.fail() + + def test_client_composite_pk( + self, + metadata, + connection, + randomize_returning, + sort_by_parameter_order, + warn_for_downgrades, + ): + uuids = [uuid.uuid4() for i in range(10)] + + t1 = Table( + "data", + metadata, + Column( + "id1", + Uuid(), + default=functools.partial(next, iter(uuids)), + primary_key=True, + ), + Column( + "id2", + # note this is testing that plain populated PK cols + # also qualify as sentinels since they have to be there + String(30), + primary_key=True, + ), + Column("data", String(50)), + Column( + "has_server_default", + String(30), + server_default="some_server_default", + ), + ) + metadata.create_all(connection) + + fixtures.insertmanyvalues_fixture( + connection, + randomize_rows=bool(randomize_returning), + warn_on_downgraded=bool(warn_for_downgrades), + ) + + result = connection.execute( + insert(t1).returning( + t1.c.id1, + t1.c.id2, + t1.c.data, + t1.c.has_server_default, + sort_by_parameter_order=bool(sort_by_parameter_order), + ), + [{"id2": f"id{i}", "data": f"d{i}"} for i in range(10)], + ) + + if sort_by_parameter_order: + coll = list + else: + coll = set + + eq_( + coll(result), + coll( + [ + (uuids[i], f"id{i}", f"d{i}", "some_server_default") + for i in range(10) + ] + ), + ) + + @testing.variation("add_sentinel", [True, False]) + @testing.variation( + "set_identity", [(True, testing.requires.identity_columns), False] + ) + def test_no_pk( + self, + metadata, + connection, + randomize_returning, + sort_by_parameter_order, + warn_for_downgrades, + add_sentinel, + set_identity, + ): + if set_identity: + id_col = Column("id", Integer(), Identity()) + else: + id_col = Column("id", Integer()) + + uuids = [uuid.uuid4() for i in range(10)] + + sentinel_col = Column( + "unique_id", + Uuid, + default=functools.partial(next, iter(uuids)), + insert_sentinel=bool(add_sentinel), + ) + t1 = Table( + "nopk", + metadata, + id_col, + Column("data", String(50)), + sentinel_col, + Column( + "has_server_default", + String(30), + server_default="some_server_default", + ), + ) + metadata.create_all(connection) + + fixtures.insertmanyvalues_fixture( + connection, + randomize_rows=bool(randomize_returning), + warn_on_downgraded=bool(warn_for_downgrades), + ) + + stmt = insert(t1).returning( + t1.c.id, + t1.c.data, + t1.c.has_server_default, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + if not set_identity: + data = [{"id": i + 1, "data": f"d{i}"} for i in range(10)] + else: + data = [{"data": f"d{i}"} for i in range(10)] + + with self._expect_downgrade_warnings( + warn_for_downgrades=warn_for_downgrades, + sort_by_parameter_order=sort_by_parameter_order, + separate_sentinel=add_sentinel, + ): + result = connection.execute(stmt, data) + + if sort_by_parameter_order: + coll = list + else: + coll = set + + eq_( + coll(result), + coll([(i + 1, f"d{i}", "some_server_default") for i in range(10)]), + ) + + @testing.variation("add_sentinel_to_col", [True, False]) + @testing.variation( + "set_autoincrement", [True, (False, testing.skip_if("mariadb"))] + ) + def test_hybrid_client_composite_pk( + self, + metadata, + connection, + randomize_returning, + sort_by_parameter_order, + warn_for_downgrades, + add_sentinel_to_col, + set_autoincrement, + ): + 
"""test a pk that is part server generated part client generated. + + The server generated col by itself can be the sentinel. if it's + part of the PK and is autoincrement=True then it is automatically + used as such. if not, there's a graceful downgrade. + + """ + + t1 = Table( + "data", + metadata, + Column( + "idint", + Integer, + Identity(), + autoincrement=True if set_autoincrement else "auto", + primary_key=True, + insert_sentinel=bool(add_sentinel_to_col), + ), + Column( + "idstr", + String(30), + primary_key=True, + ), + Column("data", String(50)), + Column( + "has_server_default", + String(30), + server_default="some_server_default", + ), + ) + + no_autoincrement = ( + not testing.requires.supports_autoincrement_w_composite_pk.enabled # noqa: E501 + ) + if set_autoincrement and no_autoincrement: + with expect_raises_message( + exc.CompileError, + r".*SQLite does not support autoincrement for " + "composite primary keys", + ): + metadata.create_all(connection) + return + else: + + metadata.create_all(connection) + + fixtures.insertmanyvalues_fixture( + connection, + randomize_rows=bool(randomize_returning), + warn_on_downgraded=bool(warn_for_downgrades), + ) + + stmt = insert(t1).returning( + t1.c.idint, + t1.c.idstr, + t1.c.data, + t1.c.has_server_default, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + + if no_autoincrement: + data = [ + {"idint": i + 1, "idstr": f"id{i}", "data": f"d{i}"} + for i in range(10) + ] + else: + data = [{"idstr": f"id{i}", "data": f"d{i}"} for i in range(10)] + + if ( + testing.db.dialect.use_insertmanyvalues + and add_sentinel_to_col + and sort_by_parameter_order + and not ( + testing.db.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + ) + ): + with expect_raises_message( + exc.InvalidRequestError, + "Column data.idint can't be explicitly marked as a sentinel " + "column when using the sqlite dialect", + ): + result = connection.execute(stmt, data) + return + + with self._expect_downgrade_warnings( + warn_for_downgrades=warn_for_downgrades, + sort_by_parameter_order=sort_by_parameter_order, + separate_sentinel=not set_autoincrement and add_sentinel_to_col, + server_autoincrement=set_autoincrement, + ): + result = connection.execute(stmt, data) + + if sort_by_parameter_order: + coll = list + else: + coll = set + + eq_( + coll(result), + coll( + [ + (i + 1, f"id{i}", f"d{i}", "some_server_default") + for i in range(10) + ] + ), + ) + + @testing.variation("composite_pk", [True, False]) + @testing.only_on( + [ + "+psycopg", + "+psycopg2", + "+pysqlite", + "+mysqlclient", + "+cx_oracle", + "+oracledb", + ] + ) + def test_failure_mode_if_i_dont_send_value( + self, metadata, connection, sort_by_parameter_order, composite_pk + ): + """test that we get a regular integrity error if a required + PK value was not sent, that is, imv does not get in the way + + """ + t1 = Table( + "data", + metadata, + Column("id", String(30), primary_key=True), + Column("data", String(50)), + Column( + "has_server_default", + String(30), + server_default="some_server_default", + ), + ) + if composite_pk: + t1.append_column(Column("uid", Uuid(), default=uuid.uuid4)) + + metadata.create_all(connection) + + with expect_warnings( + r".*but has no Python-side or server-side default ", + raise_on_any_unexpected=True, + ): + with expect_raises(exc.IntegrityError): + connection.execute( + insert(t1).returning( + t1.c.id, + t1.c.data, + t1.c.has_server_default, + sort_by_parameter_order=bool(sort_by_parameter_order), + ), + 
[{"data": f"d{i}"} for i in range(10)], + ) + + @testing.variation("add_sentinel_flag_to_col", [True, False]) + @testing.variation( + "return_type", ["include_sentinel", "default_only", "return_defaults"] + ) + @testing.variation( + "sentinel_type", + [ + ("autoincrement", testing.requires.autoincrement_without_sequence), + "identity", + "sequence", + ], + ) + def test_implicit_autoincrement_sentinel( + self, + metadata, + connection, + return_type: testing.Variation, + sort_by_parameter_order, + randomize_returning, + sentinel_type, + add_sentinel_flag_to_col, + ): + + if sentinel_type.identity: + sentinel_args = [Identity()] + elif sentinel_type.sequence: + sentinel_args = [Sequence("id_seq", start=1)] + else: + sentinel_args = [] + t1 = Table( + "data", + metadata, + Column( + "id", + Integer, + *sentinel_args, + primary_key=True, + insert_sentinel=bool(add_sentinel_flag_to_col), + ), + Column("data", String(50)), + Column( + "has_server_default", + String(30), + server_default="some_server_default", + ), + ) + + fixtures.insertmanyvalues_fixture( + connection, + randomize_rows=bool(randomize_returning), + warn_on_downgraded=False, + ) + + if sort_by_parameter_order: + collection_cls = list + else: + collection_cls = set + + metadata.create_all(connection) + + if sort_by_parameter_order: + kw = {"sort_by_parameter_order": True} + else: + kw = {} + + if return_type.include_sentinel: + stmt = t1.insert().returning( + t1.c.id, t1.c.data, t1.c.has_server_default, **kw + ) + elif return_type.default_only: + stmt = t1.insert().returning( + t1.c.data, t1.c.has_server_default, **kw + ) + elif return_type.return_defaults: + stmt = t1.insert().return_defaults(**kw) + + else: + return_type.fail() + + if ( + testing.db.dialect.use_insertmanyvalues + and add_sentinel_flag_to_col + and sort_by_parameter_order + and ( + not ( + testing.db.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + ) + or ( + # currently a SQL Server case, we dont yet render a + # syntax for SQL Server sequence w/ deterministic + # ordering. The INSERT..SELECT could be restructured + # further to support this at a later time however + # sequences with SQL Server are very unusual. 
+ sentinel_type.sequence + and not ( + testing.db.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.SEQUENCE + ) + ) + ) + ): + with expect_raises_message( + exc.InvalidRequestError, + "Column data.id can't be explicitly marked as a " + f"sentinel column when using the {testing.db.dialect.name} " + "dialect", + ): + connection.execute( + stmt, + [{"data": f"d{i}"} for i in range(1, 6)], + ) + return + else: + r = connection.execute( + stmt, + [{"data": f"d{i}"} for i in range(1, 6)], + ) + + if return_type.include_sentinel: + eq_(r.keys(), ["id", "data", "has_server_default"]) + eq_( + collection_cls(r), + collection_cls( + [(i, f"d{i}", "some_server_default") for i in range(1, 6)] + ), + ) + elif return_type.default_only: + eq_(r.keys(), ["data", "has_server_default"]) + eq_( + collection_cls(r), + collection_cls( + [(f"d{i}", "some_server_default") for i in range(1, 6)] + ), + ) + elif return_type.return_defaults: + eq_(r.keys(), ["id", "has_server_default"]) + eq_( + collection_cls(r.inserted_primary_key_rows), + collection_cls([(i + 1,) for i in range(5)]), + ) + eq_( + collection_cls(r.returned_defaults_rows), + collection_cls( + [ + ( + 1, + "some_server_default", + ), + ( + 2, + "some_server_default", + ), + ( + 3, + "some_server_default", + ), + ( + 4, + "some_server_default", + ), + ( + 5, + "some_server_default", + ), + ] + ), + ) + eq_(r.all(), []) + else: + return_type.fail() + + @testing.variation("pk_type", ["serverside", "clientside"]) + @testing.variation( + "sentinel_type", + [ + "use_pk", + ("use_pk_explicit", testing.skip_if("sqlite")), + "separate_uuid", + "separate_sentinel", + ], + ) + @testing.requires.provisioned_upsert + def test_upsert_downgrades( + self, + metadata, + connection, + pk_type: testing.Variation, + sort_by_parameter_order, + randomize_returning, + sentinel_type, + warn_for_downgrades, + ): + if pk_type.serverside: + pk_col = Column( + "id", + Integer(), + primary_key=True, + insert_sentinel=bool(sentinel_type.use_pk_explicit), + ) + elif pk_type.clientside: + pk_col = Column( + "id", + Uuid(), + default=uuid.uuid4, + primary_key=True, + insert_sentinel=bool(sentinel_type.use_pk_explicit), + ) + else: + pk_type.fail() + + if sentinel_type.separate_uuid: + extra_col = Column( + "sent_col", + Uuid(), + default=uuid.uuid4, + insert_sentinel=True, + nullable=False, + ) + elif sentinel_type.separate_sentinel: + extra_col = insert_sentinel("sent_col") + else: + extra_col = Column("sent_col", Integer) + + t1 = Table( + "upsert_table", + metadata, + pk_col, + Column("data", String(50)), + extra_col, + Column( + "has_server_default", + String(30), + server_default="some_server_default", + ), + ) + metadata.create_all(connection) + + result = connection.execute( + insert(t1).returning( + t1.c.id, t1.c.data, sort_by_parameter_order=True + ), + [{"data": "d1"}, {"data": "d2"}], + ) + d1d2 = list(result) + + if pk_type.serverside: + new_ids = [10, 15, 3] + elif pk_type.clientside: + new_ids = [uuid.uuid4() for i in range(3)] + else: + pk_type.fail() + + upsert_data = [ + {"id": d1d2[0][0], "data": "d1 new"}, + {"id": new_ids[0], "data": "d10"}, + {"id": new_ids[1], "data": "d15"}, + {"id": d1d2[1][0], "data": "d2 new"}, + {"id": new_ids[2], "data": "d3"}, + ] + + fixtures.insertmanyvalues_fixture( + connection, + randomize_rows=bool(randomize_returning), + warn_on_downgraded=bool(warn_for_downgrades), + ) + + stmt = provision.upsert( + config, + t1, + (t1.c.data, t1.c.has_server_default), + set_lambda=lambda inserted: { + "data": 
inserted.data + " upserted", + }, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + + with self._expect_downgrade_warnings( + warn_for_downgrades=warn_for_downgrades, + sort_by_parameter_order=sort_by_parameter_order, + ): + result = connection.execute(stmt, upsert_data) + + expected_data = [ + ("d1 new upserted", "some_server_default"), + ("d10", "some_server_default"), + ("d15", "some_server_default"), + ("d2 new upserted", "some_server_default"), + ("d3", "some_server_default"), + ] + if sort_by_parameter_order: + coll = list + else: + coll = set + + eq_(coll(result), coll(expected_data)) + + def test_auto_downgraded_non_mvi_dialect( + self, + metadata, + testing_engine, + randomize_returning, + warn_for_downgrades, + sort_by_parameter_order, + ): + """Accommodate the case of the dialect that supports RETURNING, but + does not support "multi values INSERT" syntax. + + These dialects should still provide insertmanyvalues/returning + support, using downgraded batching. + + For now, we are still keeping this entire thing "opt in" by requiring + that use_insertmanyvalues=True, which means we can't simplify the + ORM by not worrying about dialects where ordering is available or + not. + + However, dialects that use RETURNING, but don't support INSERT VALUES + (..., ..., ...) can set themselves up like this:: + + class MyDialect(DefaultDialect): + use_insertmanyvalues = True + supports_multivalues_insert = False + + This test runs for everyone **including** Oracle, where we + exercise Oracle using "insertmanyvalues" without "multivalues_insert". + + """ + engine = testing_engine() + engine.connect().close() + + engine.dialect.supports_multivalues_insert = False + engine.dialect.use_insertmanyvalues = True + + uuids = [uuid.uuid4() for i in range(10)] + + t1 = Table( + "t1", + metadata, + Column("id", Uuid(), default=functools.partial(next, iter(uuids))), + Column("data", String(50)), + ) + metadata.create_all(engine) + + with engine.connect() as conn: + + fixtures.insertmanyvalues_fixture( + conn, + randomize_rows=bool(randomize_returning), + warn_on_downgraded=bool(warn_for_downgrades), + ) + + stmt = insert(t1).returning( + t1.c.id, + t1.c.data, + sort_by_parameter_order=bool(sort_by_parameter_order), + ) + data = [{"data": f"d{i}"} for i in range(10)] + + with self._expect_downgrade_warnings( + warn_for_downgrades=warn_for_downgrades, + sort_by_parameter_order=True, # will warn even if not sorted + connection=conn, + ): + result = conn.execute(stmt, data) + + expected_data = [(uuids[i], f"d{i}") for i in range(10)] + if sort_by_parameter_order: + coll = list + else: + coll = set + + eq_(coll(result), coll(expected_data)) diff --git a/test/sql/test_metadata.py b/test/sql/test_metadata.py index 8f6c81f15..a8c6bdbbe 100644 --- a/test/sql/test_metadata.py +++ b/test/sql/test_metadata.py @@ -21,6 +21,7 @@ from sqlalchemy import ForeignKeyConstraint from sqlalchemy import func from sqlalchemy import Identity from sqlalchemy import Index +from sqlalchemy import insert_sentinel from sqlalchemy import Integer from sqlalchemy import MetaData from sqlalchemy import Numeric @@ -48,6 +49,7 @@ from sqlalchemy.sql import naming from sqlalchemy.sql import operators from sqlalchemy.sql.base import _NONE_NAME from sqlalchemy.sql.elements import literal_column +from sqlalchemy.sql.schema import _InsertSentinelColumnDefault from sqlalchemy.sql.schema import RETAIN_SCHEMA from sqlalchemy.testing import assert_raises from sqlalchemy.testing import assert_raises_message @@ -621,7 +623,7 @@ class 
MetaDataTest(fixtures.TestBase, ComparesTables): c.add_is_dependent_on(a) eq_(meta.sorted_tables, [d, b, a, c, e]) - def test_deterministic_order(self): + def test_sort_by_parameter_order(self): meta = MetaData() a = Table("a", meta, Column("foo", Integer)) b = Table("b", meta, Column("foo", Integer)) @@ -633,7 +635,7 @@ class MetaDataTest(fixtures.TestBase, ComparesTables): a.add_is_dependent_on(b) eq_(meta.sorted_tables, [b, c, d, a, e]) - def test_fks_deterministic_order(self): + def test_fks_sort_by_parameter_order(self): meta = MetaData() a = Table("a", meta, Column("foo", Integer, ForeignKey("b.foo"))) b = Table("b", meta, Column("foo", Integer)) @@ -6079,3 +6081,52 @@ class CopyDialectOptionsTest(fixtures.TestBase): m2 = MetaData() t2 = t1.to_metadata(m2) # make a copy self.check_dialect_options_(t2) + + +class SentinelColTest(fixtures.TestBase): + def make_table_w_sentinel_col(self, *arg, **kw): + return Table( + "t", + MetaData(), + Column("id", Integer, primary_key=True), + Column(*arg, **kw), + ) + + def test_only_one_sentinel(self): + with expect_raises_message( + exc.ArgumentError, + "a Table may have only one explicit sentinel column", + ): + Table( + "t", + MetaData(), + Column("id", Integer, primary_key=True, insert_sentinel=True), + Column("ASdf", String(50)), + insert_sentinel("sentinel"), + ) + + def test_no_sentinel_default_on_notnull(self): + with expect_raises_message( + exc.ArgumentError, + "The _InsertSentinelColumnDefault may only be applied to a " + "Column that is nullable", + ): + self.make_table_w_sentinel_col( + "sentinel", + Integer, + nullable=False, + insert_sentinel=True, + default=_InsertSentinelColumnDefault(), + ) + + def test_no_sentinel_default_on_non_sentinel(self): + with expect_raises_message( + exc.ArgumentError, + "The _InsertSentinelColumnDefault may only be applied to a " + "Column marked as insert_sentinel=True", + ): + self.make_table_w_sentinel_col( + "sentinel", + Integer, + default=_InsertSentinelColumnDefault(), + ) diff --git a/test/sql/test_returning.py b/test/sql/test_returning.py index e0299e334..7d40fa76f 100644 --- a/test/sql/test_returning.py +++ b/test/sql/test_returning.py @@ -15,6 +15,7 @@ from sqlalchemy import table from sqlalchemy import testing from sqlalchemy import type_coerce from sqlalchemy import update +from sqlalchemy.sql import crud from sqlalchemy.sql.sqltypes import NullType from sqlalchemy.testing import assert_raises_message from sqlalchemy.testing import AssertsCompiledSQL @@ -24,6 +25,8 @@ from sqlalchemy.testing import eq_ from sqlalchemy.testing import expect_raises_message from sqlalchemy.testing import fixtures from sqlalchemy.testing import is_ +from sqlalchemy.testing import is_false +from sqlalchemy.testing import is_true from sqlalchemy.testing import mock from sqlalchemy.testing import provision from sqlalchemy.testing.schema import Column @@ -85,6 +88,31 @@ class ReturnCombinationTests(fixtures.TestBase, AssertsCompiledSQL): stmt.compile, ) + @testing.combinations("return_defaults", "returning", argnames="methname") + @testing.combinations(insert, update, delete, argnames="construct") + def test_sort_by_parameter_ordering_param( + self, methname, construct, table_fixture + ): + t = table_fixture + + stmt = construct(t) + + if construct is insert: + is_false(stmt._sort_by_parameter_order) + + meth = getattr(stmt, methname) + + if construct in (update, delete): + with expect_raises_message( + sa_exc.ArgumentError, + rf"The 'sort_by_parameter_order' argument to " + rf"{methname}\(\) only applies to 
INSERT statements", + ): + meth(t.c.id, sort_by_parameter_order=True) + else: + new = meth(t.c.id, sort_by_parameter_order=True) + is_true(new._sort_by_parameter_order) + def test_return_defaults_no_returning(self, table_fixture): t = table_fixture @@ -1347,15 +1375,37 @@ class InsertManyReturningTest(fixtures.TablesTest): t1 = self.tables.type_cases + grm = crud._get_returning_modifiers + + def _grm(*arg, **kw): + ( + need_pks, + implicit_returning, + implicit_return_defaults, + postfetch_lastrowid, + _, + _, + ) = grm(*arg, **kw) + + return ( + need_pks, + implicit_returning, + implicit_return_defaults, + postfetch_lastrowid, + False, + None, + ) + with mock.patch.object( - testing.db.dialect.statement_compiler, - "_insert_stmt_should_use_insertmanyvalues", - lambda *arg: False, + crud, + "_get_returning_modifiers", + new=_grm, ): with expect_raises_message( sa_exc.StatementError, r'Statement does not have "insertmanyvalues" enabled, ' - r"can\'t use INSERT..RETURNING with executemany in this case.", + r"can\'t use INSERT..RETURNING with executemany in this " + "case.", ): connection.execute( t1.insert().returning(t1.c.id, t1.c.goofy, t1.c.full), @@ -1446,7 +1496,7 @@ class InsertManyReturningTest(fixtures.TablesTest): config, t1, (t1.c.id, t1.c.insdef, t1.c.data), - (lambda excluded: {"data": excluded.data + " excluded"}) + set_lambda=(lambda excluded: {"data": excluded.data + " excluded"}) if update_cols else None, ) diff --git a/tools/generate_tuple_map_overloads.py b/tools/generate_tuple_map_overloads.py index d45577344..22259ebe1 100644 --- a/tools/generate_tuple_map_overloads.py +++ b/tools/generate_tuple_map_overloads.py @@ -51,7 +51,7 @@ def process_module(modname: str, filename: str, cmd: code_writer_cmd) -> str: current_fnname = given_fnname = None for line in orig_py: m = re.match( - r"^( *)# START OVERLOADED FUNCTIONS ([\.\w_]+) ([\w_]+) (\d+)-(\d+)$", # noqa: E501 + r"^( *)# START OVERLOADED FUNCTIONS ([\.\w_]+) ([\w_]+) (\d+)-(\d+)(?: \"(.+)\")?", # noqa: E501 line, ) if m: @@ -65,6 +65,7 @@ def process_module(modname: str, filename: str, cmd: code_writer_cmd) -> str: return_type = m.group(3) start_index = int(m.group(4)) end_index = int(m.group(5)) + extra_args = m.group(6) or "" cmd.write_status( f"Generating {start_index}-{end_index} overloads " @@ -94,7 +95,7 @@ def process_module(modname: str, filename: str, cmd: code_writer_cmd) -> str: f""" @overload def {current_fnname}( - {'self, ' if use_self else ''}{", ".join(combination)} + {'self, ' if use_self else ''}{", ".join(combination)}{extra_args} ) -> {return_type}[Tuple[{', '.join(f'_T{i}' for i in range(num_args))}]]: ... |