diff options
author | Mike Bayer <mike_mp@zzzcomputing.com> | 2014-12-07 20:21:20 -0500 |
---|---|---|
committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2014-12-07 20:21:20 -0500 |
commit | c42b8f8eb8f4c324e2469bf3baaa316c214abce5 (patch) | |
tree | a5cde5ef779ccdb306351f0ad9624451ea873bdd | |
parent | e257ca6c5268517ec2e9a561372d82dfc10475e8 (diff) | |
download | sqlalchemy-c42b8f8eb8f4c324e2469bf3baaa316c214abce5.tar.gz |
- fix inheritance persistence
- start writing docs
-rw-r--r-- | lib/sqlalchemy/orm/persistence.py | 15 | ||||
-rw-r--r-- | lib/sqlalchemy/orm/session.py | 158 | ||||
-rw-r--r-- | lib/sqlalchemy/orm/sync.py | 17 | ||||
-rw-r--r-- | test/orm/test_bulk.py | 50 |
4 files changed, 215 insertions, 25 deletions
diff --git a/lib/sqlalchemy/orm/persistence.py b/lib/sqlalchemy/orm/persistence.py index 81024c41f..d94fbb040 100644 --- a/lib/sqlalchemy/orm/persistence.py +++ b/lib/sqlalchemy/orm/persistence.py @@ -49,7 +49,7 @@ def _bulk_insert( continue records = ( - (None, state_dict, params, super_mapper, + (None, state_dict, params, mapper, connection, value_params, has_all_pks, has_all_defaults) for state, state_dict, params, mp, @@ -918,7 +918,7 @@ def _finalize_insert_update_commands(base_mapper, uowtransaction, states): def _postfetch(mapper, uowtransaction, table, - state, dict_, result, params, value_params): + state, dict_, result, params, value_params, bulk=False): """Expire attributes in need of newly persisted database state, after an INSERT or UPDATE statement has proceeded for that state.""" @@ -954,10 +954,13 @@ def _postfetch(mapper, uowtransaction, table, # TODO: this still goes a little too often. would be nice to # have definitive list of "columns that changed" here for m, equated_pairs in mapper._table_to_equated[table]: - sync.populate(state, m, state, m, - equated_pairs, - uowtransaction, - mapper.passive_updates) + if state is None: + sync.bulk_populate_inherit_keys(dict_, m, equated_pairs) + else: + sync.populate(state, m, state, m, + equated_pairs, + uowtransaction, + mapper.passive_updates) def _connections_for_states(base_mapper, uowtransaction, states): diff --git a/lib/sqlalchemy/orm/session.py b/lib/sqlalchemy/orm/session.py index 7dd577230..e07b4554e 100644 --- a/lib/sqlalchemy/orm/session.py +++ b/lib/sqlalchemy/orm/session.py @@ -2048,6 +2048,66 @@ class Session(_SessionClassMethods): transaction.rollback(_capture_exception=True) def bulk_save_objects(self, objects, return_defaults=False): + """Perform a bulk save of the given list of objects. + + The bulk save feature allows mapped objects to be used as the + source of simple INSERT and UPDATE operations which can be more easily + grouped together into higher performing "executemany" + operations; the extraction of data from the objects is also performed + using a lower-latency process that ignores whether or not attributes + have actually been modified in the case of UPDATEs, and also ignores + SQL expressions. + + The objects as given are not added to the session and no additional + state is established on them, unless the ``return_defaults`` flag + is also set. + + .. warning:: + + The bulk save feature allows for a lower-latency INSERT/UPDATE + of rows at the expense of a lack of features. Features such + as object management, relationship handling, and SQL clause + support are bypassed in favor of raw INSERT/UPDATES of records. + + **Please read the list of caveats at :ref:`bulk_operations` + before using this method.** + + :param objects: a list of mapped object instances. The mapped + objects are persisted as is, and are **not** associated with the + :class:`.Session` afterwards. + + For each object, whether the object is sent as an INSERT or an + UPDATE is dependent on the same rules used by the :class:`.Session` + in traditional operation; if the object has the + :attr:`.InstanceState.key` + attribute set, then the object is assumed to be "detached" and + will result in an UPDATE. Otherwise, an INSERT is used. + + In the case of an UPDATE, **all** those attributes which are present + and are not part of the primary key are applied to the SET clause + of the UPDATE statement, regardless of whether any change in state + was logged on each attribute; there is no checking of per-attribute + history. The primary key attributes, which are required, + are applied to the WHERE clause. + + :param return_defaults: when True, rows that are missing values which + generate defaults, namely integer primary key defaults and sequences, + will be inserted **one at a time**, so that the primary key value + is available. In particular this will allow joined-inheritance + and other multi-table mappings to insert correctly without the need + to provide primary key values ahead of time; however, + return_defaults mode greatly reduces the performance gains of the + method overall. + + .. seealso:: + + :ref:`bulk_operations` + + :meth:`.Session.bulk_insert_mappings` + + :meth:`.Session.bulk_update_mappings` + + """ for (mapper, isupdate), states in itertools.groupby( (attributes.instance_state(obj) for obj in objects), lambda state: (state.mapper, state.key is not None) @@ -2056,10 +2116,108 @@ class Session(_SessionClassMethods): mapper, states, isupdate, True, return_defaults) def bulk_insert_mappings(self, mapper, mappings, return_defaults=False): + """Perform a bulk insert of the given list of mapping dictionaries. + + The bulk insert feature allows plain Python dictionaries to be used as + the source of simple INSERT operations which can be more easily + grouped together into higher performing "executemany" + operations. Using dictionaries, there is no "history" or session + state management features in use, reducing latency when inserting + large numbers of simple rows. + + The values within the dictionaries as given are typically passed + without modification into Core :meth:`.Insert` constructs, after + organizing the values within them across the tables to which + the given mapper is mapped. + + .. warning:: + + The bulk insert feature allows for a lower-latency INSERT + of rows at the expense of a lack of features. Features such + as relationship handling and SQL clause support are bypassed + in favor of a raw INSERT of records. + + **Please read the list of caveats at :ref:`bulk_operations` + before using this method.** + + :param mapper: a mapped class, or the actual :class:`.Mapper` object, + representing the single kind of object represented within the mapping + list. + + :param mappings: a list of dictionaries, each one containing the state + of the mapped row to be inserted, in terms of the attribute names + on the mapped class. If the mapping refers to multiple tables, + such as a joined-inheritance mapping, each dictionary must contain + all keys to be populated into all tables. + + :param return_defaults: when True, rows that are missing values which + generate defaults, namely integer primary key defaults and sequences, + will be inserted **one at a time**, so that the primary key value + is available. In particular this will allow joined-inheritance + and other multi-table mappings to insert correctly without the need + to provide primary + key values ahead of time; however, return_defaults mode greatly + reduces the performance gains of the method overall. If the rows + to be inserted only refer to a single table, then there is no + reason this flag should be set as the returned default information + is not used. + + + .. seealso:: + + :ref:`bulk_operations` + + :meth:`.Session.bulk_save_objects` + + :meth:`.Session.bulk_update_mappings` + + """ self._bulk_save_mappings( mapper, mappings, False, False, return_defaults) def bulk_update_mappings(self, mapper, mappings): + """Perform a bulk update of the given list of mapping dictionaries. + + The bulk update feature allows plain Python dictionaries to be used as + the source of simple UPDATE operations which can be more easily + grouped together into higher performing "executemany" + operations. Using dictionaries, there is no "history" or session + state management features in use, reducing latency when updating + large numbers of simple rows. + + .. warning:: + + The bulk update feature allows for a lower-latency UPDATE + of rows at the expense of a lack of features. Features such + as relationship handling and SQL clause support are bypassed + in favor of a raw UPDATE of records. + + **Please read the list of caveats at :ref:`bulk_operations` + before using this method.** + + :param mapper: a mapped class, or the actual :class:`.Mapper` object, + representing the single kind of object represented within the mapping + list. + + :param mappings: a list of dictionaries, each one containing the state + of the mapped row to be updated, in terms of the attribute names + on the mapped class. If the mapping refers to multiple tables, + such as a joined-inheritance mapping, each dictionary may contain + keys corresponding to all tables. All those keys which are present + and are not part of the primary key are applied to the SET clause + of the UPDATE statement; the primary key values, which are required, + are applied to the WHERE clause. + + + .. seealso:: + + :ref:`bulk_operations` + + :meth:`.Session.bulk_insert_mappings` + + :meth:`.Session.bulk_save_objects` + + """ self._bulk_save_mappings(mapper, mappings, True, False, False) def _bulk_save_mappings( diff --git a/lib/sqlalchemy/orm/sync.py b/lib/sqlalchemy/orm/sync.py index e1ef85c1d..671c7c067 100644 --- a/lib/sqlalchemy/orm/sync.py +++ b/lib/sqlalchemy/orm/sync.py @@ -45,6 +45,23 @@ def populate(source, source_mapper, dest, dest_mapper, uowcommit.attributes[("pk_cascaded", dest, r)] = True +def bulk_populate_inherit_keys( + source_dict, source_mapper, synchronize_pairs): + # a simplified version of populate() used by bulk insert mode + for l, r in synchronize_pairs: + try: + prop = source_mapper._columntoproperty[l] + value = source_dict[prop.key] + except exc.UnmappedColumnError: + _raise_col_to_prop(False, source_mapper, l, source_mapper, r) + + try: + prop = source_mapper._columntoproperty[r] + source_dict[prop.key] = value + except exc.UnmappedColumnError: + _raise_col_to_prop(True, source_mapper, l, source_mapper, r) + + def clear(dest, dest_mapper, synchronize_pairs): for l, r in synchronize_pairs: if r.primary_key and \ diff --git a/test/orm/test_bulk.py b/test/orm/test_bulk.py index 4bcde2480..f6d2513d1 100644 --- a/test/orm/test_bulk.py +++ b/test/orm/test_bulk.py @@ -10,6 +10,7 @@ from test.orm import _fixtures class BulkTest(testing.AssertsExecutionResults): run_inserts = None + run_define_tables = 'each' class BulkInsertTest(BulkTest, _fixtures.FixtureTest): @@ -75,7 +76,7 @@ class BulkInsertTest(BulkTest, _fixtures.FixtureTest): assert 'id' not in objects[0].__dict__ -class BulkInheritanceTest(fixtures.MappedTest, BulkTest): +class BulkInheritanceTest(BulkTest, fixtures.MappedTest): @classmethod def define_tables(cls, metadata): Table( @@ -197,11 +198,14 @@ class BulkInheritanceTest(fixtures.MappedTest, BulkTest): ), CompiledSQL( - "INSERT INTO boss (golf_swing) VALUES (:golf_swing)", - [{'golf_swing': 'g1'}] + "INSERT INTO boss (boss_id, golf_swing) VALUES " + "(:boss_id, :golf_swing)", + [{'boss_id': 4, 'golf_swing': 'g1'}] ) ) eq_(objects[0].__dict__['person_id'], 1) + eq_(objects[3].__dict__['person_id'], 4) + eq_(objects[3].__dict__['boss_id'], 4) def test_bulk_save_joined_inh_no_defaults(self): Person, Engineer, Manager, Boss = \ @@ -220,7 +224,7 @@ class BulkInheritanceTest(fixtures.MappedTest, BulkTest): person_id=3, name='e2', status='s3', primary_language='l2'), Boss( - person_id=4, + person_id=4, boss_id=4, name='b1', status='s3', manager_name='mn2', golf_swing='g1') ], @@ -264,8 +268,9 @@ class BulkInheritanceTest(fixtures.MappedTest, BulkTest): [{'status': 's3', 'person_id': 4, 'manager_name': 'mn2'}] ), CompiledSQL( - "INSERT INTO boss (golf_swing) VALUES (:golf_swing)", - [{'golf_swing': 'g1'}] + "INSERT INTO boss (boss_id, golf_swing) VALUES " + "(:boss_id, :golf_swing)", + [{'boss_id': 4, 'golf_swing': 'g1'}] ) ) @@ -290,28 +295,35 @@ class BulkInheritanceTest(fixtures.MappedTest, BulkTest): name='b3', status='s3', manager_name='mn3', golf_swing='g3' ), - ] + ], return_defaults=True ) - # the only difference here is that common classes are grouped together. - # at the moment it doesn't lump all the "people" tables from - # different classes together. asserter.assert_( CompiledSQL( "INSERT INTO people (name) VALUES (:name)", - [{'name': 'b1'}, {'name': 'b2'}, {'name': 'b3'}] + [{'name': 'b1'}] + ), + CompiledSQL( + "INSERT INTO people (name) VALUES (:name)", + [{'name': 'b2'}] ), CompiledSQL( - "INSERT INTO managers (status, manager_name) VALUES " - "(:status, :manager_name)", - [{'status': 's1', 'manager_name': 'mn1'}, - {'status': 's2', 'manager_name': 'mn2'}, - {'status': 's3', 'manager_name': 'mn3'}] + "INSERT INTO people (name) VALUES (:name)", + [{'name': 'b3'}] + ), + CompiledSQL( + "INSERT INTO managers (person_id, status, manager_name) " + "VALUES (:person_id, :status, :manager_name)", + [{'person_id': 1, 'status': 's1', 'manager_name': 'mn1'}, + {'person_id': 2, 'status': 's2', 'manager_name': 'mn2'}, + {'person_id': 3, 'status': 's3', 'manager_name': 'mn3'}] ), CompiledSQL( - "INSERT INTO boss (golf_swing) VALUES (:golf_swing)", - [{'golf_swing': 'g1'}, - {'golf_swing': 'g2'}, {'golf_swing': 'g3'}] + "INSERT INTO boss (boss_id, golf_swing) VALUES " + "(:boss_id, :golf_swing)", + [{'golf_swing': 'g1', 'boss_id': 1}, + {'golf_swing': 'g2', 'boss_id': 2}, + {'golf_swing': 'g3', 'boss_id': 3}] ) ) |