- wip...

author: Mike Bayer <mike_mp@zzzcomputing.com> 2015-07-31 13:59:10 -0400
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2015-07-31 17:22:57 -0400
commit: 196775b351c796498f393072cffaaf0d8205e9a3 (patch)
tree: 0b90584cba0fb2a8a9baade64a6abcb99520a840
parent: dd6110eed335154e0ae14b2dba13e44af76c4f2b (diff)
download: sqlalchemy-196775b351c796498f393072cffaaf0d8205e9a3.tar.gz
7 files changed, 114 insertions, 21 deletions
diff --git a/doc/build/changelog/migration_11.rst b/doc/build/changelog/migration_11.rst
index f5602a8ad..2ebf10eaa 100644
--- a/doc/build/changelog/migration_11.rst
+++ b/doc/build/changelog/migration_11.rst
@@ -66,6 +66,47 @@ as it relies on deprecated features of setuptools.
 New Features and Improvements - ORM
 ===================================
 
+.. _change_3499:
+
+Changes regarding "unhashable" types
+------------------------------------
+
+The :class:`.Query` object has a well-known behavior of "deduping"
+returned rows that contain at least one ORM-mapped entity (e.g., a
+full mapped object, as opposed to individual column values). The
+primary purpose of this is so that the handling of entities works
+smoothly in conjunction with the identity map, including to
+accommodate the duplicate entities normally represented within
+joined eager loading, as well as when joins are used for the purposes
+of filtering on additional columns.
+
+This deduplication relies upon the hashability of the elements within
+the row.  With the introduction of Postgresql's special types like
+:class:`.postgresql.ARRAY`, :class:`.postgresql.HSTORE` and
+:class:`.postgresql.JSON`, the experience of types within rows being
+unhashable and encountering problems here is more prevalent than
+it was previously.
+
+In fact, SQLAlchemy has since version 0.8 included a flag on datatypes that
+are noted as "unhashable"; however, this flag was not used consistently
+on built-in types.  As described in :ref:`change_3499_postgresql`, this
+flag is now set consistently for all of Postgresql's "structural" types.
+
+
+The "unhashable" flag is also set on the :class:`.NullType` type.  This
+is key, as :class:`.NullType` is used to refer to any expression of unknown
+type.  In particular, the Postgresql types deal a lot with this type
+as the return value of any indexed accessor from a :class:`.JSON` type
+defaults to :class:`.NullType`, unless the new "type_schema" feature
+is used; see :ref:`change_3503` for details.
+
+Additionally, the treatment of a so-called "unhashable" type is slightly
+different than it's been in previous releases; internally we are using
+the ``id()`` function to get a "hash value" from these structures, just
+as we would any ordinary mapped object.   This replaces the previous
+approach which applied a counter to the object.
+
+:ticket:`3499`
 
 New Features and Improvements - Core
 ====================================
@@ -82,6 +123,44 @@ Key Behavioral Changes - Core
 Dialect Improvements and Changes - Postgresql
 =============================================
 
+.. _change_3499_postgresql:
+
+ARRAY and JSON types now correctly specify "unhashable"
+-------------------------------------------------------
+
+As described in :ref:`change_3499`, the ORM relies upon being able to
+produce a hash function for column values when a query's selected entities
+mix full ORM entities with column expressions.   The ``hashable=False``
+flag is now correctly set on all of PG's "data structure" types, including
+:class:`.ARRAY` and :class:`.JSON`.  The :class:`.JSONB` and :class:`.HSTORE`
+types already included this flag.  For :class:`.ARRAY`,
+this is conditional based on the :paramref:`.postgresql.ARRAY.as_tuple`
+flag, however it should no longer be necessary to set this flag
+in order to have an array value present in a composed ORM row.
+
+Additionally, the type of an expression derived from indexed access
+of a :class:`.JSON` or :class:`.JSONB` type defaults to :class:`.NullType`,
+unless the new "type_schema" feature described in :ref:`change_3503` is used
+to define return types.  As part of this change, :class:`.NullType` also
+properly defines ``hashable=False`` so that a sub-list or sub-dictionary
+returned by an index-accessed expression will by default not attempt
+to be hashed.
+
+.. seealso::
+
+    :ref:`change_3499`
+
+    :ref:`change_3503`
+
+:ticket:`3499`
+
+
+ARRAY, JSON, HSTORE, JSONB marked 'unhashable', to allow ORM queries to proceed
+-------------------------------------------------------------------------------
+
+This change will assist in these types being used in ORM queries without
+special steps; see the section :ref:`change_3499` for the full details.
+
 
 Dialect Improvements and Changes - MySQL
 =============================================
diff --git a/lib/sqlalchemy/dialects/postgresql/base.py b/lib/sqlalchemy/dialects/postgresql/base.py
index 64d19eda1..6a4e7f2b5 100644
--- a/lib/sqlalchemy/dialects/postgresql/base.py
+++ b/lib/sqlalchemy/dialects/postgresql/base.py
@@ -1071,6 +1071,11 @@ class ARRAY(sqltypes.Concatenable, sqltypes.TypeEngine):
         self.zero_indexes = zero_indexes
 
     @property
+    def hashable(self):
+        return self.as_tuple
+
+
+    @property
     def python_type(self):
         return list
 
diff --git a/lib/sqlalchemy/dialects/postgresql/json.py b/lib/sqlalchemy/dialects/postgresql/json.py
index 13ebc4afe..3e30b8287 100644
--- a/lib/sqlalchemy/dialects/postgresql/json.py
+++ b/lib/sqlalchemy/dialects/postgresql/json.py
@@ -165,6 +165,8 @@ class JSON(sqltypes.TypeEngine):
 
     __visit_name__ = 'JSON'
 
+    hashable = False
+
     def __init__(self, none_as_null=False):
         """Construct a :class:`.JSON` type.
 
@@ -306,7 +308,6 @@ class JSONB(JSON):
     """
 
     __visit_name__ = 'JSONB'
-    hashable = False
 
     class comparator_factory(sqltypes.Concatenable.Comparator):
         """Define comparison operations for :class:`.JSON`."""
diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py
index b81e98a58..d8bf662fc 100644
--- a/lib/sqlalchemy/orm/loading.py
+++ b/lib/sqlalchemy/orm/loading.py
@@ -32,8 +32,7 @@ def instances(query, cursor, context):
 
     context.runid = _new_runid()
 
-    filter_fns = [ent.filter_fn for ent in query._entities]
-    filtered = id in filter_fns
+    filtered = query._has_mapper_entities
 
     single_entity = len(query._entities) == 1 and \
         query._entities[0].supports_single_entity
@@ -43,7 +42,12 @@ def instances(query, cursor, context):
             filter_fn = id
         else:
             def filter_fn(row):
-                return tuple(fn(x) for x, fn in zip(row, filter_fns))
+                return tuple(
+                    id(item)
+                    if ent.use_id_for_hash
+                    else item
+                    for ent, item in zip(query._entities, row)
+                )
 
     try:
         (process, labels) = \
diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py
index 8b3df08e7..3e4ea24a7 100644
--- a/lib/sqlalchemy/orm/query.py
+++ b/lib/sqlalchemy/orm/query.py
@@ -103,6 +103,7 @@ class Query(object):
     _orm_only_adapt = True
     _orm_only_from_obj_alias = True
     _current_path = _path_registry
+    _has_mapper_entities = False
 
     def __init__(self, entities, session=None):
         self.session = session
@@ -3181,12 +3182,14 @@ class _MapperEntity(_QueryEntity):
         if not query._primary_entity:
             query._primary_entity = self
         query._entities.append(self)
-
+        query._has_mapper_entities = True
         self.entities = [entity]
         self.expr = entity
 
     supports_single_entity = True
 
+    use_id_for_hash = True
+
     def setup_entity(self, ext_info, aliased_adapter):
         self.mapper = ext_info.mapper
         self.aliased_adapter = aliased_adapter
@@ -3232,8 +3235,6 @@ class _MapperEntity(_QueryEntity):
             self.mapper, sql_util.ColumnAdapter(
                 from_obj, self.mapper._equivalent_columns))
 
-    filter_fn = id
-
     @property
     def type(self):
         return self.mapper.class_
@@ -3462,6 +3463,8 @@ class Bundle(InspectionAttr):
 
 
 class _BundleEntity(_QueryEntity):
+    use_id_for_hash = False
+
     def __init__(self, query, bundle, setup_entities=True):
         query._entities.append(self)
         self.bundle = self.expr = bundle
@@ -3478,8 +3481,6 @@ class _BundleEntity(_QueryEntity):
 
         self.entities = ()
 
-        self.filter_fn = lambda item: item
-
         self.supports_single_entity = self.bundle.single_entity
 
     @property
@@ -3582,11 +3583,7 @@ class _ColumnEntity(_QueryEntity):
             search_entities = True
 
         self.type = type_ = column.type
-        if type_.hashable:
-            self.filter_fn = lambda item: item
-        else:
-            counter = util.counter()
-            self.filter_fn = lambda item: counter()
+        self.use_id_for_hash = not type_.hashable
 
         # If the Column is unnamed, give it a
         # label() so that mutable column expressions
diff --git a/lib/sqlalchemy/sql/sqltypes.py b/lib/sqlalchemy/sql/sqltypes.py
index 7e2e601e2..f7780b12d 100644
--- a/lib/sqlalchemy/sql/sqltypes.py
+++ b/lib/sqlalchemy/sql/sqltypes.py
@@ -1648,6 +1648,8 @@ class NullType(TypeEngine):
 
     _isnull = True
 
+    hashable = False
+
     def literal_processor(self, dialect):
         def process(value):
             return "NULL"
diff --git a/test/orm/test_query.py b/test/orm/test_query.py
index 55af023b1..5f290a255 100644
--- a/test/orm/test_query.py
+++ b/test/orm/test_query.py
@@ -1960,13 +1960,6 @@ class FilterTest(QueryTest, AssertsCompiledSQL):
             sess.query(User). \
             filter(User.addresses.any(email_address='fred@fred.com')).all()
 
-        # test that any() doesn't overcorrelate
-        assert [User(id=7), User(id=8)] == \
-            sess.query(User).join("addresses"). \
-            filter(
-                ~User.addresses.any(
-                    Address.email_address == 'fred@fred.com')).all()
-
         # test that the contents are not adapted by the aliased join
         assert [User(id=7), User(id=8)] == \
             sess.query(User).join("addresses", aliased=True). \
@@ -1978,6 +1971,18 @@ class FilterTest(QueryTest, AssertsCompiledSQL):
             sess.query(User).outerjoin("addresses", aliased=True). \
             filter(~User.addresses.any()).all()
 
+    def test_any_doesnt_overcorrelate(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        sess = create_session()
+
+        # test that any() doesn't overcorrelate
+        assert [User(id=7), User(id=8)] == \
+            sess.query(User).join("addresses"). \
+            filter(
+                ~User.addresses.any(
+                    Address.email_address == 'fred@fred.com')).all()
+
     def test_has(self):
         Dingaling, User, Address = (
             self.classes.Dingaling, self.classes.User, self.classes.Address)
author	Mike Bayer <mike_mp@zzzcomputing.com>	2015-07-31 13:59:10 -0400
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2015-07-31 17:22:57 -0400
commit	196775b351c796498f393072cffaaf0d8205e9a3 (patch)
tree	0b90584cba0fb2a8a9baade64a6abcb99520a840
parent	dd6110eed335154e0ae14b2dba13e44af76c4f2b (diff)
download	sqlalchemy-196775b351c796498f393072cffaaf0d8205e9a3.tar.gz