- Some changes to how the :attr:`.FromClause.c` collection behaves when presented with duplicate columns. The behavior of emitting a warning and replacing the old column with the same name still remains to some degree; the replacement in particular is kept to maintain backwards compatibility. However, the replaced column now remains associated with the ``c`` collection in a new collection, ``._all_columns``, which is used by constructs such as aliases and unions so that they work with the actual list of columns in ``c`` rather than with the unique set of key names. This helps in situations where SELECT statements with same-named columns are used in unions and similar constructs: the union can match the columns up positionally, and there is also some chance of :meth:`.FromClause.corresponding_column` still being usable (it can now return a column that is present only in ``selectable.c._all_columns`` and is not otherwise available under its name). The new collection is underscored because we still need to decide where this list might end up. Theoretically it would become the result of ``iter(selectable.c)``; however, this would mean that the length of the iteration would no longer match the length of ``keys()``, and that behavior needs to be checked out. Fixes #2974.
- Add a bunch more tests for ColumnCollection.
author: Mike Bayer <mike_mp@zzzcomputing.com> 2014-02-26 15:34:49 -0500
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2014-02-26 15:34:49 -0500
commit: 302ad6228a12fe5cb4c5d332e5bab65ed373bc01 (patch)
tree: 9e39551b168090a75c28c1d5ba45ef24cdbfc1da
parent: bf67069d264cba3feed8a48614289d605ed61a55 (diff)
download: sqlalchemy-302ad6228a12fe5cb4c5d332e5bab65ed373bc01.tar.gz
5 files changed, 269 insertions, 33 deletions
diff --git a/doc/build/changelog/changelog_09.rst b/doc/build/changelog/changelog_09.rst
index a6245bdb7..9a1ff9f27 100644
--- a/doc/build/changelog/changelog_09.rst
+++ b/doc/build/changelog/changelog_09.rst
@@ -16,6 +16,30 @@
 
     .. change::
         :tags: bug, sql
+        :tickets: 2974
+
+        Some changes to how the :attr:`.FromClause.c` collection behaves
+        when presented with duplicate columns.  The behavior of emitting a
+        warning and replacing the old column with the same name still
+        remains to some degree; the replacement in particular is to maintain
+        backwards compatibility.  However, the replaced column still remains
+        associated with the ``c`` collection now in a collection ``._all_columns``,
+        which is used by constructs such as aliases and unions, to deal with
+        the set of columns in ``c`` more towards what is actually in the
+        list of columns rather than the unique set of key names.  This helps
+        with situations where SELECT statements with same-named columns
+        are used in unions and such, so that the union can match the columns
+        up positionally and also there's some chance of :meth:`.FromClause.corresponding_column`
+        still being usable here (it can now return a column that is only
+        in selectable.c._all_columns and not otherwise named).
+        The new collection is underscored as we still need to decide where this
+        list might end up.   Theoretically it
+        would become the result of iter(selectable.c), however this would mean
+        that the length of the iteration would no longer match the length of
+        keys(), and that behavior needs to be checked out.
+
+    .. change::
+        :tags: bug, sql
 
         Fixed issue in new :meth:`.TextClause.columns` method where the ordering
         of columns given positionally would not be preserved.   This could
diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py
index 260cdab66..c2bdd8b1c 100644
--- a/lib/sqlalchemy/sql/base.py
+++ b/lib/sqlalchemy/sql/base.py
@@ -435,10 +435,10 @@ class ColumnCollection(util.OrderedProperties):
 
     """
 
-    def __init__(self, *cols):
+    def __init__(self):
         super(ColumnCollection, self).__init__()
-        self._data.update((c.key, c) for c in cols)
-        self.__dict__['_all_cols'] = util.column_set(self)
+        self.__dict__['_all_col_set'] = util.column_set()
+        self.__dict__['_all_columns'] = []
 
     def __str__(self):
         return repr([str(c) for c in self])
@@ -459,15 +459,26 @@ class ColumnCollection(util.OrderedProperties):
            Used by schema.Column to override columns during table reflection.
 
         """
+        remove_col = None
         if column.name in self and column.key != column.name:
             other = self[column.name]
             if other.name == other.key:
-                del self._data[other.name]
-                self._all_cols.remove(other)
+                remove_col = other
+                self._all_col_set.remove(other)
+                del self._data[other.key]
+
         if column.key in self._data:
-            self._all_cols.remove(self._data[column.key])
-        self._all_cols.add(column)
+            remove_col = self._data[column.key]
+            self._all_col_set.remove(remove_col)
+
+        self._all_col_set.add(column)
         self._data[column.key] = column
+        if remove_col is not None:
+            self._all_columns[:] = [column if c is remove_col
+                                            else c for c in self._all_columns]
+        else:
+            self._all_columns.append(column)
+
 
     def add(self, column):
         """Add a column to this collection.
@@ -497,37 +508,41 @@ class ColumnCollection(util.OrderedProperties):
                           '%r, which has the same key.  Consider '
                           'use_labels for select() statements.' % (key,
                           getattr(existing, 'table', None), value))
-            self._all_cols.remove(existing)
+
             # pop out memoized proxy_set as this
             # operation may very well be occurring
             # in a _make_proxy operation
             util.memoized_property.reset(value, "proxy_set")
-        self._all_cols.add(value)
+
+        self._all_col_set.add(value)
+        self._all_columns.append(value)
         self._data[key] = value
 
     def clear(self):
-        self._data.clear()
-        self._all_cols.clear()
+        raise NotImplementedError()
 
     def remove(self, column):
-        del self._data[column.key]
-        self._all_cols.remove(column)
+        raise NotImplementedError()
 
-    def update(self, value):
-        self._data.update(value)
-        self._all_cols.clear()
-        self._all_cols.update(self._data.values())
+    def update(self, iter):
+        cols = list(iter)
+        self._all_columns.extend(c for label, c in cols)
+        self._all_col_set.update(c for label, c in cols)
+        self._data.update((label, c) for label, c in cols)
 
     def extend(self, iter):
-        self.update((c.key, c) for c in iter)
+        cols = list(iter)
+        self._all_columns.extend(cols)
+        self._all_col_set.update(cols)
+        self._data.update((c.key, c) for c in cols)
 
     __hash__ = None
 
     @util.dependencies("sqlalchemy.sql.elements")
     def __eq__(self, elements, other):
         l = []
-        for c in other:
-            for local in self:
+        for c in getattr(other, "_all_columns", other):
+            for local in self._all_columns:
                 if c.shares_lineage(local):
                     l.append(c == local)
         return elements.and_(*l)
@@ -537,22 +552,28 @@ class ColumnCollection(util.OrderedProperties):
             raise exc.ArgumentError("__contains__ requires a string argument")
         return util.OrderedProperties.__contains__(self, other)
 
+    def __getstate__(self):
+        return {'_data': self.__dict__['_data'],
+                '_all_columns': self.__dict__['_all_columns']}
+
     def __setstate__(self, state):
         self.__dict__['_data'] = state['_data']
-        self.__dict__['_all_cols'] = util.column_set(self._data.values())
+        self.__dict__['_all_columns'] = state['_all_columns']
+        self.__dict__['_all_col_set'] = util.column_set(state['_all_columns'])
 
     def contains_column(self, col):
         # this has to be done via set() membership
-        return col in self._all_cols
+        return col in self._all_col_set
 
     def as_immutable(self):
-        return ImmutableColumnCollection(self._data, self._all_cols)
+        return ImmutableColumnCollection(self._data, self._all_col_set, self._all_columns)
 
 
 class ImmutableColumnCollection(util.ImmutableProperties, ColumnCollection):
-    def __init__(self, data, colset):
+    def __init__(self, data, colset, all_columns):
         util.ImmutableProperties.__init__(self, data)
-        self.__dict__['_all_cols'] = colset
+        self.__dict__['_all_col_set'] = colset
+        self.__dict__['_all_columns'] = all_columns
 
     extend = remove = util.ImmutableProperties._immutable
 
diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py
index 59d6687b5..d59b45fae 100644
--- a/lib/sqlalchemy/sql/selectable.py
+++ b/lib/sqlalchemy/sql/selectable.py
@@ -342,7 +342,7 @@ class FromClause(Selectable):
             return column
         col, intersect = None, None
         target_set = column.proxy_set
-        cols = self.c
+        cols = self.c._all_columns
         for c in cols:
             expanded_proxy_set = set(_expand_cloned(c.proxy_set))
             i = target_set.intersection(expanded_proxy_set)
@@ -934,6 +934,7 @@ class Alias(FromClause):
                     or 'anon'))
         self.name = name
 
+
     @property
     def description(self):
         if util.py3k:
@@ -954,7 +955,7 @@ class Alias(FromClause):
         return self.element.is_derived_from(fromclause)
 
     def _populate_column_collection(self):
-        for col in self.element.columns:
+        for col in self.element.columns._all_columns:
             col._make_proxy(self)
 
     def _refresh_for_new_column(self, column):
@@ -1738,13 +1739,13 @@ class CompoundSelect(GenerativeSelect):
             s = _clause_element_as_expr(s)
 
             if not numcols:
-                numcols = len(s.c)
-            elif len(s.c) != numcols:
+                numcols = len(s.c._all_columns)
+            elif len(s.c._all_columns) != numcols:
                 raise exc.ArgumentError('All selectables passed to '
                         'CompoundSelect must have identical numbers of '
                         'columns; select #%d has %d columns, select '
-                        '#%d has %d' % (1, len(self.selects[0].c), n
-                        + 1, len(s.c)))
+                        '#%d has %d' % (1, len(self.selects[0].c._all_columns), n
+                        + 1, len(s.c._all_columns)))
 
             self.selects.append(s.self_group(self))
 
@@ -1876,7 +1877,7 @@ class CompoundSelect(GenerativeSelect):
         return False
 
     def _populate_column_collection(self):
-        for cols in zip(*[s.c for s in self.selects]):
+        for cols in zip(*[s.c._all_columns for s in self.selects]):
 
             # this is a slightly hacky thing - the union exports a
             # column that resembles just that of the *first* selectable.
diff --git a/test/base/test_utils.py b/test/base/test_utils.py
index 86e4b190a..e6ea06296 100644
--- a/test/base/test_utils.py
+++ b/test/base/test_utils.py
@@ -5,7 +5,7 @@ from sqlalchemy.testing import assert_raises, assert_raises_message, fixtures
 from sqlalchemy.testing import eq_, is_, ne_, fails_if
 from sqlalchemy.testing.util import picklers, gc_collect
 from sqlalchemy.util import classproperty, WeakSequence, get_callable_argspec
-
+from sqlalchemy.sql import column
 
 class KeyedTupleTest():
 
@@ -298,6 +298,161 @@ class ColumnCollectionTest(fixtures.TestBase):
         assert (cc1 == cc2).compare(c1 == c2)
         assert not (cc1 == cc3).compare(c2 == c3)
 
+    @testing.emits_warning("Column ")
+    def test_dupes_add(self):
+        cc = sql.ColumnCollection()
+
+        c1, c2a, c3, c2b = column('c1'), column('c2'), column('c3'), column('c2')
+
+        cc.add(c1)
+        cc.add(c2a)
+        cc.add(c3)
+        cc.add(c2b)
+
+        eq_(cc._all_columns, [c1, c2a, c3, c2b])
+
+        # for iter, c2a is replaced by c2b, ordering
+        # is maintained in that way.  ideally, iter would be
+        # the same as the "_all_columns" collection.
+        eq_(list(cc), [c1, c2b, c3])
+
+        assert cc.contains_column(c2a)
+        assert cc.contains_column(c2b)
+
+        ci = cc.as_immutable()
+        eq_(ci._all_columns, [c1, c2a, c3, c2b])
+        eq_(list(ci), [c1, c2b, c3])
+
+    def test_replace(self):
+        cc = sql.ColumnCollection()
+
+        c1, c2a, c3, c2b = column('c1'), column('c2'), column('c3'), column('c2')
+
+        cc.add(c1)
+        cc.add(c2a)
+        cc.add(c3)
+
+        cc.replace(c2b)
+
+        eq_(cc._all_columns, [c1, c2b, c3])
+        eq_(list(cc), [c1, c2b, c3])
+
+        assert not cc.contains_column(c2a)
+        assert cc.contains_column(c2b)
+
+        ci = cc.as_immutable()
+        eq_(ci._all_columns, [c1, c2b, c3])
+        eq_(list(ci), [c1, c2b, c3])
+
+    def test_replace_key_matches(self):
+        cc = sql.ColumnCollection()
+
+        c1, c2a, c3, c2b = column('c1'), column('c2'), column('c3'), column('X')
+        c2b.key = 'c2'
+
+        cc.add(c1)
+        cc.add(c2a)
+        cc.add(c3)
+
+        cc.replace(c2b)
+
+        assert not cc.contains_column(c2a)
+        assert cc.contains_column(c2b)
+
+        eq_(cc._all_columns, [c1, c2b, c3])
+        eq_(list(cc), [c1, c2b, c3])
+
+        ci = cc.as_immutable()
+        eq_(ci._all_columns, [c1, c2b, c3])
+        eq_(list(ci), [c1, c2b, c3])
+
+    def test_replace_name_matches(self):
+        cc = sql.ColumnCollection()
+
+        c1, c2a, c3, c2b = column('c1'), column('c2'), column('c3'), column('c2')
+        c2b.key = 'X'
+
+        cc.add(c1)
+        cc.add(c2a)
+        cc.add(c3)
+
+        cc.replace(c2b)
+
+        assert not cc.contains_column(c2a)
+        assert cc.contains_column(c2b)
+
+        eq_(cc._all_columns, [c1, c2b, c3])
+        eq_(list(cc), [c1, c3, c2b])
+
+        ci = cc.as_immutable()
+        eq_(ci._all_columns, [c1, c2b, c3])
+        eq_(list(ci), [c1, c3, c2b])
+
+    def test_replace_no_match(self):
+        cc = sql.ColumnCollection()
+
+        c1, c2, c3, c4 = column('c1'), column('c2'), column('c3'), column('c4')
+        c4.key = 'X'
+
+        cc.add(c1)
+        cc.add(c2)
+        cc.add(c3)
+
+        cc.replace(c4)
+
+        assert cc.contains_column(c2)
+        assert cc.contains_column(c4)
+
+        eq_(cc._all_columns, [c1, c2, c3, c4])
+        eq_(list(cc), [c1, c2, c3, c4])
+
+        ci = cc.as_immutable()
+        eq_(ci._all_columns, [c1, c2, c3, c4])
+        eq_(list(ci), [c1, c2, c3, c4])
+
+    def test_dupes_extend(self):
+        cc = sql.ColumnCollection()
+
+        c1, c2a, c3, c2b = column('c1'), column('c2'), column('c3'), column('c2')
+
+        cc.add(c1)
+        cc.add(c2a)
+
+        cc.extend([c3, c2b])
+
+        eq_(cc._all_columns, [c1, c2a, c3, c2b])
+
+        # for iter, c2a is replaced by c2b, ordering
+        # is maintained in that way.  ideally, iter would be
+        # the same as the "_all_columns" collection.
+        eq_(list(cc), [c1, c2b, c3])
+
+        assert cc.contains_column(c2a)
+        assert cc.contains_column(c2b)
+
+        ci = cc.as_immutable()
+        eq_(ci._all_columns, [c1, c2a, c3, c2b])
+        eq_(list(ci), [c1, c2b, c3])
+
+    def test_dupes_update(self):
+        cc = sql.ColumnCollection()
+
+        c1, c2a, c3, c2b = column('c1'), column('c2'), column('c3'), column('c2')
+
+        cc.add(c1)
+        cc.add(c2a)
+
+        cc.update([(c3.key, c3), (c2b.key, c2b)])
+
+        eq_(cc._all_columns, [c1, c2a, c3, c2b])
+
+        assert cc.contains_column(c2a)
+        assert cc.contains_column(c2b)
+
+        # for iter, c2a is replaced by c2b, ordering
+        # is maintained in that way.  ideally, iter would be
+        # the same as the "_all_columns" collection.
+        eq_(list(cc), [c1, c2b, c3])
 
 class LRUTest(fixtures.TestBase):
 
diff --git a/test/sql/test_selectable.py b/test/sql/test_selectable.py
index 9617cfdf7..5d3d53b88 100644
--- a/test/sql/test_selectable.py
+++ b/test/sql/test_selectable.py
@@ -413,6 +413,41 @@ class SelectableTest(fixtures.TestBase, AssertsExecutionResults, AssertsCompiled
         assert u2.corresponding_column(s1.c.col1) is u2.c.col1
         assert u2.corresponding_column(s2.c.col1) is u2.c.col1
 
+    @testing.emits_warning("Column 'col1'")
+    def test_union_dupe_keys(self):
+        s1 = select([table1.c.col1, table1.c.col2, table2.c.col1])
+        s2 = select([table2.c.col1, table2.c.col2, table2.c.col3])
+        u1 = union(s1, s2)
+
+        assert u1.corresponding_column(s1.c._all_columns[0]) is u1.c._all_columns[0]
+        assert u1.corresponding_column(s2.c.col1) is u1.c._all_columns[0]
+        assert u1.corresponding_column(s1.c.col2) is u1.c.col2
+        assert u1.corresponding_column(s2.c.col2) is u1.c.col2
+
+        assert u1.corresponding_column(s2.c.col3) is u1.c._all_columns[2]
+
+        assert u1.corresponding_column(table2.c.col1) is u1.c._all_columns[2]
+        assert u1.corresponding_column(table2.c.col3) is u1.c._all_columns[2]
+
+    @testing.emits_warning("Column 'col1'")
+    def test_union_alias_dupe_keys(self):
+        s1 = select([table1.c.col1, table1.c.col2, table2.c.col1]).alias()
+        s2 = select([table2.c.col1, table2.c.col2, table2.c.col3])
+        u1 = union(s1, s2)
+
+        assert u1.corresponding_column(s1.c._all_columns[0]) is u1.c._all_columns[0]
+        assert u1.corresponding_column(s2.c.col1) is u1.c._all_columns[0]
+        assert u1.corresponding_column(s1.c.col2) is u1.c.col2
+        assert u1.corresponding_column(s2.c.col2) is u1.c.col2
+
+        assert u1.corresponding_column(s2.c.col3) is u1.c._all_columns[2]
+
+        # this differs from the non-alias test because table2.c.col1 is
+        # more directly at s2.c.col1 than it is s1.c.col1.
+        assert u1.corresponding_column(table2.c.col1) is u1.c._all_columns[0]
+        assert u1.corresponding_column(table2.c.col3) is u1.c._all_columns[2]
+
+
     def test_select_union(self):
 
         # like testaliasunion, but off a Select off the union.
author	Mike Bayer <mike_mp@zzzcomputing.com>	2014-02-26 15:34:49 -0500
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2014-02-26 15:34:49 -0500
commit	302ad6228a12fe5cb4c5d332e5bab65ed373bc01 (patch)
tree	9e39551b168090a75c28c1d5ba45ef24cdbfc1da
parent	bf67069d264cba3feed8a48614289d605ed61a55 (diff)
download	sqlalchemy-302ad6228a12fe5cb4c5d332e5bab65ed373bc01.tar.gz