diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2021-12-17 18:04:47 -0500 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2022-01-14 16:54:13 -0500 |
| commit | 06f83c26ea3636eaec0b85fc9d733ab4bfb827ec (patch) | |
| tree | 13d43b9007f956bf514d757ce6781a378125fc3e | |
| parent | a869dc8fe3cd579ed9bab665d215a6c3e3d8a4ca (diff) | |
| download | sqlalchemy-06f83c26ea3636eaec0b85fc9d733ab4bfb827ec.tar.gz | |
track item schema names to identify name collisions with default schema
Added an additional lookup step to the compiler which tracks all FROM
clauses that are tables and whose name may be shared across multiple
schemas, where one of those schemas is the implicit "default" schema. In
this case, a table that is referred to without a schema qualification is
rendered with an anonymous alias name at the compiler level in order to
disambiguate the two (or more) names. For example, selecting from both
"some_table" and "foo.some_table" now renders
"FROM some_table AS some_table_1, foo.some_table". The approach of
schema-qualifying the normally unqualified name with the server-detected
"default schema name" value was also considered; however, this approach
does not apply to Oracle, is not accepted by SQL Server, and would not work
with multiple entries in the PostgreSQL search path. The name collision
issue resolved here has been identified as affecting at least Oracle,
PostgreSQL, SQL Server, MySQL and MariaDB.
Fixes: #7471
Change-Id: Id65e7ca8c43fe8d95777084e8d5ec140ebcd784d
| -rw-r--r-- | doc/build/changelog/unreleased_20/7471.rst | 17 | ||||
| -rw-r--r-- | lib/sqlalchemy/orm/context.py | 1 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/base.py | 2 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/compiler.py | 28 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/elements.py | 1 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/selectable.py | 28 | ||||
| -rw-r--r-- | lib/sqlalchemy/testing/suite/test_select.py | 99 | ||||
| -rw-r--r-- | test/requirements.py | 3 | ||||
| -rw-r--r-- | test/sql/test_compiler.py | 72 |
9 files changed, 248 insertions, 3 deletions
diff --git a/doc/build/changelog/unreleased_20/7471.rst b/doc/build/changelog/unreleased_20/7471.rst new file mode 100644 index 000000000..344bc2769 --- /dev/null +++ b/doc/build/changelog/unreleased_20/7471.rst @@ -0,0 +1,17 @@ +.. change:: + :tags: bug, sql + :tickets: 7471 + + Added an additional lookup step to the compiler which will track all FROM + clauses which are tables, that may have the same name shared in multiple + schemas where one of the schemas is the implicit "default" schema; in this + case, the table name when referring to that name without a schema + qualification will be rendered with an anonymous alias name at the compiler + level in order to disambiguate the two (or more) names. The approach of + schema-qualifying the normally unqualified name with the server-detected + "default schema name" value was also considered, however this approach + doesn't apply to Oracle nor is it accepted by SQL Server, nor would it work + with multiple entries in the PostgreSQL search path. The name collision + issue resolved here has been identified as affecting at least Oracle, + PostgreSQL, SQL Server, MySQL and MariaDB. 
+ diff --git a/lib/sqlalchemy/orm/context.py b/lib/sqlalchemy/orm/context.py index cba7cf07d..4cff8defb 100644 --- a/lib/sqlalchemy/orm/context.py +++ b/lib/sqlalchemy/orm/context.py @@ -683,7 +683,6 @@ class ORMSelectCompileState(ORMCompileState, SelectState): self._setup_for_generate() SelectState.__init__(self, self.statement, compiler, **kw) - return self def _dump_option_struct(self): diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py index 6ab9a75f6..ae586c9f2 100644 --- a/lib/sqlalchemy/sql/base.py +++ b/lib/sqlalchemy/sql/base.py @@ -499,7 +499,7 @@ class CompileState: """ - __slots__ = ("statement",) + __slots__ = ("statement", "_ambiguous_table_name_map") plugins = {} diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index cb10811c6..af39f0672 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -1466,6 +1466,7 @@ class SQLCompiler(Compiled): add_to_result_map=None, include_table=True, result_map_targets=(), + ambiguous_table_name_map=None, **kwargs, ): name = orig_name = column.name @@ -1502,6 +1503,14 @@ class SQLCompiler(Compiled): else: schema_prefix = "" tablename = table.name + + if ( + not effective_schema + and ambiguous_table_name_map + and tablename in ambiguous_table_name_map + ): + tablename = ambiguous_table_name_map[tablename] + if isinstance(tablename, elements._truncated_label): tablename = self._truncated_identifier("alias", tablename) @@ -3252,6 +3261,10 @@ class SQLCompiler(Compiled): compile_state = select_stmt._compile_state_factory( select_stmt, self, **kwargs ) + kwargs[ + "ambiguous_table_name_map" + ] = compile_state._ambiguous_table_name_map + select_stmt = compile_state.statement toplevel = not self.stack @@ -3732,6 +3745,7 @@ class SQLCompiler(Compiled): fromhints=None, use_schema=True, from_linter=None, + ambiguous_table_name_map=None, **kwargs, ): if from_linter: @@ -3748,6 +3762,20 @@ class SQLCompiler(Compiled): ) else: ret = 
self.preparer.quote(table.name) + + if ( + not effective_schema + and ambiguous_table_name_map + and table.name in ambiguous_table_name_map + ): + anon_name = self._truncated_identifier( + "alias", ambiguous_table_name_map[table.name] + ) + + ret = ret + self.get_render_as_alias_suffix( + self.preparer.format_alias(None, anon_name) + ) + if fromhints and table in fromhints: ret = self.format_from_hint_text( ret, table, fromhints[table], iscrud diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index a025cce35..1fa312b7e 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -286,6 +286,7 @@ class ClauseElement( is_clause_element = True is_selectable = False + _is_table = False _is_textual = False _is_from_clause = False _is_returns_rows = False diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index e674c4b74..6a7b83504 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -2484,6 +2484,8 @@ class TableClause(roles.DMLTableRole, Immutable, FromClause): named_with_column = True + _is_table = True + implicit_returning = False """:class:`_expression.TableClause` doesn't support having a primary key or column @@ -3980,6 +3982,8 @@ class SelectState(util.MemoizedSlots, CompileState): return go def _get_froms(self, statement): + self._ambiguous_table_name_map = ambiguous_table_name_map = {} + return self._normalize_froms( itertools.chain( itertools.chain.from_iterable( @@ -3997,10 +4001,16 @@ class SelectState(util.MemoizedSlots, CompileState): self.from_clauses, ), check_statement=statement, + ambiguous_table_name_map=ambiguous_table_name_map, ) @classmethod - def _normalize_froms(cls, iterable_of_froms, check_statement=None): + def _normalize_froms( + cls, + iterable_of_froms, + check_statement=None, + ambiguous_table_name_map=None, + ): """given an iterable of things to select FROM, reduce them to what would actually render in the FROM clause of a 
SELECT. @@ -4013,6 +4023,7 @@ class SelectState(util.MemoizedSlots, CompileState): froms = [] for item in iterable_of_froms: + if item._is_subquery and item.element is check_statement: raise exc.InvalidRequestError( "select() construct refers to itself as a FROM" @@ -4033,6 +4044,21 @@ class SelectState(util.MemoizedSlots, CompileState): # using a list to maintain ordering froms = [f for f in froms if f not in toremove] + if ambiguous_table_name_map is not None: + ambiguous_table_name_map.update( + ( + fr.name, + _anonymous_label.safe_construct( + hash(fr.name), fr.name + ), + ) + for item in froms + for fr in item._from_objects + if fr._is_table + and fr.schema + and fr.name not in ambiguous_table_name_map + ) + return froms def _get_display_froms( diff --git a/lib/sqlalchemy/testing/suite/test_select.py b/lib/sqlalchemy/testing/suite/test_select.py index c1228f5df..92fd29503 100644 --- a/lib/sqlalchemy/testing/suite/test_select.py +++ b/lib/sqlalchemy/testing/suite/test_select.py @@ -624,6 +624,105 @@ class FetchLimitOffsetTest(fixtures.TablesTest): eq_(set(fa), set([(3, 3, 4), (4, 4, 5), (5, 4, 6)])) +class SameNamedSchemaTableTest(fixtures.TablesTest): + """tests for #7471""" + + __backend__ = True + + __requires__ = ("schemas",) + + @classmethod + def define_tables(cls, metadata): + Table( + "some_table", + metadata, + Column("id", Integer, primary_key=True), + schema=config.test_schema, + ) + Table( + "some_table", + metadata, + Column("id", Integer, primary_key=True), + Column( + "some_table_id", + Integer, + # ForeignKey("%s.some_table.id" % config.test_schema), + nullable=False, + ), + ) + + @classmethod + def insert_data(cls, connection): + some_table, some_table_schema = cls.tables( + "some_table", "%s.some_table" % config.test_schema + ) + connection.execute(some_table_schema.insert(), {"id": 1}) + connection.execute(some_table.insert(), {"id": 1, "some_table_id": 1}) + + def test_simple_join_both_tables(self, connection): + some_table, 
some_table_schema = self.tables( + "some_table", "%s.some_table" % config.test_schema + ) + + eq_( + connection.execute( + select(some_table, some_table_schema).join_from( + some_table, + some_table_schema, + some_table.c.some_table_id == some_table_schema.c.id, + ) + ).first(), + (1, 1, 1), + ) + + def test_simple_join_whereclause_only(self, connection): + some_table, some_table_schema = self.tables( + "some_table", "%s.some_table" % config.test_schema + ) + + eq_( + connection.execute( + select(some_table) + .join_from( + some_table, + some_table_schema, + some_table.c.some_table_id == some_table_schema.c.id, + ) + .where(some_table.c.id == 1) + ).first(), + (1, 1), + ) + + def test_subquery(self, connection): + some_table, some_table_schema = self.tables( + "some_table", "%s.some_table" % config.test_schema + ) + + subq = ( + select(some_table) + .join_from( + some_table, + some_table_schema, + some_table.c.some_table_id == some_table_schema.c.id, + ) + .where(some_table.c.id == 1) + .subquery() + ) + + eq_( + connection.execute( + select(some_table, subq.c.id) + .join_from( + some_table, + subq, + some_table.c.some_table_id == subq.c.id, + ) + .where(some_table.c.id == 1) + ).first(), + (1, 1, 1), + ) + + class JoinTest(fixtures.TablesTest): __backend__ = True diff --git a/test/requirements.py b/test/requirements.py index d5789d0e5..b42bab7d3 100644 --- a/test/requirements.py +++ b/test/requirements.py @@ -510,6 +510,9 @@ class DefaultRequirements(SuiteRequirements): basically, PostgreSQL. + TODO: what does this mean? 
all the backends have a "default" + schema + """ return only_on(["postgresql"]) diff --git a/test/sql/test_compiler.py b/test/sql/test_compiler.py index 5ea1110c6..c273dbbf8 100644 --- a/test/sql/test_compiler.py +++ b/test/sql/test_compiler.py @@ -5624,6 +5624,78 @@ class SchemaTest(fixtures.TestBase, AssertsCompiledSQL): render_schema_translate=True, ) + def test_schema_non_schema_disambiguation(self): + """test #7471""" + + t1 = table("some_table", column("id"), column("q")) + t2 = table("some_table", column("id"), column("p"), schema="foo") + + self.assert_compile( + select(t1, t2), + "SELECT some_table_1.id, some_table_1.q, " + "foo.some_table.id AS id_1, foo.some_table.p " + "FROM some_table AS some_table_1, foo.some_table", + ) + + self.assert_compile( + select(t1, t2).set_label_style(LABEL_STYLE_TABLENAME_PLUS_COL), + # the original "tablename_colname" label is preserved despite + # the alias of some_table + "SELECT some_table_1.id AS some_table_id, some_table_1.q AS " + "some_table_q, foo.some_table.id AS foo_some_table_id, " + "foo.some_table.p AS foo_some_table_p " + "FROM some_table AS some_table_1, foo.some_table", + ) + + self.assert_compile( + select(t1, t2).join_from(t1, t2, t1.c.id == t2.c.id), + "SELECT some_table_1.id, some_table_1.q, " + "foo.some_table.id AS id_1, foo.some_table.p " + "FROM some_table AS some_table_1 " + "JOIN foo.some_table ON some_table_1.id = foo.some_table.id", + ) + + self.assert_compile( + select(t1, t2).where(t1.c.id == t2.c.id), + "SELECT some_table_1.id, some_table_1.q, " + "foo.some_table.id AS id_1, foo.some_table.p " + "FROM some_table AS some_table_1, foo.some_table " + "WHERE some_table_1.id = foo.some_table.id", + ) + + self.assert_compile( + select(t1).where(t1.c.id == t2.c.id), + "SELECT some_table_1.id, some_table_1.q " + "FROM some_table AS some_table_1, foo.some_table " + "WHERE some_table_1.id = foo.some_table.id", + ) + + subq = select(t1).where(t1.c.id == t2.c.id).subquery() + self.assert_compile( + 
select(t2).select_from(t2).join(subq, t2.c.id == subq.c.id), + "SELECT foo.some_table.id, foo.some_table.p " + "FROM foo.some_table JOIN " + "(SELECT some_table_1.id AS id, some_table_1.q AS q " + "FROM some_table AS some_table_1, foo.some_table " + "WHERE some_table_1.id = foo.some_table.id) AS anon_1 " + "ON foo.some_table.id = anon_1.id", + ) + + self.assert_compile( + select(t1, subq.c.id) + .select_from(t1) + .join(subq, t1.c.id == subq.c.id), + # some_table is only aliased inside the subquery. this is not + # any challenge for the compiler, just checking as this is a new + # source of aliasing. + "SELECT some_table.id, some_table.q, anon_1.id AS id_1 " + "FROM some_table " + "JOIN (SELECT some_table_1.id AS id, some_table_1.q AS q " + "FROM some_table AS some_table_1, foo.some_table " + "WHERE some_table_1.id = foo.some_table.id) AS anon_1 " + "ON some_table.id = anon_1.id", + ) + def test_alias(self): a = alias(table4, "remtable") self.assert_compile( |
