summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Bayer <mike_mp@zzzcomputing.com>2014-01-13 14:05:05 -0500
committerMike Bayer <mike_mp@zzzcomputing.com>2014-01-13 14:05:05 -0500
commit1536bc4664a248faf81c62326fe1be3dbe18b8cd (patch)
tree45fff52ffb650a24fea31d55ea17aab627893abe
parent5d973d52aa7d1e5b715ffb99800642cc2af4d972 (diff)
downloadsqlalchemy-1536bc4664a248faf81c62326fe1be3dbe18b8cd.tar.gz
- The MySQL CAST compilation now takes into account aspects of a string
type such as "charset" and "collation". While MySQL wants all character- based CAST calls to use the CHAR type, we now create a real CHAR object at CAST time and copy over all the parameters it has, so that an expression like ``cast(x, mysql.TEXT(charset='utf8'))`` will render ``CAST(t.col AS CHAR CHARACTER SET utf8)``. - Added new "unicode returns" detection to the MySQL dialect and to the default dialect system overall, such that any dialect can add extra "tests" to the on-first-connect "does this DBAPI return unicode directly?" detection. In this case, we are adding a check specifically against the "utf8" encoding with an explicit "utf8_bin" collation type (after checking that this collation is available) to test for some buggy unicode behavior observed with MySQLdb version 1.2.3. While MySQLdb has resolved this issue as of 1.2.4, the check here should guard against regressions. The change also allows the "unicode" checks to log in the engine logs, which was not previously the case. [ticket:2906]
-rw-r--r--doc/build/changelog/changelog_09.rst27
-rw-r--r--lib/sqlalchemy/connectors/mysqldb.py1
-rw-r--r--lib/sqlalchemy/dialects/mysql/base.py34
-rw-r--r--lib/sqlalchemy/dialects/mysql/mysqldb.py26
-rw-r--r--lib/sqlalchemy/engine/default.py51
-rw-r--r--test/dialect/mysql/test_compiler.py2
-rw-r--r--test/dialect/mysql/test_types.py12
-rw-r--r--test/engine/test_execute.py2
8 files changed, 121 insertions, 34 deletions
diff --git a/doc/build/changelog/changelog_09.rst b/doc/build/changelog/changelog_09.rst
index 74f92da06..96f763e31 100644
--- a/doc/build/changelog/changelog_09.rst
+++ b/doc/build/changelog/changelog_09.rst
@@ -15,6 +15,33 @@
:version: 0.9.2
.. change::
+ :tags: bug, mysql
+
+ The MySQL CAST compilation now takes into account aspects of a string
+ type such as "charset" and "collation". While MySQL wants all character-
+ based CAST calls to use the CHAR type, we now create a real CHAR
+ object at CAST time and copy over all the parameters it has, so that
+ an expression like ``cast(x, mysql.TEXT(charset='utf8'))`` will
+ render ``CAST(t.col AS CHAR CHARACTER SET utf8)``.
+
+ .. change::
+ :tags: bug, mysql
+ :tickets: 2906
+
+ Added new "unicode returns" detection to the MySQL dialect and
+ to the default dialect system overall, such that any dialect
+ can add extra "tests" to the on-first-connect "does this DBAPI
+ return unicode directly?" detection. In this case, we are
+ adding a check specifically against the "utf8" encoding with
+ an explicit "utf8_bin" collation type (after checking that
+ this collation is available) to test for some buggy unicode
+ behavior observed with MySQLdb version 1.2.3. While MySQLdb
+ has resolved this issue as of 1.2.4, the check here should
+ guard against regressions. The change also allows the "unicode"
+ checks to log in the engine logs, which was not previously
+ the case.
+
+ .. change::
:tags: bug, mysql, pool, engine
:tickets: 2907
diff --git a/lib/sqlalchemy/connectors/mysqldb.py b/lib/sqlalchemy/connectors/mysqldb.py
index 0f250dfdb..33e59218b 100644
--- a/lib/sqlalchemy/connectors/mysqldb.py
+++ b/lib/sqlalchemy/connectors/mysqldb.py
@@ -62,6 +62,7 @@ class MySQLDBConnector(Connector):
# is overridden when pymysql is used
return __import__('MySQLdb')
+
def do_executemany(self, cursor, statement, parameters, context=None):
rowcount = cursor.executemany(statement, parameters)
if context is not None:
diff --git a/lib/sqlalchemy/dialects/mysql/base.py b/lib/sqlalchemy/dialects/mysql/base.py
index a3942e89c..22675e592 100644
--- a/lib/sqlalchemy/dialects/mysql/base.py
+++ b/lib/sqlalchemy/dialects/mysql/base.py
@@ -976,6 +976,25 @@ class CHAR(_StringType, sqltypes.CHAR):
"""
super(CHAR, self).__init__(length=length, **kwargs)
+ @classmethod
+ def _adapt_string_for_cast(self, type_):
+ # copy the given string type into a CHAR
+ # for the purposes of rendering a CAST expression
+ type_ = sqltypes.to_instance(type_)
+ if isinstance(type_, sqltypes.CHAR):
+ return type_
+ elif isinstance(type_, _StringType):
+ return CHAR(
+ length=type_.length,
+ charset=type_.charset,
+ collation=type_.collation,
+ ascii=type_.ascii,
+ binary=type_.binary,
+ unicode=type_.unicode,
+ national=False # not supported in CAST
+ )
+ else:
+ return CHAR(length=type_.length)
class NVARCHAR(_StringType, sqltypes.NVARCHAR):
"""MySQL NVARCHAR type.
@@ -1397,14 +1416,9 @@ class MySQLCompiler(compiler.SQLCompiler):
elif isinstance(type_, (sqltypes.DECIMAL, sqltypes.DateTime,
sqltypes.Date, sqltypes.Time)):
return self.dialect.type_compiler.process(type_)
- elif isinstance(type_, sqltypes.Text):
- return 'CHAR'
- elif (isinstance(type_, sqltypes.String) and not
- isinstance(type_, (ENUM, SET))):
- if getattr(type_, 'length'):
- return 'CHAR(%s)' % type_.length
- else:
- return 'CHAR'
+ elif isinstance(type_, sqltypes.String) and not isinstance(type_, (ENUM, SET)):
+ adapted = CHAR._adapt_string_for_cast(type_)
+ return self.dialect.type_compiler.process(adapted)
elif isinstance(type_, sqltypes._Binary):
return 'BINARY'
elif isinstance(type_, sqltypes.NUMERIC):
@@ -2165,7 +2179,6 @@ class MySQLDialect(default.DefaultDialect):
rs.close()
def initialize(self, connection):
- default.DefaultDialect.initialize(self, connection)
self._connection_charset = self._detect_charset(connection)
self._detect_ansiquotes(connection)
if self._server_ansiquotes:
@@ -2174,6 +2187,8 @@ class MySQLDialect(default.DefaultDialect):
self.identifier_preparer = self.preparer(self,
server_ansiquotes=self._server_ansiquotes)
+ default.DefaultDialect.initialize(self, connection)
+
@property
def _supports_cast(self):
return self.server_version_info is None or \
@@ -2443,6 +2458,7 @@ class MySQLDialect(default.DefaultDialect):
# as of MySQL 5.0.1
self._backslash_escapes = 'NO_BACKSLASH_ESCAPES' not in mode
+
def _show_create_table(self, connection, table, charset=None,
full_name=None):
"""Run SHOW CREATE TABLE for a ``Table``."""
diff --git a/lib/sqlalchemy/dialects/mysql/mysqldb.py b/lib/sqlalchemy/dialects/mysql/mysqldb.py
index c6942ae2d..84e8299d5 100644
--- a/lib/sqlalchemy/dialects/mysql/mysqldb.py
+++ b/lib/sqlalchemy/dialects/mysql/mysqldb.py
@@ -56,7 +56,8 @@ from ...connectors.mysqldb import (
MySQLDBIdentifierPreparer,
MySQLDBConnector
)
-
+from .base import TEXT
+from ... import sql
class MySQLExecutionContext_mysqldb(MySQLDBExecutionContext, MySQLExecutionContext):
pass
@@ -75,4 +76,27 @@ class MySQLDialect_mysqldb(MySQLDBConnector, MySQLDialect):
statement_compiler = MySQLCompiler_mysqldb
preparer = MySQLIdentifierPreparer_mysqldb
+ def _check_unicode_returns(self, connection):
+ # work around issue fixed in
+ # https://github.com/farcepest/MySQLdb1/commit/cd44524fef63bd3fcb71947392326e9742d520e8
+ # specific issue w/ the utf8_bin collation and unicode returns
+
+ has_utf8_bin = connection.scalar(
+ "show collation where %s = 'utf8' and %s = 'utf8_bin'"
+ % (
+ self.identifier_preparer.quote("Charset"),
+ self.identifier_preparer.quote("Collation")
+ ))
+ if has_utf8_bin:
+ additional_tests = [
+ sql.collate(sql.cast(
+ sql.literal_column(
+ "'test collated returns'"),
+ TEXT(charset='utf8')), "utf8_bin")
+ ]
+ else:
+ additional_tests = []
+ return super(MySQLDBConnector, self)._check_unicode_returns(
+ connection, additional_tests)
+
dialect = MySQLDialect_mysqldb
diff --git a/lib/sqlalchemy/engine/default.py b/lib/sqlalchemy/engine/default.py
index 509d772aa..bcb9960b1 100644
--- a/lib/sqlalchemy/engine/default.py
+++ b/lib/sqlalchemy/engine/default.py
@@ -228,46 +228,55 @@ class DefaultDialect(interfaces.Dialect):
"""
return None
- def _check_unicode_returns(self, connection):
+ def _check_unicode_returns(self, connection, additional_tests=None):
if util.py2k and not self.supports_unicode_statements:
cast_to = util.binary_type
else:
cast_to = util.text_type
- def check_unicode(formatstr, type_):
+ if self.positional:
+ parameters = self.execute_sequence_format()
+ else:
+ parameters = {}
+
+ def check_unicode(test):
cursor = connection.connection.cursor()
try:
try:
- cursor.execute(
- cast_to(
- expression.select(
- [expression.cast(
- expression.literal_column(
- "'test %s returns'" % formatstr),
- type_)
- ]).compile(dialect=self)
- )
- )
+ statement = cast_to(expression.select([test]).compile(dialect=self))
+ connection._cursor_execute(cursor, statement, parameters)
row = cursor.fetchone()
return isinstance(row[0], util.text_type)
- except self.dbapi.Error as de:
+ except exc.DBAPIError as de:
util.warn("Exception attempting to "
"detect unicode returns: %r" % de)
return False
finally:
cursor.close()
- # detect plain VARCHAR
- unicode_for_varchar = check_unicode("plain", sqltypes.VARCHAR(60))
-
- # detect if there's an NVARCHAR type with different behavior available
- unicode_for_unicode = check_unicode("unicode", sqltypes.Unicode(60))
-
- if unicode_for_unicode and not unicode_for_varchar:
+ tests = [
+ # detect plain VARCHAR
+ expression.cast(
+ expression.literal_column("'test plain returns'"),
+ sqltypes.VARCHAR(60)
+ ),
+ # detect if there's an NVARCHAR type with different behavior available
+ expression.cast(
+ expression.literal_column("'test unicode returns'"),
+ sqltypes.Unicode(60)
+ ),
+ ]
+
+ if additional_tests:
+ tests += additional_tests
+
+ results = set([check_unicode(test) for test in tests])
+
+ if results.issuperset([True, False]):
return "conditional"
else:
- return unicode_for_varchar
+ return results == set([True])
def _check_unicode_description(self, connection):
# all DBAPIs on Py2K return cursor.description as encoded,
diff --git a/test/dialect/mysql/test_compiler.py b/test/dialect/mysql/test_compiler.py
index 46e8bfb82..45f8405c8 100644
--- a/test/dialect/mysql/test_compiler.py
+++ b/test/dialect/mysql/test_compiler.py
@@ -341,8 +341,10 @@ class SQLTest(fixtures.TestBase, AssertsCompiledSQL):
(VARCHAR, "CAST(t.col AS CHAR)"),
(NCHAR, "CAST(t.col AS CHAR)"),
(CHAR, "CAST(t.col AS CHAR)"),
+ (m.CHAR(charset='utf8'), "CAST(t.col AS CHAR CHARACTER SET utf8)"),
(CLOB, "CAST(t.col AS CHAR)"),
(TEXT, "CAST(t.col AS CHAR)"),
+ (m.TEXT(charset='utf8'), "CAST(t.col AS CHAR CHARACTER SET utf8)"),
(String(32), "CAST(t.col AS CHAR(32))"),
(Unicode(32), "CAST(t.col AS CHAR(32))"),
(CHAR(32), "CAST(t.col AS CHAR(32))"),
diff --git a/test/dialect/mysql/test_types.py b/test/dialect/mysql/test_types.py
index e3d5d6185..acf9c1e2f 100644
--- a/test/dialect/mysql/test_types.py
+++ b/test/dialect/mysql/test_types.py
@@ -264,15 +264,23 @@ class TypesTest(fixtures.TestBase, AssertsExecutionResults, AssertsCompiledSQL):
def test_charset_collate_table(self):
t = Table('foo', self.metadata,
Column('id', Integer),
+ Column('data', UnicodeText),
mysql_default_charset='utf8',
- mysql_collate='utf8_unicode_ci'
+ mysql_collate='utf8_bin'
)
t.create()
m2 = MetaData(testing.db)
t2 = Table('foo', m2, autoload=True)
- eq_(t2.kwargs['mysql_collate'], 'utf8_unicode_ci')
+ eq_(t2.kwargs['mysql_collate'], 'utf8_bin')
eq_(t2.kwargs['mysql_default charset'], 'utf8')
+ # test [ticket:2906]
+ # in order to test the condition here, need to use
+ # MySQLdb 1.2.3 and also need to pass either use_unicode=1
+ # or charset=utf8 to the URL.
+ t.insert().execute(id=1, data=u('some text'))
+ assert isinstance(testing.db.scalar(select([t.c.data])), util.text_type)
+
def test_bit_50(self):
"""Exercise BIT types on 5.0+ (not valid for all engine types)"""
diff --git a/test/engine/test_execute.py b/test/engine/test_execute.py
index c2479eff7..d3bd3c2cd 100644
--- a/test/engine/test_execute.py
+++ b/test/engine/test_execute.py
@@ -1050,7 +1050,7 @@ class ResultProxyTest(fixtures.TestBase):
class ExecutionOptionsTest(fixtures.TestBase):
def test_dialect_conn_options(self):
- engine = testing_engine("sqlite://")
+ engine = testing_engine("sqlite://", options=dict(_initialize=False))
engine.dialect = Mock()
conn = engine.connect()
c2 = conn.execution_options(foo="bar")