summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/build/changelog/unreleased_12/4429.rst15
-rw-r--r--lib/sqlalchemy/exc.py43
-rw-r--r--lib/sqlalchemy/testing/suite/test_dialect.py25
-rw-r--r--lib/sqlalchemy/util/__init__.py1
-rw-r--r--lib/sqlalchemy/util/compat.py12
-rw-r--r--test/base/test_utils.py46
-rw-r--r--test/engine/test_execute.py59
7 files changed, 190 insertions, 11 deletions
diff --git a/doc/build/changelog/unreleased_12/4429.rst b/doc/build/changelog/unreleased_12/4429.rst
new file mode 100644
index 000000000..12c3e7e10
--- /dev/null
+++ b/doc/build/changelog/unreleased_12/4429.rst
@@ -0,0 +1,15 @@
+.. change::
+ :tags: bug, engine
+ :tickets: 4429
+
+ Fixed a regression introduced in version 1.2 where a refactor
+ of the :class:`.SQLAlchemyError` base exception class introduced an
+ inappropriate coercion of a plain string message into Unicode under
+ Python 2, which the Python interpreter cannot handle for characters
+ outside of the platform's encoding (typically ASCII). Under Py2K, the
+ :class:`.SQLAlchemyError` class now passes a bytestring through unchanged
+ for ``__str__()``, as is the behavior of exception objects in general
+ under Py2K, and does a safe coercion to Unicode using UTF-8 with
+ backslash fallback for ``__unicode__()``. For Py3K the message is
+ typically Unicode already, but if it is not, it is likewise safe-coerced
+ using UTF-8 with backslash fallback for the ``__str__()`` method. \ No newline at end of file
diff --git a/lib/sqlalchemy/exc.py b/lib/sqlalchemy/exc.py
index e47df85af..cf16f9772 100644
--- a/lib/sqlalchemy/exc.py
+++ b/lib/sqlalchemy/exc.py
@@ -36,24 +36,47 @@ class SQLAlchemyError(Exception):
"http://sqlalche.me/e/%s)" % (self.code,)
)
- def _message(self):
- # get string representation just like Exception.__str__(self),
- # but also support if the string has non-ascii chars
+ def _message(self, as_unicode=compat.py3k):
+ # rules:
+ #
+ # 1. under py2k, for __str__ return single string arg as it was
+ # given without converting to unicode. for __unicode__
+ # do a conversion but check that it's not unicode already just in
+ # case
+ #
+ # 2. under py3k, single arg string will usually be a unicode
+ # object, but since __str__() must return unicode, check for
+ # bytestring just in case
+ #
+ # 3. multiple self.args does not occur within current SQLAlchemy,
+ # though it does occur in at least one known external library;
+ # in that case call str(), which produces a repr() of the args tuple.
+ #
if len(self.args) == 1:
- return compat.text_type(self.args[0])
+ text = self.args[0]
+ if as_unicode and isinstance(text, compat.binary_types):
+ return compat.decode_backslashreplace(text, "utf-8")
+ else:
+ return self.args[0]
else:
- return compat.text_type(self.args)
+ # this is not a normal case within SQLAlchemy but is here for
+ # compatibility with Exception.args - the str() comes out as
+ # a repr() of the tuple
+ return str(self.args)
- def __str__(self):
- message = self._message()
+ def _sql_message(self, as_unicode):
+ message = self._message(as_unicode)
if self.code:
message = "%s %s" % (message, self._code_str())
return message
+ def __str__(self):
+ return self._sql_message(compat.py3k)
+
def __unicode__(self):
- return self.__str__()
+ return self._sql_message(True)
class ArgumentError(SQLAlchemyError):
@@ -321,10 +344,10 @@ class StatementError(SQLAlchemyError):
(self.args[0], self.statement, self.params, self.orig),
)
- def __str__(self):
+ def _sql_message(self, as_unicode):
from sqlalchemy.sql import util
- details = [self._message()]
+ details = [self._message(as_unicode=as_unicode)]
if self.statement:
details.append("[SQL: %r]" % self.statement)
if self.params:
diff --git a/lib/sqlalchemy/testing/suite/test_dialect.py b/lib/sqlalchemy/testing/suite/test_dialect.py
index 245ccc6f0..1b3307042 100644
--- a/lib/sqlalchemy/testing/suite/test_dialect.py
+++ b/lib/sqlalchemy/testing/suite/test_dialect.py
@@ -1,3 +1,5 @@
+#! coding: utf-8
+
from .. import assert_raises
from .. import config
from .. import eq_
@@ -11,6 +13,7 @@ from ... import Integer
from ... import literal_column
from ... import select
from ... import String
+from ...util import compat
class ExceptionTest(fixtures.TablesTest):
@@ -53,6 +56,28 @@ class ExceptionTest(fixtures.TablesTest):
trans.rollback()
+ def test_exception_with_non_ascii(self):
+ with config.db.connect() as conn:
+ try:
+ # try to create an error message that likely has non-ascii
+ # characters in the DBAPI's message string. unfortunately
+ # there's no way to make this happen with some drivers like
+ # mysqlclient, pymysql. this at least does produce a non-
+ # ascii error message for cx_oracle, psycopg2
+ conn.execute(select([literal_column(u"méil")]))
+ assert False
+ except exc.DBAPIError as err:
+ err_str = str(err)
+
+ assert str(err.orig) in str(err)
+
+ # test that we are actually getting string on Py2k, unicode
+ # on Py3k.
+ if compat.py2k:
+ assert isinstance(err_str, str)
+ else:
+ assert isinstance(err_str, str)
+
class AutocommitTest(fixtures.TablesTest):
diff --git a/lib/sqlalchemy/util/__init__.py b/lib/sqlalchemy/util/__init__.py
index 13bcc37e7..4909c7c60 100644
--- a/lib/sqlalchemy/util/__init__.py
+++ b/lib/sqlalchemy/util/__init__.py
@@ -51,6 +51,7 @@ from .compat import byte_buffer # noqa
from .compat import callable # noqa
from .compat import cmp # noqa
from .compat import cpython # noqa
+from .compat import decode_backslashreplace # noqa
from .compat import dottedgetter # noqa
from .compat import inspect_getargspec # noqa
from .compat import int_types # noqa
diff --git a/lib/sqlalchemy/util/compat.py b/lib/sqlalchemy/util/compat.py
index 7963eebb6..6c24f75e1 100644
--- a/lib/sqlalchemy/util/compat.py
+++ b/lib/sqlalchemy/util/compat.py
@@ -92,6 +92,9 @@ if py3k:
def b64encode(x):
return base64.b64encode(x).decode("ascii")
+ def decode_backslashreplace(text, encoding):
+ return text.decode(encoding, errors="backslashreplace")
+
def cmp(a, b):
return (a > b) - (a < b)
@@ -195,6 +198,15 @@ else:
def ue(s):
return unicode(s, "unicode_escape") # noqa
+ def decode_backslashreplace(text, encoding):
+ try:
+ return text.decode(encoding)
+ except UnicodeDecodeError:
+ # regular "backslashreplace" for an incompatible encoding raises:
+ # "TypeError: don't know how to handle UnicodeDecodeError in
+ # error callback"
+ return repr(text)[1:-1].decode()
+
# not as nice as that of Py3K, but at least preserves
# the code line where the issue occurred
exec(
diff --git a/test/base/test_utils.py b/test/base/test_utils.py
index 20b41101e..69af6e032 100644
--- a/test/base/test_utils.py
+++ b/test/base/test_utils.py
@@ -1,3 +1,5 @@
+#! coding: utf-8
+
import copy
import inspect
import sys
@@ -2552,3 +2554,47 @@ class QuotedTokenParserTest(fixtures.TestBase):
def test_quoted_single_w_dot_middle(self):
self._test('"na.me"', ["na.me"])
+
+
+class BackslashReplaceTest(fixtures.TestBase):
+ def test_ascii_to_utf8(self):
+ eq_(
+ compat.decode_backslashreplace(util.b("hello world"), "utf-8"),
+ util.u("hello world"),
+ )
+
+ def test_utf8_to_utf8(self):
+ eq_(
+ compat.decode_backslashreplace(
+ util.u("some message méil").encode("utf-8"), "utf-8"
+ ),
+ util.u("some message méil"),
+ )
+
+ def test_latin1_to_utf8(self):
+ eq_(
+ compat.decode_backslashreplace(
+ util.u("some message méil").encode("latin-1"), "utf-8"
+ ),
+ util.u("some message m\\xe9il"),
+ )
+
+ eq_(
+ compat.decode_backslashreplace(
+ util.u("some message méil").encode("latin-1"), "latin-1"
+ ),
+ util.u("some message méil"),
+ )
+
+ def test_cp1251_to_utf8(self):
+ message = util.u("some message П").encode("cp1251")
+ eq_(message, b"some message \xcf")
+ eq_(
+ compat.decode_backslashreplace(message, "utf-8"),
+ util.u("some message \\xcf"),
+ )
+
+ eq_(
+ compat.decode_backslashreplace(message, "cp1251"),
+ util.u("some message П"),
+ )
diff --git a/test/engine/test_execute.py b/test/engine/test_execute.py
index d9d10a9a6..8613be5bc 100644
--- a/test/engine/test_execute.py
+++ b/test/engine/test_execute.py
@@ -406,7 +406,7 @@ class ExecuteTest(fixtures.TestBase):
obj,
)
- def test_stmt_exception_non_ascii(self):
+ def test_stmt_exception_bytestring_raised(self):
name = util.u("méil")
with testing.db.connect() as conn:
assert_raises_message(
@@ -427,6 +427,63 @@ class ExecuteTest(fixtures.TestBase):
{"uname_incorrect": "foo"},
)
+ def test_stmt_exception_bytestring_utf8(self):
+ # uncommon case for Py3K, bytestring object passed
+ # as the error message
+ message = util.u("some message méil").encode("utf-8")
+
+ err = tsa.exc.SQLAlchemyError(message)
+ if util.py2k:
+ # string passes it through
+ eq_(str(err), message)
+
+ # unicode accessor decodes the utf-8 bytestring to unicode
+ eq_(unicode(err), util.u("some message méil")) # noqa
+ else:
+ eq_(str(err), util.u("some message méil"))
+
+ def test_stmt_exception_bytestring_latin1(self):
+ # uncommon case for Py3K, bytestring object passed
+ # as the error message
+ message = util.u("some message méil").encode("latin-1")
+
+ err = tsa.exc.SQLAlchemyError(message)
+ if util.py2k:
+ # string passes it through
+ eq_(str(err), message)
+
+ # unicode accessor decodes as utf-8, backslash-escaping invalid bytes
+ eq_(unicode(err), util.u("some message m\\xe9il")) # noqa
+ else:
+ eq_(str(err), util.u("some message m\\xe9il"))
+
+ def test_stmt_exception_unicode_hook_unicode(self):
+ # uncommon case for Py2K, Unicode object passed
+ # as the error message
+ message = util.u("some message méil")
+
+ err = tsa.exc.SQLAlchemyError(message)
+ if util.py2k:
+ eq_(unicode(err), util.u("some message méil")) # noqa
+ else:
+ eq_(str(err), util.u("some message méil"))
+
+ def test_stmt_exception_str_multi_args(self):
+ err = tsa.exc.SQLAlchemyError("some message", 206)
+ eq_(str(err), "('some message', 206)")
+
+ def test_stmt_exception_str_multi_args_bytestring(self):
+ message = util.u("some message méil").encode("utf-8")
+
+ err = tsa.exc.SQLAlchemyError(message, 206)
+ eq_(str(err), str((message, 206)))
+
+ def test_stmt_exception_str_multi_args_unicode(self):
+ message = util.u("some message méil")
+
+ err = tsa.exc.SQLAlchemyError(message, 206)
+ eq_(str(err), str((message, 206)))
+
def test_stmt_exception_pickleable_no_dbapi(self):
self._test_stmt_exception_pickleable(Exception("hello world"))