removals: all unicode encoding / decoding

Removed here includes:

* convert_unicode parameters
* encoding create_engine() parameter
* description encoding support
* "non-unicode fallback" modes under Python 2
* String symbols regarding Python 2 non-unicode fallbacks
* any concept of DBAPIs that don't accept unicode statements, unicode bound parameters, or that return bytes for strings anywhere except an explicit Binary / BLOB type
* unicode processors in Python / C

Risk factors:

* Whether all DBAPIs do in fact return Unicode objects for all entries in cursor.description now
* There was logic for mysql-connector trying to determine description encoding. A quick test shows Unicode coming back but it's not clear if there are still edge cases where they return bytes. If so, these are bugs in that driver, and at most we would only work around it in the mysql-connector DBAPI itself (but we won't do that either).
* It seems like Oracle 8 was not expecting unicode bound parameters. I'm assuming this was all Python 2 stuff and does not apply for modern cx_Oracle under Python 3.
* third party dialects relying upon built in unicode encoding/decoding but it's hard to imagine any non-SQLAlchemy database driver not dealing exclusively in Python unicode strings in Python 3

Change-Id: I97d762ef6d4dd836487b714d57d8136d0310f28a
References: #7257
author: Mike Bayer <mike_mp@zzzcomputing.com> 2021-11-07 15:47:15 -0500
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2021-11-10 11:24:53 -0500
commit: bd2a6e9b161251606b64d299faec583d55c2e802 (patch)
tree: cb9e304b72be61c59709caa792920515afc26c32 /lib/sqlalchemy/dialects
parent: 0c6071513fea9d183dc67979a239dff746992571 (diff)
download: sqlalchemy-bd2a6e9b161251606b64d299faec583d55c2e802.tar.gz
8 files changed, 28 insertions, 158 deletions
diff --git a/lib/sqlalchemy/dialects/mssql/base.py b/lib/sqlalchemy/dialects/mssql/base.py
index 8c8260f3b..f8ca1ffbf 100644
--- a/lib/sqlalchemy/dialects/mssql/base.py
+++ b/lib/sqlalchemy/dialects/mssql/base.py
@@ -1603,17 +1603,12 @@ class MSExecutionContext(default.DefaultExecutionContext):
 
     def _opt_encode(self, statement):
 
-        if not self.dialect.supports_unicode_statements:
-            encoded = self.dialect._encoder(statement)[0]
-        else:
-            encoded = statement
-
         if self.compiled and self.compiled.schema_translate_map:
 
             rst = self.compiled.preparer._render_schema_translates
-            encoded = rst(encoded, self.compiled.schema_translate_map)
+            statement = rst(statement, self.compiled.schema_translate_map)
 
-        return encoded
+        return statement
 
     def pre_exec(self):
         """Activate IDENTITY_INSERT if needed."""
diff --git a/lib/sqlalchemy/dialects/mysql/mysqlconnector.py b/lib/sqlalchemy/dialects/mysql/mysqlconnector.py
index e17da3174..fef4f14ca 100644
--- a/lib/sqlalchemy/dialects/mysql/mysqlconnector.py
+++ b/lib/sqlalchemy/dialects/mysql/mysqlconnector.py
@@ -27,7 +27,6 @@ from .base import BIT
 from .base import MySQLCompiler
 from .base import MySQLDialect
 from .base import MySQLIdentifierPreparer
-from ... import processors
 from ... import util
 
 
@@ -87,8 +86,6 @@ class MySQLDialect_mysqlconnector(MySQLDialect):
     driver = "mysqlconnector"
     supports_statement_cache = True
 
-    supports_unicode_binds = True
-
     supports_sane_rowcount = True
     supports_sane_multi_rowcount = True
 
@@ -101,29 +98,6 @@ class MySQLDialect_mysqlconnector(MySQLDialect):
 
     colspecs = util.update_copy(MySQLDialect.colspecs, {BIT: _myconnpyBIT})
 
-    def __init__(self, *arg, **kw):
-        super(MySQLDialect_mysqlconnector, self).__init__(*arg, **kw)
-
-        # hack description encoding since mysqlconnector randomly
-        # returns bytes or not
-        self._description_decoder = (
-            processors.to_conditional_unicode_processor_factory
-        )(self.description_encoding)
-
-    def _check_unicode_description(self, connection):
-        # hack description encoding since mysqlconnector randomly
-        # returns bytes or not
-        return False
-
-    @property
-    def description_encoding(self):
-        # total guess
-        return "latin-1"
-
-    @util.memoized_property
-    def supports_unicode_statements(self):
-        return util.py3k or self._mysqlconnector_version_info > (2, 0)
-
     @classmethod
     def dbapi(cls):
         from mysql import connector
diff --git a/lib/sqlalchemy/dialects/mysql/pymysql.py b/lib/sqlalchemy/dialects/mysql/pymysql.py
index 1d2c3be2d..3c30fb9ea 100644
--- a/lib/sqlalchemy/dialects/mysql/pymysql.py
+++ b/lib/sqlalchemy/dialects/mysql/pymysql.py
@@ -48,12 +48,6 @@ class MySQLDialect_pymysql(MySQLDialect_mysqldb):
 
     description_encoding = None
 
-    # generally, these two values should be both True
-    # or both False.   PyMySQL unicode tests pass all the way back
-    # to 0.4 either way.  See [ticket:3337]
-    supports_unicode_statements = True
-    supports_unicode_binds = True
-
     @langhelpers.memoized_property
     def supports_server_side_cursors(self):
         try:
diff --git a/lib/sqlalchemy/dialects/oracle/base.py b/lib/sqlalchemy/dialects/oracle/base.py
index 5a43205df..229a54b95 100644
--- a/lib/sqlalchemy/dialects/oracle/base.py
+++ b/lib/sqlalchemy/dialects/oracle/base.py
@@ -10,7 +10,7 @@ r"""
     :name: Oracle
     :full_support: 11.2, 18c
     :normal_support: 11+
-    :best_effort: 8+
+    :best_effort: 9+
 
 
 Auto Increment Behavior
@@ -341,6 +341,9 @@ and specify "passive_updates=False" on each relationship().
 Oracle 8 Compatibility
 ----------------------
 
+.. warning:: The status of Oracle 8 compatibility is not known for SQLAlchemy
+   2.0.
+
 When Oracle 8 is detected, the dialect internally configures itself to the
 following behaviors:
 
@@ -349,16 +352,12 @@ following behaviors:
   makes use of Oracle's (+) operator.
 
 * the NVARCHAR2 and NCLOB datatypes are no longer generated as DDL when
-  the :class:`~sqlalchemy.types.Unicode` is used - VARCHAR2 and CLOB are
-  issued instead.   This because these types don't seem to work correctly on
-  Oracle 8 even though they are available.  The
-  :class:`~sqlalchemy.types.NVARCHAR` and
+  the :class:`~sqlalchemy.types.Unicode` is used - VARCHAR2 and CLOB are issued
+  instead. This because these types don't seem to work correctly on Oracle 8
+  even though they are available. The :class:`~sqlalchemy.types.NVARCHAR` and
   :class:`~sqlalchemy.dialects.oracle.NCLOB` types will always generate
   NVARCHAR2 and NCLOB.
 
-* the "native unicode" mode is disabled when using cx_oracle, i.e. SQLAlchemy
-  encodes all Python unicode objects to "string" before passing in as bind
-  parameters.
 
 Synonym/DBLINK Reflection
 -------------------------
@@ -1439,8 +1438,6 @@ class OracleDialect(default.DefaultDialect):
     name = "oracle"
     supports_statement_cache = True
     supports_alter = True
-    supports_unicode_statements = False
-    supports_unicode_binds = False
     max_identifier_length = 128
 
     supports_simple_order_by_label = False
@@ -1576,17 +1573,6 @@ class OracleDialect(default.DefaultDialect):
             # use the default
             return None
 
-    def _check_unicode_returns(self, connection):
-        additional_tests = [
-            expression.cast(
-                expression.literal_column("'test nvarchar2 returns'"),
-                sqltypes.NVARCHAR(60),
-            )
-        ]
-        return super(OracleDialect, self)._check_unicode_returns(
-            connection, additional_tests
-        )
-
     _isolation_lookup = ["READ COMMITTED", "SERIALIZABLE"]
 
     def get_isolation_level(self, connection):
diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py
index 590c9d47c..23f619a12 100644
--- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py
+++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py
@@ -119,7 +119,7 @@ itself.  These options are always passed directly to :func:`_sa.create_engine`
 , such as::
 
     e = create_engine(
-        "oracle+cx_oracle://user:pass@dsn", coerce_to_unicode=False)
+        "oracle+cx_oracle://user:pass@dsn", coerce_to_decimal=False)
 
 The parameters accepted by the cx_oracle dialect are as follows:
 
@@ -130,8 +130,6 @@ The parameters accepted by the cx_oracle dialect are as follows:
 
 * ``auto_convert_lobs`` - defaults to True; See :ref:`cx_oracle_lob`.
 
-* ``coerce_to_unicode`` - see :ref:`cx_oracle_unicode` for detail.
-
 * ``coerce_to_decimal`` - see :ref:`cx_oracle_numeric` for detail.
 
 * ``encoding_errors`` - see :ref:`cx_oracle_unicode_encoding_errors` for detail.
@@ -210,8 +208,7 @@ Unicode
 -------
 
 As is the case for all DBAPIs under Python 3, all strings are inherently
-Unicode strings.     Under Python 2, cx_Oracle also supports Python Unicode
-objects directly.    In all cases however, the driver requires an explicit
+Unicode strings.   In all cases however, the driver requires an explicit
 encoding configuration.
 
 Ensuring the Correct Client Encoding
@@ -264,25 +261,6 @@ SQLAlchemy dialect to use NCHAR/NCLOB for the :class:`.Unicode` /
    unless the ``use_nchar_for_unicode=True`` is passed to the dialect
    when :func:`_sa.create_engine` is called.
 
-Unicode Coercion of result rows under Python 2
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-When result sets are fetched that include strings, under Python 3 the cx_Oracle
-DBAPI returns all strings as Python Unicode objects, since Python 3 only has a
-Unicode string type.  This occurs for data fetched from datatypes such as
-VARCHAR2, CHAR, CLOB, NCHAR, NCLOB, etc.  In order to provide cross-
-compatibility under Python 2, the SQLAlchemy cx_Oracle dialect will add
-Unicode-conversion to string data under Python 2 as well.  Historically, this
-made use of converters that were supplied by cx_Oracle but were found to be
-non-performant; SQLAlchemy's own converters are used for the string to Unicode
-conversion under Python 2.  To disable the Python 2 Unicode conversion for
-VARCHAR2, CHAR, and CLOB, the flag ``coerce_to_unicode=False`` can be passed to
-:func:`_sa.create_engine`.
-
-.. versionchanged:: 1.3 Unicode conversion is applied to all string values
-   by default under python 2.  The ``coerce_to_unicode`` now defaults to True
-   and can be set to False to disable the Unicode coercion of strings that are
-   delivered as VARCHAR2/CHAR/CLOB data.
 
 .. _cx_oracle_unicode_encoding_errors:
 
@@ -855,9 +833,6 @@ class OracleDialect_cx_oracle(OracleDialect):
     supports_sane_rowcount = True
     supports_sane_multi_rowcount = True
 
-    supports_unicode_statements = True
-    supports_unicode_binds = True
-
     use_setinputsizes = True
 
     driver = "cx_oracle"
@@ -892,6 +867,8 @@ class OracleDialect_cx_oracle(OracleDialect):
 
     _cx_oracle_threaded = None
 
+    _cursor_var_unicode_kwargs = util.immutabledict()
+
     @util.deprecated_params(
         threaded=(
             "1.3",
@@ -906,7 +883,6 @@ class OracleDialect_cx_oracle(OracleDialect):
     def __init__(
         self,
         auto_convert_lobs=True,
-        coerce_to_unicode=True,
         coerce_to_decimal=True,
         arraysize=50,
         encoding_errors=None,
@@ -917,10 +893,13 @@ class OracleDialect_cx_oracle(OracleDialect):
         OracleDialect.__init__(self, **kwargs)
         self.arraysize = arraysize
         self.encoding_errors = encoding_errors
+        if encoding_errors:
+            self._cursor_var_unicode_kwargs = {
+                "encodingErrors": encoding_errors
+            }
         if threaded is not None:
             self._cx_oracle_threaded = threaded
         self.auto_convert_lobs = auto_convert_lobs
-        self.coerce_to_unicode = coerce_to_unicode
         self.coerce_to_decimal = coerce_to_decimal
         if self._use_nchar_for_unicode:
             self.colspecs = self.colspecs.copy()
@@ -939,6 +918,13 @@ class OracleDialect_cx_oracle(OracleDialect):
                     "cx_Oracle version 5.2 and above are supported"
                 )
 
+            if encoding_errors and self.cx_oracle_ver < (6, 4):
+                util.warn(
+                    "cx_oracle version %r does not support encodingErrors"
+                    % (self.cx_oracle_ver,)
+                )
+                self._cursor_var_unicode_kwargs = util.immutabledict()
+
             self._include_setinputsizes = {
                 cx_Oracle.DATETIME,
                 cx_Oracle.NCLOB,
@@ -974,19 +960,6 @@ class OracleDialect_cx_oracle(OracleDialect):
 
         self._is_cx_oracle_6 = self.cx_oracle_ver >= (6,)
 
-    @property
-    def _cursor_var_unicode_kwargs(self):
-        if self.encoding_errors:
-            if self.cx_oracle_ver >= (6, 4):
-                return {"encodingErrors": self.encoding_errors}
-            else:
-                util.warn(
-                    "cx_oracle version %r does not support encodingErrors"
-                    % (self.cx_oracle_ver,)
-                )
-
-        return {}
-
     def _parse_cx_oracle_ver(self, version):
         m = re.match(r"(\d+)\.(\d+)(?:\.(\d+))?", version)
         if m:
@@ -1002,9 +975,6 @@ class OracleDialect_cx_oracle(OracleDialect):
 
     def initialize(self, connection):
         super(OracleDialect_cx_oracle, self).initialize(connection)
-        if self._is_oracle_8:
-            self.supports_unicode_binds = False
-
         self._detect_decimal_char(connection)
 
     def get_isolation_level(self, connection):
@@ -1141,9 +1111,10 @@ class OracleDialect_cx_oracle(OracleDialect):
                         cursor, name, default_type, size, precision, scale
                     )
 
-            # allow all strings to come back natively as Unicode
+            # if unicode options were specified, add a decoder, otherwise
+            # cx_Oracle should return Unicode
             elif (
-                dialect.coerce_to_unicode
+                dialect._cursor_var_unicode_kwargs
                 and default_type
                 in (
                     cx_Oracle.STRING,
@@ -1338,13 +1309,6 @@ class OracleDialect_cx_oracle(OracleDialect):
                 if dbtype
             )
 
-            if not self.supports_unicode_binds:
-                # oracle 8 only
-                collection = (
-                    (self.dialect._encoder(key)[0], dbtype)
-                    for key, dbtype in collection
-                )
-
             cursor.setinputsizes(**{key: dbtype for key, dbtype in collection})
 
     def do_recover_twophase(self, connection):
diff --git a/lib/sqlalchemy/dialects/postgresql/asyncpg.py b/lib/sqlalchemy/dialects/postgresql/asyncpg.py
index fedc0b495..28374ed60 100644
--- a/lib/sqlalchemy/dialects/postgresql/asyncpg.py
+++ b/lib/sqlalchemy/dialects/postgresql/asyncpg.py
@@ -863,11 +863,8 @@ class PGDialect_asyncpg(PGDialect):
     driver = "asyncpg"
     supports_statement_cache = True
 
-    supports_unicode_statements = True
     supports_server_side_cursors = True
 
-    supports_unicode_binds = True
-
     default_paramstyle = "format"
     supports_sane_multi_rowcount = False
     execution_ctx_cls = PGExecutionContext_asyncpg
diff --git a/lib/sqlalchemy/dialects/postgresql/psycopg2.py b/lib/sqlalchemy/dialects/postgresql/psycopg2.py
index 162ddde94..aadd11059 100644
--- a/lib/sqlalchemy/dialects/postgresql/psycopg2.py
+++ b/lib/sqlalchemy/dialects/postgresql/psycopg2.py
@@ -40,13 +40,6 @@ may be passed to :func:`_sa.create_engine()`, and include the following:
 
     :ref:`psycopg2_unicode`
 
-* ``use_native_unicode``: Under Python 2 only, this can be set to False to
-  disable the use of psycopg2's native Unicode support.
-
-  .. seealso::
-
-    :ref:`psycopg2_disable_native_unicode`
-
 
 * ``executemany_mode``, ``executemany_batch_page_size``,
   ``executemany_values_page_size``: Allows use of psycopg2
@@ -295,10 +288,7 @@ size defaults to 100.  These can be affected by passing new values to
 Unicode with Psycopg2
 ----------------------
 
-The psycopg2 DBAPI driver supports Unicode data transparently.   Under Python 2
-only, the SQLAlchemy psycopg2 dialect will enable the
-``psycopg2.extensions.UNICODE`` extension by default to ensure Unicode is
-handled properly; under Python 3, this is psycopg2's default behavior.
+The psycopg2 DBAPI driver supports Unicode data transparently.
 
 The client character encoding can be controlled for the psycopg2 dialect
 in the following ways:
@@ -347,21 +337,6 @@ in the following ways:
                                  # encoding
     client_encoding = utf8
 
-.. _psycopg2_disable_native_unicode:
-
-Disabling Native Unicode
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-Under Python 2 only, SQLAlchemy can also be instructed to skip the usage of the
-psycopg2 ``UNICODE`` extension and to instead utilize its own unicode
-encode/decode services, which are normally reserved only for those DBAPIs that
-don't fully support unicode directly.  Passing ``use_native_unicode=False`` to
-:func:`_sa.create_engine` will disable usage of ``psycopg2.extensions.
-UNICODE``. SQLAlchemy will instead encode data itself into Python bytestrings
-on the way in and coerce from bytes on the way back, using the value of the
-:func:`_sa.create_engine` ``encoding`` parameter, which defaults to ``utf-8``.
-SQLAlchemy's own unicode encode/decode functionality is steadily becoming
-obsolete as most DBAPIs now support unicode fully.
 
 
 Transactions
@@ -659,10 +634,6 @@ class PGDialect_psycopg2(PGDialect):
 
     _has_native_hstore = True
 
-    engine_config_types = PGDialect.engine_config_types.union(
-        {"use_native_unicode": util.asbool}
-    )
-
     colspecs = util.update_copy(
         PGDialect.colspecs,
         {
@@ -678,7 +649,6 @@ class PGDialect_psycopg2(PGDialect):
 
     def __init__(
         self,
-        use_native_unicode=True,
         client_encoding=None,
         use_native_hstore=True,
         use_native_uuid=True,
@@ -688,16 +658,10 @@ class PGDialect_psycopg2(PGDialect):
         **kwargs
     ):
         PGDialect.__init__(self, **kwargs)
-        self.use_native_unicode = use_native_unicode
-        if not use_native_unicode:
-            raise exc.ArgumentError(
-                "psycopg2 native_unicode mode is required under Python 3"
-            )
         if not use_native_hstore:
             self._has_native_hstore = False
         self.use_native_hstore = use_native_hstore
         self.use_native_uuid = use_native_uuid
-        self.supports_unicode_binds = use_native_unicode
         self.client_encoding = client_encoding
 
         # Parse executemany_mode argument, allowing it to be only one of the
@@ -892,8 +856,6 @@ class PGDialect_psycopg2(PGDialect):
             executemany_values = (
                 "(%s)" % context.compiled.insert_single_values_expr
             )
-            if not self.supports_unicode_statements:
-                executemany_values = executemany_values.encode(self.encoding)
 
             # guard for statement that was altered via event hook or similar
             if executemany_values not in statement:
diff --git a/lib/sqlalchemy/dialects/sqlite/base.py b/lib/sqlalchemy/dialects/sqlite/base.py
index e936c9080..dc8425859 100644
--- a/lib/sqlalchemy/dialects/sqlite/base.py
+++ b/lib/sqlalchemy/dialects/sqlite/base.py
@@ -1795,8 +1795,6 @@ class SQLiteExecutionContext(default.DefaultExecutionContext):
 class SQLiteDialect(default.DefaultDialect):
     name = "sqlite"
     supports_alter = False
-    supports_unicode_statements = True
-    supports_unicode_binds = True
 
     # SQlite supports "DEFAULT VALUES" but *does not* support
     # "VALUES (DEFAULT)"
author	Mike Bayer <mike_mp@zzzcomputing.com>	2021-11-07 15:47:15 -0500
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2021-11-10 11:24:53 -0500
commit	bd2a6e9b161251606b64d299faec583d55c2e802 (patch)
tree	cb9e304b72be61c59709caa792920515afc26c32 /lib/sqlalchemy/dialects
parent	0c6071513fea9d183dc67979a239dff746992571 (diff)
download	sqlalchemy-bd2a6e9b161251606b64d299faec583d55c2e802.tar.gz