- rework Oracle to no longer do its own unicode conversion; this has been observed to be very slow. this now has the effect of producing "conditional" unicode conversion for the Oracle backend, as it still returns NVARCHAR etc. as unicode [ticket:2911]
- add new "conditional" functionality to unicode processors; the C-level function now uses PyUnicode_Check() as a fast alternative to the isinstance() check in Python
author: Mike Bayer <mike_mp@zzzcomputing.com> 2014-01-17 17:36:43 -0500
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2014-01-17 17:36:43 -0500
commit: 882f615c68cd2d244a8d2cf480f3532a84bdb6fa (patch)
tree: 546c82bc04351bca317f570f1a696ebc3ae5674e /lib/sqlalchemy
parent: 4765895d10ff4bc89f30c99fa709438fa9764b6c (diff)
download: sqlalchemy-882f615c68cd2d244a8d2cf480f3532a84bdb6fa.tar.gz
4 files changed, 67 insertions, 17 deletions
diff --git a/lib/sqlalchemy/cextension/processors.c b/lib/sqlalchemy/cextension/processors.c
index c1e68fe0f..d56817763 100644
--- a/lib/sqlalchemy/cextension/processors.c
+++ b/lib/sqlalchemy/cextension/processors.c
@@ -409,6 +409,45 @@ UnicodeResultProcessor_process(UnicodeResultProcessor *self, PyObject *value)
     return PyUnicode_Decode(str, len, encoding, errors);
 }
 
+static PyObject *
+UnicodeResultProcessor_conditional_process(UnicodeResultProcessor *self, PyObject *value)
+{
+    const char *encoding, *errors;
+    char *str;
+    Py_ssize_t len;
+
+    if (value == Py_None)
+        Py_RETURN_NONE;
+
+#if PY_MAJOR_VERSION >= 3
+    if (PyUnicode_Check(value) == 1) {
+        Py_INCREF(value);
+        return value;
+    }
+
+    if (PyBytes_AsStringAndSize(value, &str, &len))
+        return NULL;
+
+    encoding = PyBytes_AS_STRING(self->encoding);
+    errors = PyBytes_AS_STRING(self->errors);
+#else
+
+    if (PyUnicode_Check(value) == 1) {
+        Py_INCREF(value);
+        return value;
+    }
+
+    if (PyString_AsStringAndSize(value, &str, &len))
+        return NULL;
+
+
+    encoding = PyString_AS_STRING(self->encoding);
+    errors = PyString_AS_STRING(self->errors);
+#endif
+
+    return PyUnicode_Decode(str, len, encoding, errors);
+}
+
 static void
 UnicodeResultProcessor_dealloc(UnicodeResultProcessor *self)
 {
@@ -424,6 +463,8 @@ UnicodeResultProcessor_dealloc(UnicodeResultProcessor *self)
 static PyMethodDef UnicodeResultProcessor_methods[] = {
     {"process", (PyCFunction)UnicodeResultProcessor_process, METH_O,
      "The value processor itself."},
+    {"conditional_process", (PyCFunction)UnicodeResultProcessor_conditional_process, METH_O,
+     "Conditional version of the value processor."},
     {NULL}  /* Sentinel */
 };
 
diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py
index c427e4bca..599eb21a3 100644
--- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py
+++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py
@@ -748,9 +748,6 @@ class OracleDialect_cx_oracle(OracleDialect):
                             255,
                             outconverter=self._detect_decimal,
                             arraysize=cursor.arraysize)
-            # allow all strings to come back natively as Unicode
-            elif defaultType in (cx_Oracle.STRING, cx_Oracle.FIXED_CHAR):
-                return cursor.var(util.text_type, size, cursor.arraysize)
 
         def on_connect(conn):
             conn.outputtypehandler = output_type_handler
diff --git a/lib/sqlalchemy/processors.py b/lib/sqlalchemy/processors.py
index 0abf063b3..d0f52e42b 100644
--- a/lib/sqlalchemy/processors.py
+++ b/lib/sqlalchemy/processors.py
@@ -15,6 +15,7 @@ They all share one common characteristic: None is passed through unchanged.
 import codecs
 import re
 import datetime
+from . import util
 
 
 def str_to_datetime_processor_factory(regexp, type_):
@@ -66,6 +67,21 @@ def py_fallback():
                 return decoder(value, errors)[0]
         return process
 
+    def to_conditional_unicode_processor_factory(encoding, errors=None):
+        decoder = codecs.getdecoder(encoding)
+
+        def process(value):
+            if value is None:
+                return None
+            elif isinstance(value, util.text_type):
+                return value
+            else:
+                # decoder returns a tuple: (value, len). Simply dropping the
+                # len part is safe: it is done that way in the normal
+                # 'xx'.decode(encoding) code path.
+                return decoder(value, errors)[0]
+        return process
+
     def to_decimal_processor_factory(target_class, scale):
         fstring = "%%.%df" % scale
 
@@ -113,12 +129,17 @@ try:
                                        str_to_date
 
     def to_unicode_processor_factory(encoding, errors=None):
-        # this is cumbersome but it would be even more so on the C side
         if errors is not None:
             return UnicodeResultProcessor(encoding, errors).process
         else:
             return UnicodeResultProcessor(encoding).process
 
+    def to_conditional_unicode_processor_factory(encoding, errors=None):
+        if errors is not None:
+            return UnicodeResultProcessor(encoding, errors).conditional_process
+        else:
+            return UnicodeResultProcessor(encoding).conditional_process
+
     def to_decimal_processor_factory(target_class, scale):
         # Note that the scale argument is not taken into account for integer
         # values in the C implementation while it is in the Python one.
diff --git a/lib/sqlalchemy/sql/sqltypes.py b/lib/sqlalchemy/sql/sqltypes.py
index 702e77360..0cc90f26b 100644
--- a/lib/sqlalchemy/sql/sqltypes.py
+++ b/lib/sqlalchemy/sql/sqltypes.py
@@ -204,20 +204,11 @@ class String(Concatenable, TypeEngine):
                                     dialect.encoding, self.unicode_error)
 
             if needs_isinstance:
-                # we wouldn't be here unless convert_unicode='force'
-                # was specified, or the driver has erratic unicode-returning
-                # habits.  since we will be getting back unicode
-                # in most cases, we check for it (decode will fail).
-                def process(value):
-                    if isinstance(value, util.text_type):
-                        return value
-                    else:
-                        return to_unicode(value)
-                return process
+                return processors.to_conditional_unicode_processor_factory(
+                                    dialect.encoding, self.unicode_error)
             else:
-                # here, we assume that the object is not unicode,
-                # avoiding expensive isinstance() check.
-                return to_unicode
+                return processors.to_unicode_processor_factory(
+                                    dialect.encoding, self.unicode_error)
         else:
             return None
author	Mike Bayer <mike_mp@zzzcomputing.com>	2014-01-17 17:36:43 -0500
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2014-01-17 17:36:43 -0500
commit	882f615c68cd2d244a8d2cf480f3532a84bdb6fa (patch)
tree	546c82bc04351bca317f570f1a696ebc3ae5674e /lib/sqlalchemy
parent	4765895d10ff4bc89f30c99fa709438fa9764b6c (diff)
download	sqlalchemy-882f615c68cd2d244a8d2cf480f3532a84bdb6fa.tar.gz