diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2014-01-17 17:36:43 -0500 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2014-01-17 17:36:43 -0500 |
| commit | 882f615c68cd2d244a8d2cf480f3532a84bdb6fa (patch) | |
| tree | 546c82bc04351bca317f570f1a696ebc3ae5674e /lib/sqlalchemy | |
| parent | 4765895d10ff4bc89f30c99fa709438fa9764b6c (diff) | |
| download | sqlalchemy-882f615c68cd2d244a8d2cf480f3532a84bdb6fa.tar.gz | |
- rework Oracle to no longer do its own unicode conversion; this has been observed
to be very slow. this now has the effect of producing "conditional" unicode
conversion for the Oracle backend, as it still returns NVARCHAR etc. as unicode
[ticket:2911]
- add new "conditional" functionality to unicode processors; the C-level
function now uses PyUnicode_Check() as a fast alternative to the isinstance()
check in Python
Diffstat (limited to 'lib/sqlalchemy')
| -rw-r--r-- | lib/sqlalchemy/cextension/processors.c | 41 | ||||
| -rw-r--r-- | lib/sqlalchemy/dialects/oracle/cx_oracle.py | 3 | ||||
| -rw-r--r-- | lib/sqlalchemy/processors.py | 23 | ||||
| -rw-r--r-- | lib/sqlalchemy/sql/sqltypes.py | 17 |
4 files changed, 67 insertions, 17 deletions
diff --git a/lib/sqlalchemy/cextension/processors.c b/lib/sqlalchemy/cextension/processors.c index c1e68fe0f..d56817763 100644 --- a/lib/sqlalchemy/cextension/processors.c +++ b/lib/sqlalchemy/cextension/processors.c @@ -409,6 +409,45 @@ UnicodeResultProcessor_process(UnicodeResultProcessor *self, PyObject *value) return PyUnicode_Decode(str, len, encoding, errors); } +static PyObject * +UnicodeResultProcessor_conditional_process(UnicodeResultProcessor *self, PyObject *value) +{ + const char *encoding, *errors; + char *str; + Py_ssize_t len; + + if (value == Py_None) + Py_RETURN_NONE; + +#if PY_MAJOR_VERSION >= 3 + if (PyUnicode_Check(value) == 1) { + Py_INCREF(value); + return value; + } + + if (PyBytes_AsStringAndSize(value, &str, &len)) + return NULL; + + encoding = PyBytes_AS_STRING(self->encoding); + errors = PyBytes_AS_STRING(self->errors); +#else + + if (PyUnicode_Check(value) == 1) { + Py_INCREF(value); + return value; + } + + if (PyString_AsStringAndSize(value, &str, &len)) + return NULL; + + + encoding = PyString_AS_STRING(self->encoding); + errors = PyString_AS_STRING(self->errors); +#endif + + return PyUnicode_Decode(str, len, encoding, errors); +} + static void UnicodeResultProcessor_dealloc(UnicodeResultProcessor *self) { @@ -424,6 +463,8 @@ UnicodeResultProcessor_dealloc(UnicodeResultProcessor *self) static PyMethodDef UnicodeResultProcessor_methods[] = { {"process", (PyCFunction)UnicodeResultProcessor_process, METH_O, "The value processor itself."}, + {"conditional_process", (PyCFunction)UnicodeResultProcessor_conditional_process, METH_O, + "Conditional version of the value processor."}, {NULL} /* Sentinel */ }; diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py index c427e4bca..599eb21a3 100644 --- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py +++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py @@ -748,9 +748,6 @@ class OracleDialect_cx_oracle(OracleDialect): 255, outconverter=self._detect_decimal, arraysize=cursor.arraysize) - # allow all strings to come back natively as Unicode - elif defaultType in (cx_Oracle.STRING, cx_Oracle.FIXED_CHAR): - return cursor.var(util.text_type, size, cursor.arraysize) def on_connect(conn): conn.outputtypehandler = output_type_handler diff --git a/lib/sqlalchemy/processors.py b/lib/sqlalchemy/processors.py index 0abf063b3..d0f52e42b 100644 --- a/lib/sqlalchemy/processors.py +++ b/lib/sqlalchemy/processors.py @@ -15,6 +15,7 @@ They all share one common characteristic: None is passed through unchanged. import codecs import re import datetime +from . import util def str_to_datetime_processor_factory(regexp, type_): @@ -66,6 +67,21 @@ def py_fallback(): return decoder(value, errors)[0] return process + def to_conditional_unicode_processor_factory(encoding, errors=None): + decoder = codecs.getdecoder(encoding) + + def process(value): + if value is None: + return None + elif isinstance(value, util.text_type): + return value + else: + # decoder returns a tuple: (value, len). Simply dropping the + # len part is safe: it is done that way in the normal + # 'xx'.decode(encoding) code path. + return decoder(value, errors)[0] + return process + def to_decimal_processor_factory(target_class, scale): fstring = "%%.%df" % scale @@ -113,12 +129,17 @@ try: str_to_date def to_unicode_processor_factory(encoding, errors=None): - # this is cumbersome but it would be even more so on the C side if errors is not None: return UnicodeResultProcessor(encoding, errors).process else: return UnicodeResultProcessor(encoding).process + def to_conditional_unicode_processor_factory(encoding, errors=None): + if errors is not None: + return UnicodeResultProcessor(encoding, errors).conditional_process + else: + return UnicodeResultProcessor(encoding).conditional_process + def to_decimal_processor_factory(target_class, scale): # Note that the scale argument is not taken into account for integer # values in the C implementation while it is in the Python one. diff --git a/lib/sqlalchemy/sql/sqltypes.py b/lib/sqlalchemy/sql/sqltypes.py index 702e77360..0cc90f26b 100644 --- a/lib/sqlalchemy/sql/sqltypes.py +++ b/lib/sqlalchemy/sql/sqltypes.py @@ -204,20 +204,11 @@ class String(Concatenable, TypeEngine): dialect.encoding, self.unicode_error) if needs_isinstance: - # we wouldn't be here unless convert_unicode='force' - # was specified, or the driver has erratic unicode-returning - # habits. since we will be getting back unicode - # in most cases, we check for it (decode will fail). - def process(value): - if isinstance(value, util.text_type): - return value - else: - return to_unicode(value) - return process + return processors.to_conditional_unicode_processor_factory( + dialect.encoding, self.unicode_error) else: - # here, we assume that the object is not unicode, - # avoiding expensive isinstance() check. - return to_unicode + return processors.to_unicode_processor_factory( + dialect.encoding, self.unicode_error) else: return None |
