diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2022-04-03 13:44:57 -0400 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2022-04-03 14:47:52 -0400 |
| commit | ccadbec82555c53eefa889160510f5af1e224709 (patch) | |
| tree | 959b4309fcc26191ef791034bda76e6fda0d1bdb /lib/sqlalchemy | |
| parent | 1dffb7cedeb009ca6c532db558bd0588dd846957 (diff) | |
| download | sqlalchemy-ccadbec82555c53eefa889160510f5af1e224709.tar.gz | |
use .fromisoformat() for sqlite datetime, date, time parsing
SQLite datetime, date, and time datatypes now use Python standard lib
``fromisoformat()`` methods in order to parse incoming datetime, date, and
time string values. This improves performance vs. the previous regular
expression-based approach, and also automatically accommodates datetime
and time formats that contain either a six-digit "microseconds" format or a
three-digit "milliseconds" format.
Fixes: #7029
Change-Id: I67aab4fe5ee3055e5996050cf4564981413cc221
Diffstat (limited to 'lib/sqlalchemy')
| -rw-r--r-- | lib/sqlalchemy/cyextension/processors.pyx | 57 | ||||
| -rw-r--r-- | lib/sqlalchemy/dialects/sqlite/base.py | 36 | ||||
| -rw-r--r-- | lib/sqlalchemy/engine/_py_processors.py | 35 | ||||
| -rw-r--r-- | lib/sqlalchemy/testing/suite/test_types.py | 2 |
4 files changed, 71 insertions, 59 deletions
diff --git a/lib/sqlalchemy/cyextension/processors.pyx b/lib/sqlalchemy/cyextension/processors.pyx index 9f23e73b1..b0ad865c5 100644 --- a/lib/sqlalchemy/cyextension/processors.pyx +++ b/lib/sqlalchemy/cyextension/processors.pyx @@ -1,7 +1,9 @@ import datetime +from datetime import datetime as datetime_cls +from datetime import time as time_cls +from datetime import date as date_cls import re -from cpython.datetime cimport date_new, datetime_new, import_datetime, time_new from cpython.object cimport PyObject_Str from cpython.unicode cimport PyUnicode_AsASCIIString, PyUnicode_Check, PyUnicode_Decode from libc.stdio cimport sscanf @@ -27,53 +29,22 @@ cdef inline bytes to_bytes(object value, str type_name): "- value is not a string." ) from e -import_datetime() # required to call datetime_new/date_new/time_new - def str_to_datetime(value): - if value is None: - return None - cdef int numparsed - cdef unsigned int year, month, day, hour, minute, second, microsecond = 0 - cdef bytes value_b = to_bytes(value, 'datetime') - cdef const char * string = value_b - - numparsed = sscanf(string, "%4u-%2u-%2u %2u:%2u:%2u.%6u", - &year, &month, &day, &hour, &minute, &second, &microsecond) - if numparsed < 6: - raise ValueError( - "Couldn't parse datetime string: '%s'" % (value) - ) - return datetime_new(year, month, day, hour, minute, second, microsecond, None) + if value is not None: + value = datetime_cls.fromisoformat(value) + return value -def str_to_date(value): - if value is None: - return None - cdef int numparsed - cdef unsigned int year, month, day - cdef bytes value_b = to_bytes(value, 'date') - cdef const char * string = value_b +def str_to_time(value): + if value is not None: + value = time_cls.fromisoformat(value) + return value - numparsed = sscanf(string, "%4u-%2u-%2u", &year, &month, &day) - if numparsed != 3: - raise ValueError( - "Couldn't parse date string: '%s'" % (value) - ) - return date_new(year, month, day) -def str_to_time(value): - if value is None: - return 
None - cdef int numparsed - cdef unsigned int hour, minute, second, microsecond = 0 - cdef bytes value_b = to_bytes(value, 'time') - cdef const char * string = value_b +def str_to_date(value): + if value is not None: + value = date_cls.fromisoformat(value) + return value - numparsed = sscanf(string, "%2u:%2u:%2u.%6u", &hour, &minute, &second, &microsecond) - if numparsed < 3: - raise ValueError( - "Couldn't parse time string: '%s'" % (value) - ) - return time_new(hour, minute, second, microsecond, None) cdef class DecimalResultProcessor: diff --git a/lib/sqlalchemy/dialects/sqlite/base.py b/lib/sqlalchemy/dialects/sqlite/base.py index b1ac20383..f21ab9083 100644 --- a/lib/sqlalchemy/dialects/sqlite/base.py +++ b/lib/sqlalchemy/dialects/sqlite/base.py @@ -926,6 +926,12 @@ class DATETIME(_DateTimeMixin, sqltypes.DateTime): 2021-03-15 12:05:57.105542 + The incoming storage format is by default parsed using the + Python ``datetime.fromisoformat()`` function. + + .. versionchanged:: 2.0 ``datetime.fromisoformat()`` is used for default + datetime string parsing. + The storage format can be customized to some degree using the ``storage_format`` and ``regexp`` parameters, such as:: @@ -941,7 +947,8 @@ class DATETIME(_DateTimeMixin, sqltypes.DateTime): with keys year, month, day, hour, minute, second, and microsecond. :param regexp: regular expression which will be applied to incoming result - rows. If the regexp contains named groups, the resulting match dict is + rows, replacing the use of ``datetime.fromisoformat()`` to parse incoming + strings. If the regexp contains named groups, the resulting match dict is applied to the Python datetime() constructor as keyword arguments. Otherwise, if positional groups are used, the datetime() constructor is called with positional arguments via @@ -1027,6 +1034,13 @@ class DATE(_DateTimeMixin, sqltypes.Date): 2011-03-15 + The incoming storage format is by default parsed using the + Python ``date.fromisoformat()`` function. + + .. 
versionchanged:: 2.0 ``date.fromisoformat()`` is used for default + date string parsing. + + The storage format can be customized to some degree using the ``storage_format`` and ``regexp`` parameters, such as:: @@ -1042,11 +1056,13 @@ class DATE(_DateTimeMixin, sqltypes.Date): dict with keys year, month, and day. :param regexp: regular expression which will be applied to - incoming result rows. If the regexp contains named groups, the - resulting match dict is applied to the Python date() constructor - as keyword arguments. Otherwise, if positional groups are used, the - date() constructor is called with positional arguments via + incoming result rows, replacing the use of ``date.fromisoformat()`` to + parse incoming strings. If the regexp contains named groups, the resulting + match dict is applied to the Python date() constructor as keyword + arguments. Otherwise, if positional groups are used, the date() + constructor is called with positional arguments via ``*map(int, match_obj.groups(0))``. + """ _storage_format = "%(year)04d-%(month)02d-%(day)02d" @@ -1092,6 +1108,12 @@ class TIME(_DateTimeMixin, sqltypes.Time): 12:05:57.10558 + The incoming storage format is by default parsed using the + Python ``time.fromisoformat()`` function. + + .. versionchanged:: 2.0 ``time.fromisoformat()`` is used for default + time string parsing. + The storage format can be customized to some degree using the ``storage_format`` and ``regexp`` parameters, such as:: @@ -1107,10 +1129,12 @@ class TIME(_DateTimeMixin, sqltypes.Time): with keys hour, minute, second, and microsecond. :param regexp: regular expression which will be applied to incoming result - rows. If the regexp contains named groups, the resulting match dict is + rows, replacing the use of ``time.fromisoformat()`` to parse incoming + strings. If the regexp contains named groups, the resulting match dict is applied to the Python time() constructor as keyword arguments. 
Otherwise, if positional groups are used, the time() constructor is called with positional arguments via ``*map(int, match_obj.groups(0))``. + """ _storage_format = "%(hour)02d:%(minute)02d:%(second)02d.%(microsecond)06d" diff --git a/lib/sqlalchemy/engine/_py_processors.py b/lib/sqlalchemy/engine/_py_processors.py index 27cb9e939..63f03466a 100644 --- a/lib/sqlalchemy/engine/_py_processors.py +++ b/lib/sqlalchemy/engine/_py_processors.py @@ -16,8 +16,10 @@ They all share one common characteristic: None is passed through unchanged. from __future__ import annotations import datetime +from datetime import date as date_cls +from datetime import datetime as datetime_cls +from datetime import time as time_cls from decimal import Decimal -import re import typing from typing import Any from typing import Callable @@ -26,6 +28,7 @@ from typing import Type from typing import TypeVar from typing import Union + _DT = TypeVar( "_DT", bound=Union[datetime.datetime, datetime.time, datetime.date] ) @@ -50,6 +53,7 @@ def str_to_datetime_processor_factory( "Couldn't parse %s string '%r' " "- value is not a string." 
% (type_.__name__, value) ) from err + if m is None: raise ValueError( "Couldn't parse %s string: " @@ -108,12 +112,25 @@ def int_to_boolean(value: Optional[int]) -> Optional[bool]: return bool(value) -DATETIME_RE = re.compile(r"(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)(?:\.(\d+))?") -TIME_RE = re.compile(r"(\d+):(\d+):(\d+)(?:\.(\d+))?") -DATE_RE = re.compile(r"(\d+)-(\d+)-(\d+)") +def str_to_datetime(value: Optional[str]) -> Optional[datetime.datetime]: + if value is not None: + dt_value = datetime_cls.fromisoformat(value) + else: + dt_value = None + return dt_value -str_to_datetime = str_to_datetime_processor_factory( - DATETIME_RE, datetime.datetime -) -str_to_time = str_to_datetime_processor_factory(TIME_RE, datetime.time) -str_to_date = str_to_datetime_processor_factory(DATE_RE, datetime.date) + +def str_to_time(value: Optional[str]) -> Optional[datetime.time]: + if value is not None: + dt_value = time_cls.fromisoformat(value) + else: + dt_value = None + return dt_value + + +def str_to_date(value: Optional[str]) -> Optional[datetime.date]: + if value is not None: + dt_value = date_cls.fromisoformat(value) + else: + dt_value = None + return dt_value diff --git a/lib/sqlalchemy/testing/suite/test_types.py b/lib/sqlalchemy/testing/suite/test_types.py index 0940eab9b..cc14dd9c4 100644 --- a/lib/sqlalchemy/testing/suite/test_types.py +++ b/lib/sqlalchemy/testing/suite/test_types.py @@ -432,7 +432,7 @@ class DateTimeMicrosecondsTest(_DateFixture, fixtures.TablesTest): __requires__ = ("datetime_microseconds",) __backend__ = True datatype = DateTime - data = datetime.datetime(2012, 10, 15, 12, 57, 18, 396) + data = datetime.datetime(2012, 10, 15, 12, 57, 18, 39642) class TimestampMicrosecondsTest(_DateFixture, fixtures.TablesTest): |
