diff options
author | Mark Wiebe <mwiebe@enthought.com> | 2011-06-16 17:36:45 -0500 |
---|---|---|
committer | Mark Wiebe <mwiebe@enthought.com> | 2011-06-16 17:36:45 -0500 |
commit | 4df8499f5dc7ae043a3aedc6c988011c0d9840e7 (patch) | |
tree | 0f3792bf81f18608f27ea994218cb647847f689a | |
parent | 382233c3d0fb03f20224dbfbc21e803fd7078407 (diff) | |
download | numpy-4df8499f5dc7ae043a3aedc6c988011c0d9840e7.tar.gz |
ENH: datetime-autounit: Detect unit from string arrays as well
Also move datetime string functions to their own source file.
-rw-r--r-- | numpy/core/SConscript | 1 | ||||
-rw-r--r-- | numpy/core/code_generators/genapi.py | 1 | ||||
-rw-r--r-- | numpy/core/setup.py | 1 | ||||
-rw-r--r-- | numpy/core/src/multiarray/_datetime.h | 111 | ||||
-rw-r--r-- | numpy/core/src/multiarray/datetime.c | 1358 | ||||
-rw-r--r-- | numpy/core/src/multiarray/datetime_strings.c | 1219 | ||||
-rw-r--r-- | numpy/core/src/multiarray/datetime_strings.h | 77 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 3 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule_onefile.c | 1 | ||||
-rw-r--r-- | numpy/core/src/multiarray/scalartypes.c.src | 5 |
10 files changed, 1490 insertions, 1287 deletions
diff --git a/numpy/core/SConscript b/numpy/core/SConscript index 0baea3d0c..3a563e2e8 100644 --- a/numpy/core/SConscript +++ b/numpy/core/SConscript @@ -439,6 +439,7 @@ if ENABLE_SEPARATE_COMPILATION: pjoin('src', 'multiarray', 'hashdescr.c'), pjoin('src', 'multiarray', 'arrayobject.c'), pjoin('src', 'multiarray', 'datetime.c'), + pjoin('src', 'multiarray', 'datetime_strings.c'), pjoin('src', 'multiarray', 'datetime_busday.c'), pjoin('src', 'multiarray', 'datetime_busdaycal.c'), pjoin('src', 'multiarray', 'numpyos.c'), diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py index 844aebff0..f69424d31 100644 --- a/numpy/core/code_generators/genapi.py +++ b/numpy/core/code_generators/genapi.py @@ -44,6 +44,7 @@ API_FILES = [join('multiarray', 'methods.c'), join('multiarray', 'conversion_utils.c'), join('multiarray', 'buffer.c'), join('multiarray', 'datetime.c'), + join('multiarray', 'datetime_strings.c'), join('multiarray', 'datetime_busday.c'), join('multiarray', 'datetime_busdaycal.c'), join('multiarray', 'nditer.c.src'), diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 87e4e4f90..c8d348410 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -747,6 +747,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'numpymemoryview.c'), join('src', 'multiarray', 'buffer.c'), join('src', 'multiarray', 'datetime.c'), + join('src', 'multiarray', 'datetime_strings.c'), join('src', 'multiarray', 'datetime_busday.c'), join('src', 'multiarray', 'datetime_busdaycal.c'), join('src', 'multiarray', 'numpyos.c'), diff --git a/numpy/core/src/multiarray/_datetime.h b/numpy/core/src/multiarray/_datetime.h index a94abc6c7..e11e0eeeb 100644 --- a/numpy/core/src/multiarray/_datetime.h +++ b/numpy/core/src/multiarray/_datetime.h @@ -1,10 +1,26 @@ #ifndef _NPY_PRIVATE__DATETIME_H_ #define _NPY_PRIVATE__DATETIME_H_ +NPY_NO_EXPORT char *_datetime_strings[NPY_DATETIME_NUMUNITS]; + +NPY_NO_EXPORT int _days_per_month_table[2][12]; + NPY_NO_EXPORT void numpy_pydatetime_import(); /* + * Returns 1 if the given year is a leap year, 0 otherwise. + */ +NPY_NO_EXPORT int +is_leapyear(npy_int64 year); + +/* + * Calculates the days offset from the 1970 epoch. + */ +NPY_NO_EXPORT npy_int64 +get_datetimestruct_days(const npy_datetimestruct *dts); + +/* * Creates a datetime or timedelta dtype using a copy of the provided metadata. */ NPY_NO_EXPORT PyArray_Descr * @@ -104,6 +120,17 @@ datetime_metadata_divides( int strict_with_nonlinear_units); /* + * This provides the casting rules for the DATETIME data type units. + * + * Notably, there is a barrier between 'date units' and 'time units' + * for all but 'unsafe' casting. + */ +NPY_NO_EXPORT npy_bool +can_cast_datetime64_units(NPY_DATETIMEUNIT src_unit, + NPY_DATETIMEUNIT dst_unit, + NPY_CASTING casting); + +/* * This provides the casting rules for the DATETIME data type metadata. */ NPY_NO_EXPORT npy_bool @@ -112,6 +139,17 @@ can_cast_datetime64_metadata(PyArray_DatetimeMetaData *src_meta, NPY_CASTING casting); /* + * This provides the casting rules for the TIMEDELTA data type units. + * + * Notably, there is a barrier between the nonlinear years and + * months units, and all the other units. + */ +NPY_NO_EXPORT npy_bool +can_cast_timedelta64_units(NPY_DATETIMEUNIT src_unit, + NPY_DATETIMEUNIT dst_unit, + NPY_CASTING casting); + +/* * This provides the casting rules for the TIMEDELTA data type metadata. */ NPY_NO_EXPORT npy_bool @@ -196,79 +234,6 @@ append_metastr_to_string(PyArray_DatetimeMetaData *meta, PyObject *ret); /* - * Provides a string length to use for converting datetime - * objects with the given local and unit settings. - */ -NPY_NO_EXPORT int -get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base); - -/* - * Parses (almost) standard ISO 8601 date strings. The differences are: - * - * + After the date and time, may place a ' ' followed by an event number. - * + The date "20100312" is parsed as the year 20100312, not as - * equivalent to "2010-03-12". The '-' in the dates are not optional. - * + Only seconds may have a decimal point, with up to 18 digits after it - * (maximum attoseconds precision). - * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate - * the date and the time. Both are treated equivalently. - * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. - * + Doesn't handle leap seconds (seconds value has 60 in these cases). - * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow - * + Accepts special values "NaT" (not a time), "Today", (current - * day according to local time) and "Now" (current time in UTC). - * - * 'str' must be a NULL-terminated string, and 'len' must be its length. - * 'unit' should contain -1 if the unit is unknown, or the unit - * which will be used if it is. - * 'casting' controls how the detected unit from the string is allowed - * to be cast to the 'unit' parameter. - * - * 'out' gets filled with the parsed date-time. - * 'out_local' gets set to 1 if the parsed time was in local time, - * to 0 otherwise. The values 'now' and 'today' don't get counted - * as local, and neither do UTC +/-#### timezone offsets, because - * they aren't using the computer's local timezone offset. - * 'out_bestunit' gives a suggested unit based on the amount of - * resolution provided in the string, or -1 for NaT. - * 'out_special' gets set to 1 if the parsed time was 'today', - * 'now', or ''/'NaT'. For 'today', the unit recommended is - * 'D', for 'now', the unit recommended is 's', and for 'NaT' - * the unit recommended is 'Y'. - * - * Returns 0 on success, -1 on failure. - */ -NPY_NO_EXPORT int -parse_iso_8601_date(char *str, int len, - NPY_DATETIMEUNIT unit, - NPY_CASTING casting, - npy_datetimestruct *out, - npy_bool *out_local, - NPY_DATETIMEUNIT *out_bestunit, - npy_bool *out_special); - -/* - * Converts an npy_datetimestruct to an (almost) ISO 8601 - * NULL-terminated string. - * - * If 'local' is non-zero, it produces a string in local time with - * a +-#### timezone offset, otherwise it uses timezone Z (UTC). - * - * 'base' restricts the output to that unit. Set 'base' to - * -1 to auto-detect a base after which all the values are zero. - * - * 'tzoffset' is used if 'local' is enabled, and 'tzoffset' is - * set to a value other than -1. This is a manual override for - * the local time zone to use, as an offset in minutes. - * - * Returns 0 on success, -1 on failure (for example if the output - * string was too short). - */ -NPY_NO_EXPORT int -make_iso_8601_date(npy_datetimestruct *dts, char *outstr, int outlen, - int local, NPY_DATETIMEUNIT base, int tzoffset); - -/* * Tests for and converts a Python datetime.datetime or datetime.date * object into a NumPy npy_datetimestruct. * diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index e344edcbd..301677895 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -1,5 +1,5 @@ /* - * This file implements core functionality for NumPy datetime + * This file implements core functionality for NumPy datetime. * * Written by Mark Wiebe (mwwiebe@gmail.com) * Copyright (c) 2011 by Enthought, Inc. @@ -20,8 +20,8 @@ #include "numpy/npy_3kcompat.h" #include "numpy/arrayscalars.h" -#include "methods.h" #include "_datetime.h" +#include "datetime_strings.h" /* * Imports the PyDateTime functions so we can create these objects. @@ -33,11 +33,8 @@ numpy_pydatetime_import() PyDateTime_IMPORT; } -static int -is_leapyear(npy_int64 year); - /* Exported as DATETIMEUNITS in multiarraymodule.c */ -NPY_NO_EXPORT char *_datetime_strings[] = { +NPY_NO_EXPORT char *_datetime_strings[NPY_DATETIME_NUMUNITS] = { NPY_STR_Y, NPY_STR_M, NPY_STR_W, @@ -55,12 +52,15 @@ NPY_NO_EXPORT char *_datetime_strings[] = { }; /* Days per month, regular year and leap year */ -static int days_in_month[2][12] = { +NPY_NO_EXPORT int _days_per_month_table[2][12] = { { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 } }; -static int +/* + * Returns 1 if the given year is a leap year, 0 otherwise. + */ +NPY_NO_EXPORT int is_leapyear(npy_int64 year) { return (year & 0x3) == 0 && /* year % 4 == 0 */ @@ -71,7 +71,7 @@ is_leapyear(npy_int64 year) /* * Calculates the days offset from the 1970 epoch. */ -static npy_int64 +NPY_NO_EXPORT npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) { int i, month; @@ -116,7 +116,7 @@ get_datetimestruct_days(const npy_datetimestruct *dts) days += year / 400; } - month_lengths = days_in_month[is_leapyear(dts->year)]; + month_lengths = _days_per_month_table[is_leapyear(dts->year)]; month = dts->month - 1; /* Add the months */ @@ -181,7 +181,7 @@ days_to_month_number(npy_datetime days) int *month_lengths, i; year = days_to_yearsdays(&days); - month_lengths = days_in_month[is_leapyear(year)]; + month_lengths = _days_per_month_table[is_leapyear(year)]; for (i = 0; i < 12; ++i) { if (days < month_lengths[i]) { @@ -206,7 +206,7 @@ set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) int *month_lengths, i; dts->year = days_to_yearsdays(&days); - month_lengths = days_in_month[is_leapyear(dts->year)]; + month_lengths = _days_per_month_table[is_leapyear(dts->year)]; for (i = 0; i < 12; ++i) { if (days < month_lengths[i]) { @@ -2424,12 +2424,12 @@ add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) dts->month = 12; } isleap = is_leapyear(dts->year); - dts->day += days_in_month[isleap][dts->month-1]; + dts->day += _days_per_month_table[isleap][dts->month-1]; } else if (dts->day > 28) { isleap = is_leapyear(dts->year); - if (dts->day > days_in_month[isleap][dts->month-1]) { - dts->day -= days_in_month[isleap][dts->month-1]; + if (dts->day > _days_per_month_table[isleap][dts->month-1]) { + dts->day -= _days_per_month_table[isleap][dts->month-1]; dts->month++; if (dts->month > 12) { dts->year++; @@ -2440,1199 +2440,6 @@ add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) } /* - * Parses (almost) standard ISO 8601 date strings. The differences are: - * - * + After the date and time, may place a ' ' followed by an event number. - * + The date "20100312" is parsed as the year 20100312, not as - * equivalent to "2010-03-12". The '-' in the dates are not optional. - * + Only seconds may have a decimal point, with up to 18 digits after it - * (maximum attoseconds precision). - * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate - * the date and the time. Both are treated equivalently. - * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. - * + Doesn't handle leap seconds (seconds value has 60 in these cases). - * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow - * + Accepts special values "NaT" (not a time), "Today", (current - * day according to local time) and "Now" (current time in UTC). - * - * 'str' must be a NULL-terminated string, and 'len' must be its length. - * 'unit' should contain -1 if the unit is unknown, or the unit - * which will be used if it is. - * 'casting' controls how the detected unit from the string is allowed - * to be cast to the 'unit' parameter. - * - * 'out' gets filled with the parsed date-time. - * 'out_local' gets set to 1 if the parsed time was in local time, - * to 0 otherwise. The values 'now' and 'today' don't get counted - * as local, and neither do UTC +/-#### timezone offsets, because - * they aren't using the computer's local timezone offset. - * 'out_bestunit' gives a suggested unit based on the amount of - * resolution provided in the string, or -1 for NaT. - * 'out_special' gets set to 1 if the parsed time was 'today', - * 'now', or ''/'NaT'. For 'today', the unit recommended is - * 'D', for 'now', the unit recommended is 's', and for 'NaT' - * the unit recommended is 'Y'. - * - * Returns 0 on success, -1 on failure. - */ -NPY_NO_EXPORT int -parse_iso_8601_date(char *str, int len, - NPY_DATETIMEUNIT unit, - NPY_CASTING casting, - npy_datetimestruct *out, - npy_bool *out_local, - NPY_DATETIMEUNIT *out_bestunit, - npy_bool *out_special) -{ - int year_leap = 0; - int i, numdigits; - char *substr, sublen; - NPY_DATETIMEUNIT bestunit; - - /* Initialize the output to all zeros */ - memset(out, 0, sizeof(npy_datetimestruct)); - out->month = 1; - out->day = 1; - - /* The empty string and case-variants of "NaT" parse to not-a-time */ - if (len <= 0 || (len == 3 && - tolower(str[0]) == 'n' && - tolower(str[1]) == 'a' && - tolower(str[2]) == 't')) { - out->year = NPY_DATETIME_NAT; - - /* - * Indicate that this was a special value, and - * recommend generic units. - */ - if (out_local != NULL) { - *out_local = 0; - } - if (out_bestunit != NULL) { - *out_bestunit = NPY_FR_GENERIC; - } - if (out_special != NULL) { - *out_special = 1; - } - - return 0; - } - - if (unit == NPY_FR_GENERIC) { - PyErr_SetString(PyExc_ValueError, - "Cannot create a NumPy datetime other than NaT " - "with generic units"); - return -1; - } - - /* - * The string "today" resolves to midnight of today's local date in UTC. - * This is perhaps a little weird, but done so that further truncation - * to a 'datetime64[D]' type produces the date you expect, rather than - * switching to an adjacent day depending on the current time and your - * timezone. - */ - if (len == 5 && tolower(str[0]) == 't' && - tolower(str[1]) == 'o' && - tolower(str[2]) == 'd' && - tolower(str[3]) == 'a' && - tolower(str[4]) == 'y') { - time_t rawtime = 0; - struct tm tm_; - - /* 'today' only works for units of days or larger */ - if (unit != -1 && unit > NPY_FR_D) { - PyErr_SetString(PyExc_ValueError, - "Special value 'today' can only be converted " - "to a NumPy datetime with 'D' or larger units"); - return -1; - } - - time(&rawtime); -#if defined(_WIN32) - if (localtime_s(&tm_, &rawtime) != 0) { - PyErr_SetString(PyExc_OSError, "Failed to use localtime_s to " - "get local time"); - return -1; - } -#else - /* Other platforms may require something else */ - if (localtime_r(&rawtime, &tm_) == NULL) { - PyErr_SetString(PyExc_OSError, "Failed to use localtime_r to " - "get local time"); - return -1; - } -#endif - out->year = tm_.tm_year + 1900; - out->month = tm_.tm_mon + 1; - out->day = tm_.tm_mday; - - bestunit = NPY_FR_D; - - /* - * Indicate that this was a special value, and - * is a date (unit 'D'). - */ - if (out_local != NULL) { - *out_local = 0; - } - if (out_bestunit != NULL) { - *out_bestunit = bestunit; - } - if (out_special != NULL) { - *out_special = 1; - } - - /* Check the casting rule */ - if (unit != -1 && !can_cast_datetime64_units(bestunit, unit, - casting)) { - PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit " - "'%s' using casting rule %s", - str, _datetime_strings[unit], - npy_casting_to_string(casting)); - return -1; - } - - return 0; - } - - /* The string "now" resolves to the current UTC time */ - if (len == 3 && tolower(str[0]) == 'n' && - tolower(str[1]) == 'o' && - tolower(str[2]) == 'w') { - time_t rawtime = 0; - PyArray_DatetimeMetaData meta; - - time(&rawtime); - - /* Set up a dummy metadata for the conversion */ - meta.base = NPY_FR_s; - meta.num = 1; - meta.events = 1; - - bestunit = NPY_FR_s; - - /* - * Indicate that this was a special value, and - * use 's' because the time() function has resolution - * seconds. - */ - if (out_local != NULL) { - *out_local = 0; - } - if (out_bestunit != NULL) { - *out_bestunit = bestunit; - } - if (out_special != NULL) { - *out_special = 1; - } - - /* Check the casting rule */ - if (unit != -1 && !can_cast_datetime64_units(bestunit, unit, - casting)) { - PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit " - "'%s' using casting rule %s", - str, _datetime_strings[unit], - npy_casting_to_string(casting)); - return -1; - } - - return convert_datetime_to_datetimestruct(&meta, rawtime, out); - } - - /* Anything else isn't a special value */ - if (out_special != NULL) { - *out_special = 0; - } - - substr = str; - sublen = len; - - /* Skip leading whitespace */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - } - - /* Leading '-' sign for negative year */ - if (*substr == '-') { - ++substr; - --sublen; - } - - if (sublen == 0) { - goto parse_error; - } - - /* PARSE THE YEAR (digits until the '-' character) */ - out->year = 0; - while (sublen > 0 && isdigit(*substr)) { - out->year = 10 * out->year + (*substr - '0'); - ++substr; - --sublen; - } - - /* Negate the year if necessary */ - if (str[0] == '-') { - out->year = -out->year; - } - /* Check whether it's a leap-year */ - year_leap = is_leapyear(out->year); - - /* Next character must be a '-' or the end of the string */ - if (sublen == 0) { - if (out_local != NULL) { - *out_local = 0; - } - bestunit = NPY_FR_Y; - goto finish; - } - else if (*substr == '-') { - ++substr; - --sublen; - } - else { - goto parse_error; - } - - /* Can't have a trailing '-' */ - if (sublen == 0) { - goto parse_error; - } - - /* PARSE THE MONTH (2 digits) */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - out->month = 10 * (substr[0] - '0') + (substr[1] - '0'); - - if (out->month < 1 || out->month > 12) { - PyErr_Format(PyExc_ValueError, - "Month out of range in datetime string \"%s\"", str); - goto error; - } - substr += 2; - sublen -= 2; - } - else { - goto parse_error; - } - - /* Next character must be a '-' or the end of the string */ - if (sublen == 0) { - if (out_local != NULL) { - *out_local = 0; - } - bestunit = NPY_FR_M; - goto finish; - } - else if (*substr == '-') { - ++substr; - --sublen; - } - else { - goto parse_error; - } - - /* Can't have a trailing '-' */ - if (sublen == 0) { - goto parse_error; - } - - /* PARSE THE DAY (2 digits) */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - out->day = 10 * (substr[0] - '0') + (substr[1] - '0'); - - if (out->day < 1 || - out->day > days_in_month[year_leap][out->month-1]) { - PyErr_Format(PyExc_ValueError, - "Day out of range in datetime string \"%s\"", str); - goto error; - } - substr += 2; - sublen -= 2; - } - else { - goto parse_error; - } - - /* Next character must be a 'T', ' ', or end of string */ - if (sublen == 0) { - if (out_local != NULL) { - *out_local = 0; - } - bestunit = NPY_FR_D; - goto finish; - } - else if (*substr != 'T' && *substr != ' ') { - goto parse_error; - } - else { - ++substr; - --sublen; - } - - /* PARSE THE HOURS (2 digits) */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - out->hour = 10 * (substr[0] - '0') + (substr[1] - '0'); - - if (out->hour < 0 || out->hour >= 24) { - PyErr_Format(PyExc_ValueError, - "Hours out of range in datetime string \"%s\"", str); - goto error; - } - substr += 2; - sublen -= 2; - } - else { - goto parse_error; - } - - /* Next character must be a ':' or the end of the string */ - if (sublen > 0 && *substr == ':') { - ++substr; - --sublen; - } - else { - bestunit = NPY_FR_h; - goto parse_timezone; - } - - /* Can't have a trailing ':' */ - if (sublen == 0) { - goto parse_error; - } - - /* PARSE THE MINUTES (2 digits) */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - out->min = 10 * (substr[0] - '0') + (substr[1] - '0'); - - if (out->hour < 0 || out->min >= 60) { - PyErr_Format(PyExc_ValueError, - "Minutes out of range in datetime string \"%s\"", str); - goto error; - } - substr += 2; - sublen -= 2; - } - else { - goto parse_error; - } - - /* Next character must be a ':' or the end of the string */ - if (sublen > 0 && *substr == ':') { - ++substr; - --sublen; - } - else { - bestunit = NPY_FR_m; - goto parse_timezone; - } - - /* Can't have a trailing ':' */ - if (sublen == 0) { - goto parse_error; - } - - /* PARSE THE SECONDS (2 digits) */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - out->sec = 10 * (substr[0] - '0') + (substr[1] - '0'); - - if (out->sec < 0 || out->sec >= 60) { - PyErr_Format(PyExc_ValueError, - "Seconds out of range in datetime string \"%s\"", str); - goto error; - } - substr += 2; - sublen -= 2; - } - else { - goto parse_error; - } - - /* Next character may be a '.' indicating fractional seconds */ - if (sublen > 0 && *substr == '.') { - ++substr; - --sublen; - } - else { - bestunit = NPY_FR_s; - goto parse_timezone; - } - - /* PARSE THE MICROSECONDS (0 to 6 digits) */ - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->us *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->us += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (sublen == 0 || !isdigit(*substr)) { - if (numdigits > 3) { - bestunit = NPY_FR_us; - } - else { - bestunit = NPY_FR_ms; - } - goto parse_timezone; - } - - /* PARSE THE PICOSECONDS (0 to 6 digits) */ - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->ps *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->ps += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (sublen == 0 || !isdigit(*substr)) { - if (numdigits > 3) { - bestunit = NPY_FR_ps; - } - else { - bestunit = NPY_FR_ns; - } - goto parse_timezone; - } - - /* PARSE THE ATTOSECONDS (0 to 6 digits) */ - numdigits = 0; - for (i = 0; i < 6; ++i) { - out->as *= 10; - if (sublen > 0 && isdigit(*substr)) { - out->as += (*substr - '0'); - ++substr; - --sublen; - ++numdigits; - } - } - - if (numdigits > 3) { - bestunit = NPY_FR_as; - } - else { - bestunit = NPY_FR_fs; - } - -parse_timezone: - if (sublen == 0) { - /* - * ISO 8601 states to treat date-times without a timezone offset - * or 'Z' for UTC as local time. The C standard libary functions - * mktime and gmtime allow us to do this conversion. - * - * Only do this timezone adjustment for recent and future years. - */ - if (out->year > 1900 && out->year < 10000) { - time_t rawtime = 0; - struct tm tm_; - - tm_.tm_sec = out->sec; - tm_.tm_min = out->min; - tm_.tm_hour = out->hour; - tm_.tm_mday = out->day; - tm_.tm_mon = out->month - 1; - tm_.tm_year = out->year - 1900; - tm_.tm_isdst = -1; - - /* mktime converts a local 'struct tm' into a time_t */ - rawtime = mktime(&tm_); - if (rawtime == -1) { - PyErr_SetString(PyExc_OSError, "Failed to use mktime to " - "convert local time to UTC"); - goto error; - } - - /* gmtime converts a 'time_t' into a UTC 'struct tm' */ -#if defined(_WIN32) - if (gmtime_s(&tm_, &rawtime) != 0) { - PyErr_SetString(PyExc_OSError, "Failed to use gmtime_s to " - "get a UTC time"); - goto error; - } -#else - /* Other platforms may require something else */ - if (gmtime_r(&rawtime, &tm_) == NULL) { - PyErr_SetString(PyExc_OSError, "Failed to use gmtime_r to " - "get a UTC time"); - goto error; - } -#endif - out->sec = tm_.tm_sec; - out->min = tm_.tm_min; - out->hour = tm_.tm_hour; - out->day = tm_.tm_mday; - out->month = tm_.tm_mon + 1; - out->year = tm_.tm_year + 1900; - } - - /* Since neither "Z" nor a time-zone was specified, it's local */ - if (out_local != NULL) { - *out_local = 1; - } - - goto finish; - } - - /* UTC specifier */ - if (*substr == 'Z') { - /* "Z" means not local */ - if (out_local != NULL) { - *out_local = 0; - } - - if (sublen == 1) { - goto finish; - } - else { - ++substr; - --sublen; - } - } - /* Time zone offset */ - else if (*substr == '-' || *substr == '+') { - int offset_neg = 0, offset_hour = 0, offset_minute = 0; - - /* - * Since "local" means local with respect to the current - * machine, we say this is non-local. - */ - if (out_local != NULL) { - *out_local = 0; - } - - if (*substr == '-') { - offset_neg = 1; - } - ++substr; - --sublen; - - /* The hours offset */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); - substr += 2; - sublen -= 2; - if (offset_hour >= 24) { - PyErr_Format(PyExc_ValueError, - "Timezone hours offset out of range " - "in datetime string \"%s\"", str); - goto error; - } - } - else { - goto parse_error; - } - - /* The minutes offset is optional */ - if (sublen > 0) { - /* Optional ':' */ - if (*substr == ':') { - ++substr; - --sublen; - } - - /* The minutes offset (at the end of the string) */ - if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { - offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); - substr += 2; - sublen -= 2; - if (offset_minute >= 60) { - PyErr_Format(PyExc_ValueError, - "Timezone minutes offset out of range " - "in datetime string \"%s\"", str); - goto error; - } - } - else { - goto parse_error; - } - } - - /* Apply the time zone offset */ - if (offset_neg) { - offset_hour = -offset_hour; - offset_minute = -offset_minute; - } - add_minutes_to_datetimestruct(out, -60 * offset_hour - offset_minute); - } - - /* Skip trailing whitespace */ - while (sublen > 0 && isspace(*substr)) { - ++substr; - --sublen; - } - - if (sublen != 0) { - goto parse_error; - } - -finish: - if (out_bestunit != NULL) { - *out_bestunit = bestunit; - } - - /* Check the casting rule */ - if (unit != -1 && !can_cast_datetime64_units(bestunit, unit, - casting)) { - PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit " - "'%s' using casting rule %s", - str, _datetime_strings[unit], - npy_casting_to_string(casting)); - return -1; - } - - return 0; - -parse_error: - PyErr_Format(PyExc_ValueError, - "Error parsing datetime string \"%s\" at position %d", - str, (int)(substr-str)); - return -1; - -error: - return -1; -} - -/* - * Provides a string length to use for converting datetime - * objects with the given local and unit settings. - */ -NPY_NO_EXPORT int -get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) -{ - int len = 0; - - /* If no unit is provided, return the maximum length */ - if (base == -1) { - return NPY_DATETIME_MAX_ISO8601_STRLEN; - } - - switch (base) { - /* Generic units can only be used to represent NaT */ - case NPY_FR_GENERIC: - return 4; - case NPY_FR_as: - len += 3; /* "###" */ - case NPY_FR_fs: - len += 3; /* "###" */ - case NPY_FR_ps: - len += 3; /* "###" */ - case NPY_FR_ns: - len += 3; /* "###" */ - case NPY_FR_us: - len += 3; /* "###" */ - case NPY_FR_ms: - len += 4; /* ".###" */ - case NPY_FR_s: - len += 3; /* ":##" */ - case NPY_FR_m: - len += 3; /* ":##" */ - case NPY_FR_h: - len += 3; /* "T##" */ - case NPY_FR_D: - case NPY_FR_W: - len += 3; /* "-##" */ - case NPY_FR_M: - len += 3; /* "-##" */ - case NPY_FR_Y: - len += 21; /* 64-bit year */ - break; - } - - if (base >= NPY_FR_h) { - if (local) { - len += 5; /* "+####" or "-####" */ - } - else { - len += 1; /* "Z" */ - } - } - - len += 1; /* NULL terminator */ - - return len; -} - -/* - * Converts an npy_datetimestruct to an (almost) ISO 8601 - * NULL-terminated string. - * - * If 'local' is non-zero, it produces a string in local time with - * a +-#### timezone offset, otherwise it uses timezone Z (UTC). - * - * 'base' restricts the output to that unit. Set 'base' to - * -1 to auto-detect a base after which all the values are zero. - * - * 'tzoffset' is used if 'local' is enabled, and 'tzoffset' is - * set to a value other than -1. This is a manual override for - * the local time zone to use, as an offset in minutes. - * - * Returns 0 on success, -1 on failure (for example if the output - * string was too short). - */ -NPY_NO_EXPORT int -make_iso_8601_date(npy_datetimestruct *dts, char *outstr, int outlen, - int local, NPY_DATETIMEUNIT base, int tzoffset) -{ - npy_datetimestruct dts_local; - int timezone_offset = 0; - - char *substr = outstr, sublen = outlen; - int tmplen; - - /* Handle NaT, and treat a datetime with generic units as NaT */ - if (dts->year == NPY_DATETIME_NAT || base == NPY_FR_GENERIC) { - if (outlen < 4) { - goto string_too_short; - } - outstr[0] = 'N'; - outstr[0] = 'a'; - outstr[0] = 'T'; - outstr[0] = '\0'; - - return 0; - } - - /* Only do local time within a reasonable year range */ - if ((dts->year <= 1900 || dts->year >= 10000) && tzoffset == -1) { - local = 0; - } - - /* Automatically detect a good unit */ - if (base == -1) { - if (dts->as % 1000 != 0) { - base = NPY_FR_as; - } - else if (dts->as != 0) { - base = NPY_FR_fs; - } - else if (dts->ps % 1000 != 0) { - base = NPY_FR_ps; - } - else if (dts->ps != 0) { - base = NPY_FR_ns; - } - else if (dts->us % 1000 != 0) { - base = NPY_FR_us; - } - else if (dts->us != 0) { - base = NPY_FR_ms; - } - else if (dts->sec != 0) { - base = NPY_FR_s; - } - /* - * hours and minutes don't get split up by default, and printing - * in local time forces minutes - */ - else if (local || dts->min != 0 || dts->hour != 0) { - base = NPY_FR_m; - } - /* dates don't get split up by default */ - else { - base = NPY_FR_D; - } - } - /* - * Print weeks with the same precision as days. - * - * TODO: Could print weeks with YYYY-Www format if the week - * epoch is a Monday. - */ - else if (base == NPY_FR_W) { - base = NPY_FR_D; - } - - /* Printed dates have no time zone */ - if (base < NPY_FR_h) { - local = 0; - } - - /* Use the C API to convert from UTC to local time */ - if (local && tzoffset == -1) { - time_t rawtime = 0, localrawtime; - struct tm tm_; - - /* - * Convert everything in 'dts' to a time_t, to minutes precision. - * This is POSIX time, which skips leap-seconds, but because - * we drop the seconds value from the npy_datetimestruct, everything - * is ok for this operation. - */ - rawtime = (time_t)get_datetimestruct_days(dts) * 24 * 60 * 60; - rawtime += dts->hour * 60 * 60; - rawtime += dts->min * 60; - - /* localtime converts a 'time_t' into a local 'struct tm' */ -#if defined(_WIN32) - if (localtime_s(&tm_, &rawtime) != 0) { - PyErr_SetString(PyExc_OSError, "Failed to use localtime_s to " - "get a local time"); - return -1; - } -#else - /* Other platforms may require something else */ - if (localtime_r(&rawtime, &tm_) == NULL) { - PyErr_SetString(PyExc_OSError, "Failed to use localtime_r to " - "get a local time"); - return -1; - } -#endif - /* Make a copy of the npy_datetimestruct we can modify */ - dts_local = *dts; - - /* Copy back all the values except seconds */ - dts_local.min = tm_.tm_min; - dts_local.hour = tm_.tm_hour; - dts_local.day = tm_.tm_mday; - dts_local.month = tm_.tm_mon + 1; - dts_local.year = tm_.tm_year + 1900; - - /* Extract the timezone offset that was applied */ - rawtime /= 60; - localrawtime = (time_t)get_datetimestruct_days(&dts_local) * 24 * 60; - localrawtime += dts_local.hour * 60; - localrawtime += dts_local.min; - - timezone_offset = localrawtime - rawtime; - - /* Set dts to point to our local time instead of the UTC time */ - dts = &dts_local; - } - /* Use the manually provided tzoffset */ - else if (local) { - /* Make a copy of the npy_datetimestruct we can modify */ - dts_local = *dts; - dts = &dts_local; - - /* Set and apply the required timezone offset */ - timezone_offset = tzoffset; - add_minutes_to_datetimestruct(dts, timezone_offset); - } - - /* YEAR */ -#ifdef _WIN32 - tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); -#else - tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); -#endif - /* If it ran out of space or there isn't space for the NULL terminator */ - if (tmplen < 0 || tmplen >= sublen) { - goto string_too_short; - } - substr += tmplen; - sublen -= tmplen; - - /* Stop if the unit is years */ - if (base == NPY_FR_Y) { - *substr = '\0'; - return 0; - } - - /* MONTH */ - substr[0] = '-'; - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((dts->month / 10) + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)((dts->month % 10) + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr += 3; - sublen -= 3; - - /* Stop if the unit is months */ - if (base == NPY_FR_M) { - *substr = '\0'; - return 0; - } - - /* DAY */ - substr[0] = '-'; - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((dts->day / 10) + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)((dts->day % 10) + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr += 3; - sublen -= 3; - - /* Stop if the unit is days */ - if (base == NPY_FR_D) { - *substr = '\0'; - return 0; - } - - /* HOUR */ - substr[0] = 'T'; - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((dts->hour / 10) + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)((dts->hour % 10) + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr += 3; - sublen -= 3; - - /* Stop if the unit is hours */ - if (base == NPY_FR_h) { - goto add_time_zone; - } - - /* MINUTE */ - substr[0] = ':'; - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((dts->min / 10) + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)((dts->min % 10) + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr += 3; - sublen -= 3; - - /* Stop if the unit is minutes */ - if (base == NPY_FR_m) { - goto add_time_zone; - } - - /* SECOND */ - substr[0] = ':'; - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((dts->sec / 10) + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)((dts->sec % 10) + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr += 3; - sublen -= 3; - - /* Stop if the unit is seconds */ - if (base == NPY_FR_s) { - goto add_time_zone; - } - - /* MILLISECOND */ - substr[0] = '.'; - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((dts->us / 100000) % 10 + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)((dts->us / 10000) % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr[3] = (char)((dts->us / 1000) % 10 + '0'); - if (sublen <= 4 ) { - goto string_too_short; - } - substr += 4; - sublen -= 4; - - /* Stop if the unit is milliseconds */ - if (base == NPY_FR_ms) { - goto add_time_zone; - } - - /* MICROSECOND */ - substr[0] = (char)((dts->us / 100) % 10 + '0'); - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((dts->us / 10) % 10 + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)(dts->us % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr += 3; - sublen -= 3; - - /* Stop if the unit is microseconds */ - if (base == NPY_FR_us) { - goto add_time_zone; - } - - /* NANOSECOND */ - substr[0] = (char)((dts->ps / 100000) % 10 + '0'); - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((dts->ps / 10000) % 10 + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)((dts->ps / 1000) % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr += 3; - sublen -= 3; - - /* Stop if the unit is nanoseconds */ - if (base == NPY_FR_ns) { - goto add_time_zone; - } - - /* PICOSECOND */ - substr[0] = (char)((dts->ps / 100) % 10 + '0'); - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((dts->ps / 10) % 10 + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)(dts->ps % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr += 3; - sublen -= 3; - - /* Stop if the unit is picoseconds */ - if (base == NPY_FR_ps) { - goto add_time_zone; - } - - /* FEMTOSECOND */ - substr[0] = (char)((dts->as / 100000) % 10 + '0'); - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((dts->as / 10000) % 10 + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)((dts->as / 1000) % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr += 3; - sublen -= 3; - - /* Stop if the unit is femtoseconds */ - if (base == NPY_FR_fs) { - goto add_time_zone; - } - - /* ATTOSECOND */ - substr[0] = (char)((dts->as / 100) % 10 + '0'); - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((dts->as / 10) % 10 + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)(dts->as % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr += 3; - sublen -= 3; - -add_time_zone: - if (local) { - /* Add the +/- sign */ - if (timezone_offset < 0) { - substr[0] = '-'; - timezone_offset = -timezone_offset; - } - else { - substr[0] = '+'; - } - if (sublen <= 1) { - goto string_too_short; - } - substr += 1; - sublen -= 1; - - /* Add the timezone offset */ - substr[0] = (char)((timezone_offset / (10*60)) % 10 + '0'); - if (sublen <= 1 ) { - goto string_too_short; - } - substr[1] = (char)((timezone_offset / 60) % 10 + '0'); - if (sublen <= 2 ) { - goto string_too_short; - } - substr[2] = (char)(((timezone_offset % 60) / 10) % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } - substr[3] = (char)((timezone_offset % 60) % 10 + '0'); - if (sublen <= 4 ) { - goto string_too_short; - } - substr += 4; - sublen -= 4; - } - /* UTC "Zulu" time */ - else { - substr[0] = 'Z'; - if (sublen <= 1) { - goto string_too_short; - } - substr += 1; - sublen -= 1; - } - - /* Add a NULL terminator, and return */ - substr[0] = '\0'; - - return 0; - -string_too_short: - /* Put a NULL terminator on anyway */ - if (outlen > 0) { - outstr[outlen-1] = '\0'; - } - - PyErr_Format(PyExc_RuntimeError, - "The string provided for NumPy ISO datetime formatting " - "was too short, with length %d", - outlen); - return -1; -} - -/* * Tests for and converts a Python datetime.datetime or datetime.date * object into a NumPy npy_datetimestruct. * @@ -3707,7 +2514,7 @@ convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out, goto invalid_date; } isleap = is_leapyear(out->year); - if (out->day < 1 || out->day > days_in_month[isleap][out->month-1]) { + if (out->day < 1 || out->day > _days_per_month_table[isleap][out->month-1]) { goto invalid_date; } @@ -3879,7 +2686,7 @@ convert_pyobject_to_datetime(PyArray_DatetimeMetaData *meta, PyObject *obj, } /* Parse the ISO date */ - if (parse_iso_8601_date(str, len, meta->base, NPY_SAFE_CASTING, + if (parse_iso_8601_datetime(str, len, meta->base, NPY_SAFE_CASTING, &dts, NULL, &bestunit, NULL) < 0) { Py_DECREF(bytes); return -1; @@ -4851,6 +3658,129 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step, return ret; } +/* + * Examines all the strings in the given string array, and parses them + * to find the right metadata. + * + * Returns 0 on success, -1 on failure. + */ +static int +find_string_array_datetime64_type(PyObject *obj, + PyArray_DatetimeMetaData *meta) +{ + NpyIter* iter; + NpyIter_IterNextFunc *iternext; + char **dataptr; + npy_intp *strideptr, *innersizeptr; + PyArray_Descr *string_dtype; + int maxlen, len; + char *tmp_buffer = NULL; + + npy_datetimestruct dts; + PyArray_DatetimeMetaData tmp_meta; + + /* Handle zero-sized arrays specially */ + if (PyArray_SIZE(obj) == 0) { + return 0; + } + + string_dtype = PyArray_DescrFromType(NPY_STRING); + if (string_dtype == NULL) { + return -1; + } + + /* Use unsafe casting to allow unicode -> ascii string */ + iter = NpyIter_New((PyArrayObject *)obj, NPY_ITER_READONLY| + NPY_ITER_EXTERNAL_LOOP, + NPY_KEEPORDER, NPY_UNSAFE_CASTING, + string_dtype); + Py_DECREF(string_dtype); + if (iter == NULL) { + return -1; + } + + iternext = NpyIter_GetIterNext(iter, NULL); + if (iternext == NULL) { + NpyIter_Deallocate(iter); + return -1; + } + dataptr = NpyIter_GetDataPtrArray(iter); + strideptr = NpyIter_GetInnerStrideArray(iter); + innersizeptr = NpyIter_GetInnerLoopSizePtr(iter); + + /* Get the resulting string length */ + maxlen = NpyIter_GetDescrArray(iter)[0]->elsize; + + /* Allocate a buffer for strings which fill the buffer completely */ + tmp_buffer = PyArray_malloc(maxlen+1); + if (tmp_buffer == NULL) { + PyErr_NoMemory(); + NpyIter_Deallocate(iter); + return -1; + } + + /* The iteration loop */ + do { + /* Get the inner loop data/stride/count values */ + char* data = *dataptr; + npy_intp stride = *strideptr; + npy_intp count = *innersizeptr; + + /* The inner loop */ + while (count--) { + len = strnlen(data, maxlen); + + /* If the string is all full, use the buffer */ + if (len == maxlen) { + memcpy(tmp_buffer, data, len); + tmp_buffer[len] = '\0'; + + tmp_meta.base = -1; + if (parse_iso_8601_datetime(tmp_buffer, len, -1, + NPY_UNSAFE_CASTING, &dts, NULL, + &tmp_meta.base, NULL) < 0) { + goto fail; + } + } + /* Otherwise parse the data in place */ + else { + tmp_meta.base = -1; + if (parse_iso_8601_datetime(data, len, -1, + NPY_UNSAFE_CASTING, &dts, NULL, + &tmp_meta.base, NULL) < 0) { + goto fail; + } + } + + tmp_meta.num = 1; + tmp_meta.events = 1; + /* Combine it with 'meta' */ + if (compute_datetime_metadata_greatest_common_divisor(meta, + &tmp_meta, meta, 0, 0) < 0) { + goto fail; + } + + + data += stride; + } + } while(iternext(iter)); + + PyArray_free(tmp_buffer); + NpyIter_Deallocate(iter); + + return 0; + +fail: + if (tmp_buffer != NULL) { + PyArray_free(tmp_buffer); + } + if (iter != NULL) { + NpyIter_Deallocate(iter); + } + + return -1; +} + /* * Recursively determines the metadata for an NPY_DATETIME dtype. @@ -4864,8 +3794,13 @@ recursive_find_object_datetime64_type(PyObject *obj, /* Array -> use its metadata */ if (PyArray_Check(obj)) { PyArray_Descr *obj_dtype = PyArray_DESCR(obj); + + if (obj_dtype->type_num == NPY_STRING || + obj_dtype->type_num == NPY_UNICODE) { + return find_string_array_datetime64_type(obj, meta); + } /* If the array has metadata, use it */ - if (obj_dtype->type_num == NPY_DATETIME || + else if (obj_dtype->type_num == NPY_DATETIME || obj_dtype->type_num == NPY_TIMEDELTA) { PyArray_DatetimeMetaData *tmp_meta; @@ -5007,6 +3942,7 @@ recursive_find_object_timedelta64_type(PyObject *obj, /* Array -> use its metadata */ if (PyArray_Check(obj)) { PyArray_Descr *obj_dtype = PyArray_DESCR(obj); + /* If the array has metadata, use it */ if (obj_dtype->type_num == NPY_DATETIME || obj_dtype->type_num == NPY_TIMEDELTA) { diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c new file mode 100644 index 000000000..679906aee --- /dev/null +++ b/numpy/core/src/multiarray/datetime_strings.c @@ -0,0 +1,1219 @@ +/* + * This file implements string parsing and creation for NumPy datetime. + * + * Written by Mark Wiebe (mwwiebe@gmail.com) + * Copyright (c) 2011 by Enthought, Inc. + * + * See LICENSE.txt for the license. + */ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#include <time.h> + +#define _MULTIARRAYMODULE +#include <numpy/arrayobject.h> + +#include "npy_config.h" +#include "numpy/npy_3kcompat.h" + +#include "numpy/arrayscalars.h" +#include "methods.h" +#include "_datetime.h" +#include "datetime_strings.h" + +/* + * Parses (almost) standard ISO 8601 date strings. The differences are: + * + * + After the date and time, may place a ' ' followed by an event number. + * + The date "20100312" is parsed as the year 20100312, not as + * equivalent to "2010-03-12". The '-' in the dates are not optional. + * + Only seconds may have a decimal point, with up to 18 digits after it + * (maximum attoseconds precision). + * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate + * the date and the time. Both are treated equivalently. + * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. + * + Doesn't handle leap seconds (seconds value has 60 in these cases). + * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow + * + Accepts special values "NaT" (not a time), "Today", (current + * day according to local time) and "Now" (current time in UTC). + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * 'unit' should contain -1 if the unit is unknown, or the unit + * which will be used if it is. + * 'casting' controls how the detected unit from the string is allowed + * to be cast to the 'unit' parameter. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets set to 1 if the parsed time was in local time, + * to 0 otherwise. The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * 'out_bestunit' gives a suggested unit based on the amount of + * resolution provided in the string, or -1 for NaT. + * 'out_special' gets set to 1 if the parsed time was 'today', + * 'now', or ''/'NaT'. For 'today', the unit recommended is + * 'D', for 'now', the unit recommended is 's', and for 'NaT' + * the unit recommended is 'Y'. + * + * Returns 0 on success, -1 on failure. + */ +NPY_NO_EXPORT int +parse_iso_8601_datetime(char *str, int len, + NPY_DATETIMEUNIT unit, + NPY_CASTING casting, + npy_datetimestruct *out, + npy_bool *out_local, + NPY_DATETIMEUNIT *out_bestunit, + npy_bool *out_special) +{ + int year_leap = 0; + int i, numdigits; + char *substr, sublen; + NPY_DATETIMEUNIT bestunit; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + /* The empty string and case-variants of "NaT" parse to not-a-time */ + if (len <= 0 || (len == 3 && + tolower(str[0]) == 'n' && + tolower(str[1]) == 'a' && + tolower(str[2]) == 't')) { + out->year = NPY_DATETIME_NAT; + + /* + * Indicate that this was a special value, and + * recommend generic units. + */ + if (out_local != NULL) { + *out_local = 0; + } + if (out_bestunit != NULL) { + *out_bestunit = NPY_FR_GENERIC; + } + if (out_special != NULL) { + *out_special = 1; + } + + return 0; + } + + if (unit == NPY_FR_GENERIC) { + PyErr_SetString(PyExc_ValueError, + "Cannot create a NumPy datetime other than NaT " + "with generic units"); + return -1; + } + + /* + * The string "today" resolves to midnight of today's local date in UTC. + * This is perhaps a little weird, but done so that further truncation + * to a 'datetime64[D]' type produces the date you expect, rather than + * switching to an adjacent day depending on the current time and your + * timezone. + */ + if (len == 5 && tolower(str[0]) == 't' && + tolower(str[1]) == 'o' && + tolower(str[2]) == 'd' && + tolower(str[3]) == 'a' && + tolower(str[4]) == 'y') { + time_t rawtime = 0; + struct tm tm_; + + /* 'today' only works for units of days or larger */ + if (unit != -1 && unit > NPY_FR_D) { + PyErr_SetString(PyExc_ValueError, + "Special value 'today' can only be converted " + "to a NumPy datetime with 'D' or larger units"); + return -1; + } + + time(&rawtime); +#if defined(_WIN32) + if (localtime_s(&tm_, &rawtime) != 0) { + PyErr_SetString(PyExc_OSError, "Failed to use localtime_s to " + "get local time"); + return -1; + } +#else + /* Other platforms may require something else */ + if (localtime_r(&rawtime, &tm_) == NULL) { + PyErr_SetString(PyExc_OSError, "Failed to use localtime_r to " + "get local time"); + return -1; + } +#endif + out->year = tm_.tm_year + 1900; + out->month = tm_.tm_mon + 1; + out->day = tm_.tm_mday; + + bestunit = NPY_FR_D; + + /* + * Indicate that this was a special value, and + * is a date (unit 'D'). + */ + if (out_local != NULL) { + *out_local = 0; + } + if (out_bestunit != NULL) { + *out_bestunit = bestunit; + } + if (out_special != NULL) { + *out_special = 1; + } + + /* Check the casting rule */ + if (unit != -1 && !can_cast_datetime64_units(bestunit, unit, + casting)) { + PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit " + "'%s' using casting rule %s", + str, _datetime_strings[unit], + npy_casting_to_string(casting)); + return -1; + } + + return 0; + } + + /* The string "now" resolves to the current UTC time */ + if (len == 3 && tolower(str[0]) == 'n' && + tolower(str[1]) == 'o' && + tolower(str[2]) == 'w') { + time_t rawtime = 0; + PyArray_DatetimeMetaData meta; + + time(&rawtime); + + /* Set up a dummy metadata for the conversion */ + meta.base = NPY_FR_s; + meta.num = 1; + meta.events = 1; + + bestunit = NPY_FR_s; + + /* + * Indicate that this was a special value, and + * use 's' because the time() function has resolution + * seconds. + */ + if (out_local != NULL) { + *out_local = 0; + } + if (out_bestunit != NULL) { + *out_bestunit = bestunit; + } + if (out_special != NULL) { + *out_special = 1; + } + + /* Check the casting rule */ + if (unit != -1 && !can_cast_datetime64_units(bestunit, unit, + casting)) { + PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit " + "'%s' using casting rule %s", + str, _datetime_strings[unit], + npy_casting_to_string(casting)); + return -1; + } + + return convert_datetime_to_datetimestruct(&meta, rawtime, out); + } + + /* Anything else isn't a special value */ + if (out_special != NULL) { + *out_special = 0; + } + + substr = str; + sublen = len; + + /* Skip leading whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + } + + /* Leading '-' sign for negative year */ + if (*substr == '-') { + ++substr; + --sublen; + } + + if (sublen == 0) { + goto parse_error; + } + + /* PARSE THE YEAR (digits until the '-' character) */ + out->year = 0; + while (sublen > 0 && isdigit(*substr)) { + out->year = 10 * out->year + (*substr - '0'); + ++substr; + --sublen; + } + + /* Negate the year if necessary */ + if (str[0] == '-') { + out->year = -out->year; + } + /* Check whether it's a leap-year */ + year_leap = is_leapyear(out->year); + + /* Next character must be a '-' or the end of the string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + bestunit = NPY_FR_Y; + goto finish; + } + else if (*substr == '-') { + ++substr; + --sublen; + } + else { + goto parse_error; + } + + /* Can't have a trailing '-' */ + if (sublen == 0) { + goto parse_error; + } + + /* PARSE THE MONTH (2 digits) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + out->month = 10 * (substr[0] - '0') + (substr[1] - '0'); + + if (out->month < 1 || out->month > 12) { + PyErr_Format(PyExc_ValueError, + "Month out of range in datetime string \"%s\"", str); + goto error; + } + substr += 2; + sublen -= 2; + } + else { + goto parse_error; + } + + /* Next character must be a '-' or the end of the string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + bestunit = NPY_FR_M; + goto finish; + } + else if (*substr == '-') { + ++substr; + --sublen; + } + else { + goto parse_error; + } + + /* Can't have a trailing '-' */ + if (sublen == 0) { + goto parse_error; + } + + /* PARSE THE DAY (2 digits) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + out->day = 10 * (substr[0] - '0') + (substr[1] - '0'); + + if (out->day < 1 || + out->day > _days_per_month_table[year_leap][out->month-1]) { + PyErr_Format(PyExc_ValueError, + "Day out of range in datetime string \"%s\"", str); + goto error; + } + substr += 2; + sublen -= 2; + } + else { + goto parse_error; + } + + /* Next character must be a 'T', ' ', or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + bestunit = NPY_FR_D; + goto finish; + } + else if (*substr != 'T' && *substr != ' ') { + goto parse_error; + } + else { + ++substr; + --sublen; + } + + /* PARSE THE HOURS (2 digits) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + out->hour = 10 * (substr[0] - '0') + (substr[1] - '0'); + + if (out->hour < 0 || out->hour >= 24) { + PyErr_Format(PyExc_ValueError, + "Hours out of range in datetime string \"%s\"", str); + goto error; + } + substr += 2; + sublen -= 2; + } + else { + goto parse_error; + } + + /* Next character must be a ':' or the end of the string */ + if (sublen > 0 && *substr == ':') { + ++substr; + --sublen; + } + else { + bestunit = NPY_FR_h; + goto parse_timezone; + } + + /* Can't have a trailing ':' */ + if (sublen == 0) { + goto parse_error; + } + + /* PARSE THE MINUTES (2 digits) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + out->min = 10 * (substr[0] - '0') + (substr[1] - '0'); + + if (out->hour < 0 || out->min >= 60) { + PyErr_Format(PyExc_ValueError, + "Minutes out of range in datetime string \"%s\"", str); + goto error; + } + substr += 2; + sublen -= 2; + } + else { + goto parse_error; + } + + /* Next character must be a ':' or the end of the string */ + if (sublen > 0 && *substr == ':') { + ++substr; + --sublen; + } + else { + bestunit = NPY_FR_m; + goto parse_timezone; + } + + /* Can't have a trailing ':' */ + if (sublen == 0) { + goto parse_error; + } + + /* PARSE THE SECONDS (2 digits) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + out->sec = 10 * (substr[0] - '0') + (substr[1] - '0'); + + if (out->sec < 0 || out->sec >= 60) { + PyErr_Format(PyExc_ValueError, + "Seconds out of range in datetime string \"%s\"", str); + goto error; + } + substr += 2; + sublen -= 2; + } + else { + goto parse_error; + } + + /* Next character may be a '.' indicating fractional seconds */ + if (sublen > 0 && *substr == '.') { + ++substr; + --sublen; + } + else { + bestunit = NPY_FR_s; + goto parse_timezone; + } + + /* PARSE THE MICROSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->us *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->us += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_us; + } + else { + bestunit = NPY_FR_ms; + } + goto parse_timezone; + } + + /* PARSE THE PICOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->ps *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->ps += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + if (numdigits > 3) { + bestunit = NPY_FR_ps; + } + else { + bestunit = NPY_FR_ns; + } + goto parse_timezone; + } + + /* PARSE THE ATTOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->as *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->as += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (numdigits > 3) { + bestunit = NPY_FR_as; + } + else { + bestunit = NPY_FR_fs; + } + +parse_timezone: + if (sublen == 0) { + /* + * ISO 8601 states to treat date-times without a timezone offset + * or 'Z' for UTC as local time. The C standard libary functions + * mktime and gmtime allow us to do this conversion. + * + * Only do this timezone adjustment for recent and future years. + */ + if (out->year > 1900 && out->year < 10000) { + time_t rawtime = 0; + struct tm tm_; + + tm_.tm_sec = out->sec; + tm_.tm_min = out->min; + tm_.tm_hour = out->hour; + tm_.tm_mday = out->day; + tm_.tm_mon = out->month - 1; + tm_.tm_year = out->year - 1900; + tm_.tm_isdst = -1; + + /* mktime converts a local 'struct tm' into a time_t */ + rawtime = mktime(&tm_); + if (rawtime == -1) { + PyErr_SetString(PyExc_OSError, "Failed to use mktime to " + "convert local time to UTC"); + goto error; + } + + /* gmtime converts a 'time_t' into a UTC 'struct tm' */ +#if defined(_WIN32) + if (gmtime_s(&tm_, &rawtime) != 0) { + PyErr_SetString(PyExc_OSError, "Failed to use gmtime_s to " + "get a UTC time"); + goto error; + } +#else + /* Other platforms may require something else */ + if (gmtime_r(&rawtime, &tm_) == NULL) { + PyErr_SetString(PyExc_OSError, "Failed to use gmtime_r to " + "get a UTC time"); + goto error; + } +#endif + out->sec = tm_.tm_sec; + out->min = tm_.tm_min; + out->hour = tm_.tm_hour; + out->day = tm_.tm_mday; + out->month = tm_.tm_mon + 1; + out->year = tm_.tm_year + 1900; + } + + /* Since neither "Z" nor a time-zone was specified, it's local */ + if (out_local != NULL) { + *out_local = 1; + } + + goto finish; + } + + /* UTC specifier */ + if (*substr == 'Z') { + /* "Z" means not local */ + if (out_local != NULL) { + *out_local = 0; + } + + if (sublen == 1) { + goto finish; + } + else { + ++substr; + --sublen; + } + } + /* Time zone offset */ + else if (*substr == '-' || *substr == '+') { + int offset_neg = 0, offset_hour = 0, offset_minute = 0; + + /* + * Since "local" means local with respect to the current + * machine, we say this is non-local. + */ + if (out_local != NULL) { + *out_local = 0; + } + + if (*substr == '-') { + offset_neg = 1; + } + ++substr; + --sublen; + + /* The hours offset */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_hour >= 24) { + PyErr_Format(PyExc_ValueError, + "Timezone hours offset out of range " + "in datetime string \"%s\"", str); + goto error; + } + } + else { + goto parse_error; + } + + /* The minutes offset is optional */ + if (sublen > 0) { + /* Optional ':' */ + if (*substr == ':') { + ++substr; + --sublen; + } + + /* The minutes offset (at the end of the string) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_minute >= 60) { + PyErr_Format(PyExc_ValueError, + "Timezone minutes offset out of range " + "in datetime string \"%s\"", str); + goto error; + } + } + else { + goto parse_error; + } + } + + /* Apply the time zone offset */ + if (offset_neg) { + offset_hour = -offset_hour; + offset_minute = -offset_minute; + } + add_minutes_to_datetimestruct(out, -60 * offset_hour - offset_minute); + } + + /* Skip trailing whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + } + + if (sublen != 0) { + goto parse_error; + } + +finish: + if (out_bestunit != NULL) { + *out_bestunit = bestunit; + } + + /* Check the casting rule */ + if (unit != -1 && !can_cast_datetime64_units(bestunit, unit, + casting)) { + PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit " + "'%s' using casting rule %s", + str, _datetime_strings[unit], + npy_casting_to_string(casting)); + return -1; + } + + return 0; + +parse_error: + PyErr_Format(PyExc_ValueError, + "Error parsing datetime string \"%s\" at position %d", + str, (int)(substr-str)); + return -1; + +error: + return -1; +} + +/* + * Provides a string length to use for converting datetime + * objects with the given local and unit settings. + */ +NPY_NO_EXPORT int +get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) +{ + int len = 0; + + /* If no unit is provided, return the maximum length */ + if (base == -1) { + return NPY_DATETIME_MAX_ISO8601_STRLEN; + } + + switch (base) { + /* Generic units can only be used to represent NaT */ + case NPY_FR_GENERIC: + return 4; + case NPY_FR_as: + len += 3; /* "###" */ + case NPY_FR_fs: + len += 3; /* "###" */ + case NPY_FR_ps: + len += 3; /* "###" */ + case NPY_FR_ns: + len += 3; /* "###" */ + case NPY_FR_us: + len += 3; /* "###" */ + case NPY_FR_ms: + len += 4; /* ".###" */ + case NPY_FR_s: + len += 3; /* ":##" */ + case NPY_FR_m: + len += 3; /* ":##" */ + case NPY_FR_h: + len += 3; /* "T##" */ + case NPY_FR_D: + case NPY_FR_W: + len += 3; /* "-##" */ + case NPY_FR_M: + len += 3; /* "-##" */ + case NPY_FR_Y: + len += 21; /* 64-bit year */ + break; + } + + if (base >= NPY_FR_h) { + if (local) { + len += 5; /* "+####" or "-####" */ + } + else { + len += 1; /* "Z" */ + } + } + + len += 1; /* NULL terminator */ + + return len; +} + +/* + * Converts an npy_datetimestruct to an (almost) ISO 8601 + * NULL-terminated string. + * + * If 'local' is non-zero, it produces a string in local time with + * a +-#### timezone offset, otherwise it uses timezone Z (UTC). + * + * 'base' restricts the output to that unit. Set 'base' to + * -1 to auto-detect a base after which all the values are zero. + * + * 'tzoffset' is used if 'local' is enabled, and 'tzoffset' is + * set to a value other than -1. This is a manual override for + * the local time zone to use, as an offset in minutes. + * + * Returns 0 on success, -1 on failure (for example if the output + * string was too short). + */ +NPY_NO_EXPORT int +make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + int local, NPY_DATETIMEUNIT base, int tzoffset) +{ + npy_datetimestruct dts_local; + int timezone_offset = 0; + + char *substr = outstr, sublen = outlen; + int tmplen; + + /* Handle NaT, and treat a datetime with generic units as NaT */ + if (dts->year == NPY_DATETIME_NAT || base == NPY_FR_GENERIC) { + if (outlen < 4) { + goto string_too_short; + } + outstr[0] = 'N'; + outstr[0] = 'a'; + outstr[0] = 'T'; + outstr[0] = '\0'; + + return 0; + } + + /* Only do local time within a reasonable year range */ + if ((dts->year <= 1900 || dts->year >= 10000) && tzoffset == -1) { + local = 0; + } + + /* Automatically detect a good unit */ + if (base == -1) { + if (dts->as % 1000 != 0) { + base = NPY_FR_as; + } + else if (dts->as != 0) { + base = NPY_FR_fs; + } + else if (dts->ps % 1000 != 0) { + base = NPY_FR_ps; + } + else if (dts->ps != 0) { + base = NPY_FR_ns; + } + else if (dts->us % 1000 != 0) { + base = NPY_FR_us; + } + else if (dts->us != 0) { + base = NPY_FR_ms; + } + else if (dts->sec != 0) { + base = NPY_FR_s; + } + /* + * hours and minutes don't get split up by default, and printing + * in local time forces minutes + */ + else if (local || dts->min != 0 || dts->hour != 0) { + base = NPY_FR_m; + } + /* dates don't get split up by default */ + else { + base = NPY_FR_D; + } + } + /* + * Print weeks with the same precision as days. + * + * TODO: Could print weeks with YYYY-Www format if the week + * epoch is a Monday. + */ + else if (base == NPY_FR_W) { + base = NPY_FR_D; + } + + /* Printed dates have no time zone */ + if (base < NPY_FR_h) { + local = 0; + } + + /* Use the C API to convert from UTC to local time */ + if (local && tzoffset == -1) { + time_t rawtime = 0, localrawtime; + struct tm tm_; + + /* + * Convert everything in 'dts' to a time_t, to minutes precision. + * This is POSIX time, which skips leap-seconds, but because + * we drop the seconds value from the npy_datetimestruct, everything + * is ok for this operation. + */ + rawtime = (time_t)get_datetimestruct_days(dts) * 24 * 60 * 60; + rawtime += dts->hour * 60 * 60; + rawtime += dts->min * 60; + + /* localtime converts a 'time_t' into a local 'struct tm' */ +#if defined(_WIN32) + if (localtime_s(&tm_, &rawtime) != 0) { + PyErr_SetString(PyExc_OSError, "Failed to use localtime_s to " + "get a local time"); + return -1; + } +#else + /* Other platforms may require something else */ + if (localtime_r(&rawtime, &tm_) == NULL) { + PyErr_SetString(PyExc_OSError, "Failed to use localtime_r to " + "get a local time"); + return -1; + } +#endif + /* Make a copy of the npy_datetimestruct we can modify */ + dts_local = *dts; + + /* Copy back all the values except seconds */ + dts_local.min = tm_.tm_min; + dts_local.hour = tm_.tm_hour; + dts_local.day = tm_.tm_mday; + dts_local.month = tm_.tm_mon + 1; + dts_local.year = tm_.tm_year + 1900; + + /* Extract the timezone offset that was applied */ + rawtime /= 60; + localrawtime = (time_t)get_datetimestruct_days(&dts_local) * 24 * 60; + localrawtime += dts_local.hour * 60; + localrawtime += dts_local.min; + + timezone_offset = localrawtime - rawtime; + + /* Set dts to point to our local time instead of the UTC time */ + dts = &dts_local; + } + /* Use the manually provided tzoffset */ + else if (local) { + /* Make a copy of the npy_datetimestruct we can modify */ + dts_local = *dts; + dts = &dts_local; + + /* Set and apply the required timezone offset */ + timezone_offset = tzoffset; + add_minutes_to_datetimestruct(dts, timezone_offset); + } + + /* YEAR */ +#ifdef _WIN32 + tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#else + tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); +#endif + /* If it ran out of space or there isn't space for the NULL terminator */ + if (tmplen < 0 || tmplen >= sublen) { + goto string_too_short; + } + substr += tmplen; + sublen -= tmplen; + + /* Stop if the unit is years */ + if (base == NPY_FR_Y) { + *substr = '\0'; + return 0; + } + + /* MONTH */ + substr[0] = '-'; + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((dts->month / 10) + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)((dts->month % 10) + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr += 3; + sublen -= 3; + + /* Stop if the unit is months */ + if (base == NPY_FR_M) { + *substr = '\0'; + return 0; + } + + /* DAY */ + substr[0] = '-'; + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((dts->day / 10) + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)((dts->day % 10) + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr += 3; + sublen -= 3; + + /* Stop if the unit is days */ + if (base == NPY_FR_D) { + *substr = '\0'; + return 0; + } + + /* HOUR */ + substr[0] = 'T'; + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((dts->hour / 10) + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)((dts->hour % 10) + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr += 3; + sublen -= 3; + + /* Stop if the unit is hours */ + if (base == NPY_FR_h) { + goto add_time_zone; + } + + /* MINUTE */ + substr[0] = ':'; + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((dts->min / 10) + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)((dts->min % 10) + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr += 3; + sublen -= 3; + + /* Stop if the unit is minutes */ + if (base == NPY_FR_m) { + goto add_time_zone; + } + + /* SECOND */ + substr[0] = ':'; + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((dts->sec / 10) + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)((dts->sec % 10) + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr += 3; + sublen -= 3; + + /* Stop if the unit is seconds */ + if (base == NPY_FR_s) { + goto add_time_zone; + } + + /* MILLISECOND */ + substr[0] = '.'; + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 100000) % 10 + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)((dts->us / 10000) % 10 + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr[3] = (char)((dts->us / 1000) % 10 + '0'); + if (sublen <= 4 ) { + goto string_too_short; + } + substr += 4; + sublen -= 4; + + /* Stop if the unit is milliseconds */ + if (base == NPY_FR_ms) { + goto add_time_zone; + } + + /* MICROSECOND */ + substr[0] = (char)((dts->us / 100) % 10 + '0'); + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((dts->us / 10) % 10 + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)(dts->us % 10 + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr += 3; + sublen -= 3; + + /* Stop if the unit is microseconds */ + if (base == NPY_FR_us) { + goto add_time_zone; + } + + /* NANOSECOND */ + substr[0] = (char)((dts->ps / 100000) % 10 + '0'); + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10000) % 10 + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)((dts->ps / 1000) % 10 + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr += 3; + sublen -= 3; + + /* Stop if the unit is nanoseconds */ + if (base == NPY_FR_ns) { + goto add_time_zone; + } + + /* PICOSECOND */ + substr[0] = (char)((dts->ps / 100) % 10 + '0'); + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((dts->ps / 10) % 10 + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)(dts->ps % 10 + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr += 3; + sublen -= 3; + + /* Stop if the unit is picoseconds */ + if (base == NPY_FR_ps) { + goto add_time_zone; + } + + /* FEMTOSECOND */ + substr[0] = (char)((dts->as / 100000) % 10 + '0'); + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10000) % 10 + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)((dts->as / 1000) % 10 + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr += 3; + sublen -= 3; + + /* Stop if the unit is femtoseconds */ + if (base == NPY_FR_fs) { + goto add_time_zone; + } + + /* ATTOSECOND */ + substr[0] = (char)((dts->as / 100) % 10 + '0'); + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((dts->as / 10) % 10 + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)(dts->as % 10 + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr += 3; + sublen -= 3; + +add_time_zone: + if (local) { + /* Add the +/- sign */ + if (timezone_offset < 0) { + substr[0] = '-'; + timezone_offset = -timezone_offset; + } + else { + substr[0] = '+'; + } + if (sublen <= 1) { + goto string_too_short; + } + substr += 1; + sublen -= 1; + + /* Add the timezone offset */ + substr[0] = (char)((timezone_offset / (10*60)) % 10 + '0'); + if (sublen <= 1 ) { + goto string_too_short; + } + substr[1] = (char)((timezone_offset / 60) % 10 + '0'); + if (sublen <= 2 ) { + goto string_too_short; + } + substr[2] = (char)(((timezone_offset % 60) / 10) % 10 + '0'); + if (sublen <= 3 ) { + goto string_too_short; + } + substr[3] = (char)((timezone_offset % 60) % 10 + '0'); + if (sublen <= 4 ) { + goto string_too_short; + } + substr += 4; + sublen -= 4; + } + /* UTC "Zulu" time */ + else { + substr[0] = 'Z'; + if (sublen <= 1) { + goto string_too_short; + } + substr += 1; + sublen -= 1; + } + + /* Add a NULL terminator, and return */ + substr[0] = '\0'; + + return 0; + +string_too_short: + /* Put a NULL terminator on anyway */ + if (outlen > 0) { + outstr[outlen-1] = '\0'; + } + + PyErr_Format(PyExc_RuntimeError, + "The string provided for NumPy ISO datetime formatting " + "was too short, with length %d", + outlen); + return -1; +} + + diff --git a/numpy/core/src/multiarray/datetime_strings.h b/numpy/core/src/multiarray/datetime_strings.h new file mode 100644 index 000000000..2b48f49b9 --- /dev/null +++ b/numpy/core/src/multiarray/datetime_strings.h @@ -0,0 +1,77 @@ +#ifndef _NPY_PRIVATE__DATETIME_STRINGS_H_ +#define _NPY_PRIVATE__DATETIME_STRINGS_H_ + +/* + * Parses (almost) standard ISO 8601 date strings. The differences are: + * + * + After the date and time, may place a ' ' followed by an event number. + * + The date "20100312" is parsed as the year 20100312, not as + * equivalent to "2010-03-12". The '-' in the dates are not optional. + * + Only seconds may have a decimal point, with up to 18 digits after it + * (maximum attoseconds precision). + * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate + * the date and the time. Both are treated equivalently. + * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. + * + Doesn't handle leap seconds (seconds value has 60 in these cases). + * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow + * + Accepts special values "NaT" (not a time), "Today", (current + * day according to local time) and "Now" (current time in UTC). + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * 'unit' should contain -1 if the unit is unknown, or the unit + * which will be used if it is. + * 'casting' controls how the detected unit from the string is allowed + * to be cast to the 'unit' parameter. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets set to 1 if the parsed time was in local time, + * to 0 otherwise. The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * 'out_bestunit' gives a suggested unit based on the amount of + * resolution provided in the string, or -1 for NaT. + * 'out_special' gets set to 1 if the parsed time was 'today', + * 'now', or ''/'NaT'. For 'today', the unit recommended is + * 'D', for 'now', the unit recommended is 's', and for 'NaT' + * the unit recommended is 'Y'. + * + * Returns 0 on success, -1 on failure. + */ +NPY_NO_EXPORT int +parse_iso_8601_datetime(char *str, int len, + NPY_DATETIMEUNIT unit, + NPY_CASTING casting, + npy_datetimestruct *out, + npy_bool *out_local, + NPY_DATETIMEUNIT *out_bestunit, + npy_bool *out_special); + +/* + * Provides a string length to use for converting datetime + * objects with the given local and unit settings. + */ +NPY_NO_EXPORT int +get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base); + +/* + * Converts an npy_datetimestruct to an (almost) ISO 8601 + * NULL-terminated string. + * + * If 'local' is non-zero, it produces a string in local time with + * a +-#### timezone offset, otherwise it uses timezone Z (UTC). + * + * 'base' restricts the output to that unit. Set 'base' to + * -1 to auto-detect a base after which all the values are zero. + * + * 'tzoffset' is used if 'local' is enabled, and 'tzoffset' is + * set to a value other than -1. This is a manual override for + * the local time zone to use, as an offset in minutes. + * + * Returns 0 on success, -1 on failure (for example if the output + * string was too short). + */ +NPY_NO_EXPORT int +make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + int local, NPY_DATETIMEUNIT base, int tzoffset); + +#endif diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 3b136e072..72b1a5a65 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -45,6 +45,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "convert_datatype.h" #include "nditer_pywrap.h" #include "_datetime.h" +#include "datetime_strings.h" #include "datetime_busday.h" #include "datetime_busdaycal.h" @@ -2939,7 +2940,7 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args, /* Zero the destination string completely */ memset(dataptr[1], 0, strsize); /* Convert that into a string */ - if (make_iso_8601_date(&dts, (char *)dataptr[1], strsize, + if (make_iso_8601_datetime(&dts, (char *)dataptr[1], strsize, local, unit, tzoffset) < 0) { goto fail; } diff --git a/numpy/core/src/multiarray/multiarraymodule_onefile.c b/numpy/core/src/multiarray/multiarraymodule_onefile.c index 4459e6b4c..bcfe73e0f 100644 --- a/numpy/core/src/multiarray/multiarraymodule_onefile.c +++ b/numpy/core/src/multiarray/multiarraymodule_onefile.c @@ -11,6 +11,7 @@ #include "scalarapi.c" #include "datetime.c" +#include "datetime_strings.c" #include "datetime_busday.c" #include "datetime_busdaycal.c" #include "arraytypes.c" diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 380ec3493..04ae37ffd 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -22,6 +22,7 @@ #include "common.h" #include "scalartypes.h" #include "_datetime.h" +#include "datetime_strings.h" NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[] = { {PyObject_HEAD_INIT(&PyBoolArrType_Type) 0}, @@ -618,7 +619,7 @@ datetimetype_repr(PyObject *self) return NULL; } - if (make_iso_8601_date(&dts, iso, sizeof(iso), 1, + if (make_iso_8601_datetime(&dts, iso, sizeof(iso), 1, scal->obmeta.base, -1) < 0) { return NULL; } @@ -679,7 +680,7 @@ datetimetype_str(PyObject *self) return NULL; } - if (make_iso_8601_date(&dts, iso, sizeof(iso), 1, + if (make_iso_8601_datetime(&dts, iso, sizeof(iso), 1, scal->obmeta.base, -1) < 0) { return NULL; } |