summary refs log tree commit diff
diff options
context:
space:
mode:
author Mark Wiebe <mwiebe@enthought.com> 2011-06-16 17:36:45 -0500
committer Mark Wiebe <mwiebe@enthought.com> 2011-06-16 17:36:45 -0500
commit 4df8499f5dc7ae043a3aedc6c988011c0d9840e7 (patch)
tree 0f3792bf81f18608f27ea994218cb647847f689a
parent 382233c3d0fb03f20224dbfbc21e803fd7078407 (diff)
download numpy-4df8499f5dc7ae043a3aedc6c988011c0d9840e7.tar.gz
ENH: datetime-autounit: Detect unit from string arrays as well
Also move datetime string functions to their own source file.
-rw-r--r-- numpy/core/SConscript 1
-rw-r--r-- numpy/core/code_generators/genapi.py 1
-rw-r--r-- numpy/core/setup.py 1
-rw-r--r-- numpy/core/src/multiarray/_datetime.h 111
-rw-r--r-- numpy/core/src/multiarray/datetime.c 1358
-rw-r--r-- numpy/core/src/multiarray/datetime_strings.c 1219
-rw-r--r-- numpy/core/src/multiarray/datetime_strings.h 77
-rw-r--r-- numpy/core/src/multiarray/multiarraymodule.c 3
-rw-r--r-- numpy/core/src/multiarray/multiarraymodule_onefile.c 1
-rw-r--r-- numpy/core/src/multiarray/scalartypes.c.src 5
10 files changed, 1490 insertions, 1287 deletions
diff --git a/numpy/core/SConscript b/numpy/core/SConscript
index 0baea3d0c..3a563e2e8 100644
--- a/numpy/core/SConscript
+++ b/numpy/core/SConscript
@@ -439,6 +439,7 @@ if ENABLE_SEPARATE_COMPILATION:
pjoin('src', 'multiarray', 'hashdescr.c'),
pjoin('src', 'multiarray', 'arrayobject.c'),
pjoin('src', 'multiarray', 'datetime.c'),
+ pjoin('src', 'multiarray', 'datetime_strings.c'),
pjoin('src', 'multiarray', 'datetime_busday.c'),
pjoin('src', 'multiarray', 'datetime_busdaycal.c'),
pjoin('src', 'multiarray', 'numpyos.c'),
diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py
index 844aebff0..f69424d31 100644
--- a/numpy/core/code_generators/genapi.py
+++ b/numpy/core/code_generators/genapi.py
@@ -44,6 +44,7 @@ API_FILES = [join('multiarray', 'methods.c'),
join('multiarray', 'conversion_utils.c'),
join('multiarray', 'buffer.c'),
join('multiarray', 'datetime.c'),
+ join('multiarray', 'datetime_strings.c'),
join('multiarray', 'datetime_busday.c'),
join('multiarray', 'datetime_busdaycal.c'),
join('multiarray', 'nditer.c.src'),
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 87e4e4f90..c8d348410 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -747,6 +747,7 @@ def configuration(parent_package='',top_path=None):
join('src', 'multiarray', 'numpymemoryview.c'),
join('src', 'multiarray', 'buffer.c'),
join('src', 'multiarray', 'datetime.c'),
+ join('src', 'multiarray', 'datetime_strings.c'),
join('src', 'multiarray', 'datetime_busday.c'),
join('src', 'multiarray', 'datetime_busdaycal.c'),
join('src', 'multiarray', 'numpyos.c'),
diff --git a/numpy/core/src/multiarray/_datetime.h b/numpy/core/src/multiarray/_datetime.h
index a94abc6c7..e11e0eeeb 100644
--- a/numpy/core/src/multiarray/_datetime.h
+++ b/numpy/core/src/multiarray/_datetime.h
@@ -1,10 +1,26 @@
#ifndef _NPY_PRIVATE__DATETIME_H_
#define _NPY_PRIVATE__DATETIME_H_
+NPY_NO_EXPORT char *_datetime_strings[NPY_DATETIME_NUMUNITS];
+
+NPY_NO_EXPORT int _days_per_month_table[2][12];
+
NPY_NO_EXPORT void
numpy_pydatetime_import();
/*
+ * Returns 1 if the given year is a leap year, 0 otherwise.
+ */
+NPY_NO_EXPORT int
+is_leapyear(npy_int64 year);
+
+/*
+ * Calculates the days offset from the 1970 epoch.
+ */
+NPY_NO_EXPORT npy_int64
+get_datetimestruct_days(const npy_datetimestruct *dts);
+
+/*
* Creates a datetime or timedelta dtype using a copy of the provided metadata.
*/
NPY_NO_EXPORT PyArray_Descr *
@@ -104,6 +120,17 @@ datetime_metadata_divides(
int strict_with_nonlinear_units);
/*
+ * This provides the casting rules for the DATETIME data type units.
+ *
+ * Notably, there is a barrier between 'date units' and 'time units'
+ * for all but 'unsafe' casting.
+ */
+NPY_NO_EXPORT npy_bool
+can_cast_datetime64_units(NPY_DATETIMEUNIT src_unit,
+ NPY_DATETIMEUNIT dst_unit,
+ NPY_CASTING casting);
+
+/*
* This provides the casting rules for the DATETIME data type metadata.
*/
NPY_NO_EXPORT npy_bool
@@ -112,6 +139,17 @@ can_cast_datetime64_metadata(PyArray_DatetimeMetaData *src_meta,
NPY_CASTING casting);
/*
+ * This provides the casting rules for the TIMEDELTA data type units.
+ *
+ * Notably, there is a barrier between the nonlinear years and
+ * months units, and all the other units.
+ */
+NPY_NO_EXPORT npy_bool
+can_cast_timedelta64_units(NPY_DATETIMEUNIT src_unit,
+ NPY_DATETIMEUNIT dst_unit,
+ NPY_CASTING casting);
+
+/*
* This provides the casting rules for the TIMEDELTA data type metadata.
*/
NPY_NO_EXPORT npy_bool
@@ -196,79 +234,6 @@ append_metastr_to_string(PyArray_DatetimeMetaData *meta,
PyObject *ret);
/*
- * Provides a string length to use for converting datetime
- * objects with the given local and unit settings.
- */
-NPY_NO_EXPORT int
-get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
-
-/*
- * Parses (almost) standard ISO 8601 date strings. The differences are:
- *
- * + After the date and time, may place a ' ' followed by an event number.
- * + The date "20100312" is parsed as the year 20100312, not as
- * equivalent to "2010-03-12". The '-' in the dates are not optional.
- * + Only seconds may have a decimal point, with up to 18 digits after it
- * (maximum attoseconds precision).
- * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate
- * the date and the time. Both are treated equivalently.
- * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats.
- * + Doesn't handle leap seconds (seconds value has 60 in these cases).
- * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
- * + Accepts special values "NaT" (not a time), "Today", (current
- * day according to local time) and "Now" (current time in UTC).
- *
- * 'str' must be a NULL-terminated string, and 'len' must be its length.
- * 'unit' should contain -1 if the unit is unknown, or the unit
- * which will be used if it is.
- * 'casting' controls how the detected unit from the string is allowed
- * to be cast to the 'unit' parameter.
- *
- * 'out' gets filled with the parsed date-time.
- * 'out_local' gets set to 1 if the parsed time was in local time,
- * to 0 otherwise. The values 'now' and 'today' don't get counted
- * as local, and neither do UTC +/-#### timezone offsets, because
- * they aren't using the computer's local timezone offset.
- * 'out_bestunit' gives a suggested unit based on the amount of
- * resolution provided in the string, or -1 for NaT.
- * 'out_special' gets set to 1 if the parsed time was 'today',
- * 'now', or ''/'NaT'. For 'today', the unit recommended is
- * 'D', for 'now', the unit recommended is 's', and for 'NaT'
- * the unit recommended is 'Y'.
- *
- * Returns 0 on success, -1 on failure.
- */
-NPY_NO_EXPORT int
-parse_iso_8601_date(char *str, int len,
- NPY_DATETIMEUNIT unit,
- NPY_CASTING casting,
- npy_datetimestruct *out,
- npy_bool *out_local,
- NPY_DATETIMEUNIT *out_bestunit,
- npy_bool *out_special);
-
-/*
- * Converts an npy_datetimestruct to an (almost) ISO 8601
- * NULL-terminated string.
- *
- * If 'local' is non-zero, it produces a string in local time with
- * a +-#### timezone offset, otherwise it uses timezone Z (UTC).
- *
- * 'base' restricts the output to that unit. Set 'base' to
- * -1 to auto-detect a base after which all the values are zero.
- *
- * 'tzoffset' is used if 'local' is enabled, and 'tzoffset' is
- * set to a value other than -1. This is a manual override for
- * the local time zone to use, as an offset in minutes.
- *
- * Returns 0 on success, -1 on failure (for example if the output
- * string was too short).
- */
-NPY_NO_EXPORT int
-make_iso_8601_date(npy_datetimestruct *dts, char *outstr, int outlen,
- int local, NPY_DATETIMEUNIT base, int tzoffset);
-
-/*
* Tests for and converts a Python datetime.datetime or datetime.date
* object into a NumPy npy_datetimestruct.
*
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index e344edcbd..301677895 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -1,5 +1,5 @@
/*
- * This file implements core functionality for NumPy datetime
+ * This file implements core functionality for NumPy datetime.
*
* Written by Mark Wiebe (mwwiebe@gmail.com)
* Copyright (c) 2011 by Enthought, Inc.
@@ -20,8 +20,8 @@
#include "numpy/npy_3kcompat.h"
#include "numpy/arrayscalars.h"
-#include "methods.h"
#include "_datetime.h"
+#include "datetime_strings.h"
/*
* Imports the PyDateTime functions so we can create these objects.
@@ -33,11 +33,8 @@ numpy_pydatetime_import()
PyDateTime_IMPORT;
}
-static int
-is_leapyear(npy_int64 year);
-
/* Exported as DATETIMEUNITS in multiarraymodule.c */
-NPY_NO_EXPORT char *_datetime_strings[] = {
+NPY_NO_EXPORT char *_datetime_strings[NPY_DATETIME_NUMUNITS] = {
NPY_STR_Y,
NPY_STR_M,
NPY_STR_W,
@@ -55,12 +52,15 @@ NPY_NO_EXPORT char *_datetime_strings[] = {
};
/* Days per month, regular year and leap year */
-static int days_in_month[2][12] = {
+NPY_NO_EXPORT int _days_per_month_table[2][12] = {
{ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
{ 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
};
-static int
+/*
+ * Returns 1 if the given year is a leap year, 0 otherwise.
+ */
+NPY_NO_EXPORT int
is_leapyear(npy_int64 year)
{
return (year & 0x3) == 0 && /* year % 4 == 0 */
@@ -71,7 +71,7 @@ is_leapyear(npy_int64 year)
/*
* Calculates the days offset from the 1970 epoch.
*/
-static npy_int64
+NPY_NO_EXPORT npy_int64
get_datetimestruct_days(const npy_datetimestruct *dts)
{
int i, month;
@@ -116,7 +116,7 @@ get_datetimestruct_days(const npy_datetimestruct *dts)
days += year / 400;
}
- month_lengths = days_in_month[is_leapyear(dts->year)];
+ month_lengths = _days_per_month_table[is_leapyear(dts->year)];
month = dts->month - 1;
/* Add the months */
@@ -181,7 +181,7 @@ days_to_month_number(npy_datetime days)
int *month_lengths, i;
year = days_to_yearsdays(&days);
- month_lengths = days_in_month[is_leapyear(year)];
+ month_lengths = _days_per_month_table[is_leapyear(year)];
for (i = 0; i < 12; ++i) {
if (days < month_lengths[i]) {
@@ -206,7 +206,7 @@ set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts)
int *month_lengths, i;
dts->year = days_to_yearsdays(&days);
- month_lengths = days_in_month[is_leapyear(dts->year)];
+ month_lengths = _days_per_month_table[is_leapyear(dts->year)];
for (i = 0; i < 12; ++i) {
if (days < month_lengths[i]) {
@@ -2424,12 +2424,12 @@ add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes)
dts->month = 12;
}
isleap = is_leapyear(dts->year);
- dts->day += days_in_month[isleap][dts->month-1];
+ dts->day += _days_per_month_table[isleap][dts->month-1];
}
else if (dts->day > 28) {
isleap = is_leapyear(dts->year);
- if (dts->day > days_in_month[isleap][dts->month-1]) {
- dts->day -= days_in_month[isleap][dts->month-1];
+ if (dts->day > _days_per_month_table[isleap][dts->month-1]) {
+ dts->day -= _days_per_month_table[isleap][dts->month-1];
dts->month++;
if (dts->month > 12) {
dts->year++;
@@ -2440,1199 +2440,6 @@ add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes)
}
/*
- * Parses (almost) standard ISO 8601 date strings. The differences are:
- *
- * + After the date and time, may place a ' ' followed by an event number.
- * + The date "20100312" is parsed as the year 20100312, not as
- * equivalent to "2010-03-12". The '-' in the dates are not optional.
- * + Only seconds may have a decimal point, with up to 18 digits after it
- * (maximum attoseconds precision).
- * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate
- * the date and the time. Both are treated equivalently.
- * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats.
- * + Doesn't handle leap seconds (seconds value has 60 in these cases).
- * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
- * + Accepts special values "NaT" (not a time), "Today", (current
- * day according to local time) and "Now" (current time in UTC).
- *
- * 'str' must be a NULL-terminated string, and 'len' must be its length.
- * 'unit' should contain -1 if the unit is unknown, or the unit
- * which will be used if it is.
- * 'casting' controls how the detected unit from the string is allowed
- * to be cast to the 'unit' parameter.
- *
- * 'out' gets filled with the parsed date-time.
- * 'out_local' gets set to 1 if the parsed time was in local time,
- * to 0 otherwise. The values 'now' and 'today' don't get counted
- * as local, and neither do UTC +/-#### timezone offsets, because
- * they aren't using the computer's local timezone offset.
- * 'out_bestunit' gives a suggested unit based on the amount of
- * resolution provided in the string, or -1 for NaT.
- * 'out_special' gets set to 1 if the parsed time was 'today',
- * 'now', or ''/'NaT'. For 'today', the unit recommended is
- * 'D', for 'now', the unit recommended is 's', and for 'NaT'
- * the unit recommended is 'Y'.
- *
- * Returns 0 on success, -1 on failure.
- */
-NPY_NO_EXPORT int
-parse_iso_8601_date(char *str, int len,
- NPY_DATETIMEUNIT unit,
- NPY_CASTING casting,
- npy_datetimestruct *out,
- npy_bool *out_local,
- NPY_DATETIMEUNIT *out_bestunit,
- npy_bool *out_special)
-{
- int year_leap = 0;
- int i, numdigits;
- char *substr, sublen;
- NPY_DATETIMEUNIT bestunit;
-
- /* Initialize the output to all zeros */
- memset(out, 0, sizeof(npy_datetimestruct));
- out->month = 1;
- out->day = 1;
-
- /* The empty string and case-variants of "NaT" parse to not-a-time */
- if (len <= 0 || (len == 3 &&
- tolower(str[0]) == 'n' &&
- tolower(str[1]) == 'a' &&
- tolower(str[2]) == 't')) {
- out->year = NPY_DATETIME_NAT;
-
- /*
- * Indicate that this was a special value, and
- * recommend generic units.
- */
- if (out_local != NULL) {
- *out_local = 0;
- }
- if (out_bestunit != NULL) {
- *out_bestunit = NPY_FR_GENERIC;
- }
- if (out_special != NULL) {
- *out_special = 1;
- }
-
- return 0;
- }
-
- if (unit == NPY_FR_GENERIC) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot create a NumPy datetime other than NaT "
- "with generic units");
- return -1;
- }
-
- /*
- * The string "today" resolves to midnight of today's local date in UTC.
- * This is perhaps a little weird, but done so that further truncation
- * to a 'datetime64[D]' type produces the date you expect, rather than
- * switching to an adjacent day depending on the current time and your
- * timezone.
- */
- if (len == 5 && tolower(str[0]) == 't' &&
- tolower(str[1]) == 'o' &&
- tolower(str[2]) == 'd' &&
- tolower(str[3]) == 'a' &&
- tolower(str[4]) == 'y') {
- time_t rawtime = 0;
- struct tm tm_;
-
- /* 'today' only works for units of days or larger */
- if (unit != -1 && unit > NPY_FR_D) {
- PyErr_SetString(PyExc_ValueError,
- "Special value 'today' can only be converted "
- "to a NumPy datetime with 'D' or larger units");
- return -1;
- }
-
- time(&rawtime);
-#if defined(_WIN32)
- if (localtime_s(&tm_, &rawtime) != 0) {
- PyErr_SetString(PyExc_OSError, "Failed to use localtime_s to "
- "get local time");
- return -1;
- }
-#else
- /* Other platforms may require something else */
- if (localtime_r(&rawtime, &tm_) == NULL) {
- PyErr_SetString(PyExc_OSError, "Failed to use localtime_r to "
- "get local time");
- return -1;
- }
-#endif
- out->year = tm_.tm_year + 1900;
- out->month = tm_.tm_mon + 1;
- out->day = tm_.tm_mday;
-
- bestunit = NPY_FR_D;
-
- /*
- * Indicate that this was a special value, and
- * is a date (unit 'D').
- */
- if (out_local != NULL) {
- *out_local = 0;
- }
- if (out_bestunit != NULL) {
- *out_bestunit = bestunit;
- }
- if (out_special != NULL) {
- *out_special = 1;
- }
-
- /* Check the casting rule */
- if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
- casting)) {
- PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit "
- "'%s' using casting rule %s",
- str, _datetime_strings[unit],
- npy_casting_to_string(casting));
- return -1;
- }
-
- return 0;
- }
-
- /* The string "now" resolves to the current UTC time */
- if (len == 3 && tolower(str[0]) == 'n' &&
- tolower(str[1]) == 'o' &&
- tolower(str[2]) == 'w') {
- time_t rawtime = 0;
- PyArray_DatetimeMetaData meta;
-
- time(&rawtime);
-
- /* Set up a dummy metadata for the conversion */
- meta.base = NPY_FR_s;
- meta.num = 1;
- meta.events = 1;
-
- bestunit = NPY_FR_s;
-
- /*
- * Indicate that this was a special value, and
- * use 's' because the time() function has resolution
- * seconds.
- */
- if (out_local != NULL) {
- *out_local = 0;
- }
- if (out_bestunit != NULL) {
- *out_bestunit = bestunit;
- }
- if (out_special != NULL) {
- *out_special = 1;
- }
-
- /* Check the casting rule */
- if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
- casting)) {
- PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit "
- "'%s' using casting rule %s",
- str, _datetime_strings[unit],
- npy_casting_to_string(casting));
- return -1;
- }
-
- return convert_datetime_to_datetimestruct(&meta, rawtime, out);
- }
-
- /* Anything else isn't a special value */
- if (out_special != NULL) {
- *out_special = 0;
- }
-
- substr = str;
- sublen = len;
-
- /* Skip leading whitespace */
- while (sublen > 0 && isspace(*substr)) {
- ++substr;
- --sublen;
- }
-
- /* Leading '-' sign for negative year */
- if (*substr == '-') {
- ++substr;
- --sublen;
- }
-
- if (sublen == 0) {
- goto parse_error;
- }
-
- /* PARSE THE YEAR (digits until the '-' character) */
- out->year = 0;
- while (sublen > 0 && isdigit(*substr)) {
- out->year = 10 * out->year + (*substr - '0');
- ++substr;
- --sublen;
- }
-
- /* Negate the year if necessary */
- if (str[0] == '-') {
- out->year = -out->year;
- }
- /* Check whether it's a leap-year */
- year_leap = is_leapyear(out->year);
-
- /* Next character must be a '-' or the end of the string */
- if (sublen == 0) {
- if (out_local != NULL) {
- *out_local = 0;
- }
- bestunit = NPY_FR_Y;
- goto finish;
- }
- else if (*substr == '-') {
- ++substr;
- --sublen;
- }
- else {
- goto parse_error;
- }
-
- /* Can't have a trailing '-' */
- if (sublen == 0) {
- goto parse_error;
- }
-
- /* PARSE THE MONTH (2 digits) */
- if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- out->month = 10 * (substr[0] - '0') + (substr[1] - '0');
-
- if (out->month < 1 || out->month > 12) {
- PyErr_Format(PyExc_ValueError,
- "Month out of range in datetime string \"%s\"", str);
- goto error;
- }
- substr += 2;
- sublen -= 2;
- }
- else {
- goto parse_error;
- }
-
- /* Next character must be a '-' or the end of the string */
- if (sublen == 0) {
- if (out_local != NULL) {
- *out_local = 0;
- }
- bestunit = NPY_FR_M;
- goto finish;
- }
- else if (*substr == '-') {
- ++substr;
- --sublen;
- }
- else {
- goto parse_error;
- }
-
- /* Can't have a trailing '-' */
- if (sublen == 0) {
- goto parse_error;
- }
-
- /* PARSE THE DAY (2 digits) */
- if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- out->day = 10 * (substr[0] - '0') + (substr[1] - '0');
-
- if (out->day < 1 ||
- out->day > days_in_month[year_leap][out->month-1]) {
- PyErr_Format(PyExc_ValueError,
- "Day out of range in datetime string \"%s\"", str);
- goto error;
- }
- substr += 2;
- sublen -= 2;
- }
- else {
- goto parse_error;
- }
-
- /* Next character must be a 'T', ' ', or end of string */
- if (sublen == 0) {
- if (out_local != NULL) {
- *out_local = 0;
- }
- bestunit = NPY_FR_D;
- goto finish;
- }
- else if (*substr != 'T' && *substr != ' ') {
- goto parse_error;
- }
- else {
- ++substr;
- --sublen;
- }
-
- /* PARSE THE HOURS (2 digits) */
- if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- out->hour = 10 * (substr[0] - '0') + (substr[1] - '0');
-
- if (out->hour < 0 || out->hour >= 24) {
- PyErr_Format(PyExc_ValueError,
- "Hours out of range in datetime string \"%s\"", str);
- goto error;
- }
- substr += 2;
- sublen -= 2;
- }
- else {
- goto parse_error;
- }
-
- /* Next character must be a ':' or the end of the string */
- if (sublen > 0 && *substr == ':') {
- ++substr;
- --sublen;
- }
- else {
- bestunit = NPY_FR_h;
- goto parse_timezone;
- }
-
- /* Can't have a trailing ':' */
- if (sublen == 0) {
- goto parse_error;
- }
-
- /* PARSE THE MINUTES (2 digits) */
- if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- out->min = 10 * (substr[0] - '0') + (substr[1] - '0');
-
- if (out->hour < 0 || out->min >= 60) {
- PyErr_Format(PyExc_ValueError,
- "Minutes out of range in datetime string \"%s\"", str);
- goto error;
- }
- substr += 2;
- sublen -= 2;
- }
- else {
- goto parse_error;
- }
-
- /* Next character must be a ':' or the end of the string */
- if (sublen > 0 && *substr == ':') {
- ++substr;
- --sublen;
- }
- else {
- bestunit = NPY_FR_m;
- goto parse_timezone;
- }
-
- /* Can't have a trailing ':' */
- if (sublen == 0) {
- goto parse_error;
- }
-
- /* PARSE THE SECONDS (2 digits) */
- if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- out->sec = 10 * (substr[0] - '0') + (substr[1] - '0');
-
- if (out->sec < 0 || out->sec >= 60) {
- PyErr_Format(PyExc_ValueError,
- "Seconds out of range in datetime string \"%s\"", str);
- goto error;
- }
- substr += 2;
- sublen -= 2;
- }
- else {
- goto parse_error;
- }
-
- /* Next character may be a '.' indicating fractional seconds */
- if (sublen > 0 && *substr == '.') {
- ++substr;
- --sublen;
- }
- else {
- bestunit = NPY_FR_s;
- goto parse_timezone;
- }
-
- /* PARSE THE MICROSECONDS (0 to 6 digits) */
- numdigits = 0;
- for (i = 0; i < 6; ++i) {
- out->us *= 10;
- if (sublen > 0 && isdigit(*substr)) {
- out->us += (*substr - '0');
- ++substr;
- --sublen;
- ++numdigits;
- }
- }
-
- if (sublen == 0 || !isdigit(*substr)) {
- if (numdigits > 3) {
- bestunit = NPY_FR_us;
- }
- else {
- bestunit = NPY_FR_ms;
- }
- goto parse_timezone;
- }
-
- /* PARSE THE PICOSECONDS (0 to 6 digits) */
- numdigits = 0;
- for (i = 0; i < 6; ++i) {
- out->ps *= 10;
- if (sublen > 0 && isdigit(*substr)) {
- out->ps += (*substr - '0');
- ++substr;
- --sublen;
- ++numdigits;
- }
- }
-
- if (sublen == 0 || !isdigit(*substr)) {
- if (numdigits > 3) {
- bestunit = NPY_FR_ps;
- }
- else {
- bestunit = NPY_FR_ns;
- }
- goto parse_timezone;
- }
-
- /* PARSE THE ATTOSECONDS (0 to 6 digits) */
- numdigits = 0;
- for (i = 0; i < 6; ++i) {
- out->as *= 10;
- if (sublen > 0 && isdigit(*substr)) {
- out->as += (*substr - '0');
- ++substr;
- --sublen;
- ++numdigits;
- }
- }
-
- if (numdigits > 3) {
- bestunit = NPY_FR_as;
- }
- else {
- bestunit = NPY_FR_fs;
- }
-
-parse_timezone:
- if (sublen == 0) {
- /*
- * ISO 8601 states to treat date-times without a timezone offset
- * or 'Z' for UTC as local time. The C standard libary functions
- * mktime and gmtime allow us to do this conversion.
- *
- * Only do this timezone adjustment for recent and future years.
- */
- if (out->year > 1900 && out->year < 10000) {
- time_t rawtime = 0;
- struct tm tm_;
-
- tm_.tm_sec = out->sec;
- tm_.tm_min = out->min;
- tm_.tm_hour = out->hour;
- tm_.tm_mday = out->day;
- tm_.tm_mon = out->month - 1;
- tm_.tm_year = out->year - 1900;
- tm_.tm_isdst = -1;
-
- /* mktime converts a local 'struct tm' into a time_t */
- rawtime = mktime(&tm_);
- if (rawtime == -1) {
- PyErr_SetString(PyExc_OSError, "Failed to use mktime to "
- "convert local time to UTC");
- goto error;
- }
-
- /* gmtime converts a 'time_t' into a UTC 'struct tm' */
-#if defined(_WIN32)
- if (gmtime_s(&tm_, &rawtime) != 0) {
- PyErr_SetString(PyExc_OSError, "Failed to use gmtime_s to "
- "get a UTC time");
- goto error;
- }
-#else
- /* Other platforms may require something else */
- if (gmtime_r(&rawtime, &tm_) == NULL) {
- PyErr_SetString(PyExc_OSError, "Failed to use gmtime_r to "
- "get a UTC time");
- goto error;
- }
-#endif
- out->sec = tm_.tm_sec;
- out->min = tm_.tm_min;
- out->hour = tm_.tm_hour;
- out->day = tm_.tm_mday;
- out->month = tm_.tm_mon + 1;
- out->year = tm_.tm_year + 1900;
- }
-
- /* Since neither "Z" nor a time-zone was specified, it's local */
- if (out_local != NULL) {
- *out_local = 1;
- }
-
- goto finish;
- }
-
- /* UTC specifier */
- if (*substr == 'Z') {
- /* "Z" means not local */
- if (out_local != NULL) {
- *out_local = 0;
- }
-
- if (sublen == 1) {
- goto finish;
- }
- else {
- ++substr;
- --sublen;
- }
- }
- /* Time zone offset */
- else if (*substr == '-' || *substr == '+') {
- int offset_neg = 0, offset_hour = 0, offset_minute = 0;
-
- /*
- * Since "local" means local with respect to the current
- * machine, we say this is non-local.
- */
- if (out_local != NULL) {
- *out_local = 0;
- }
-
- if (*substr == '-') {
- offset_neg = 1;
- }
- ++substr;
- --sublen;
-
- /* The hours offset */
- if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0');
- substr += 2;
- sublen -= 2;
- if (offset_hour >= 24) {
- PyErr_Format(PyExc_ValueError,
- "Timezone hours offset out of range "
- "in datetime string \"%s\"", str);
- goto error;
- }
- }
- else {
- goto parse_error;
- }
-
- /* The minutes offset is optional */
- if (sublen > 0) {
- /* Optional ':' */
- if (*substr == ':') {
- ++substr;
- --sublen;
- }
-
- /* The minutes offset (at the end of the string) */
- if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
- offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0');
- substr += 2;
- sublen -= 2;
- if (offset_minute >= 60) {
- PyErr_Format(PyExc_ValueError,
- "Timezone minutes offset out of range "
- "in datetime string \"%s\"", str);
- goto error;
- }
- }
- else {
- goto parse_error;
- }
- }
-
- /* Apply the time zone offset */
- if (offset_neg) {
- offset_hour = -offset_hour;
- offset_minute = -offset_minute;
- }
- add_minutes_to_datetimestruct(out, -60 * offset_hour - offset_minute);
- }
-
- /* Skip trailing whitespace */
- while (sublen > 0 && isspace(*substr)) {
- ++substr;
- --sublen;
- }
-
- if (sublen != 0) {
- goto parse_error;
- }
-
-finish:
- if (out_bestunit != NULL) {
- *out_bestunit = bestunit;
- }
-
- /* Check the casting rule */
- if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
- casting)) {
- PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit "
- "'%s' using casting rule %s",
- str, _datetime_strings[unit],
- npy_casting_to_string(casting));
- return -1;
- }
-
- return 0;
-
-parse_error:
- PyErr_Format(PyExc_ValueError,
- "Error parsing datetime string \"%s\" at position %d",
- str, (int)(substr-str));
- return -1;
-
-error:
- return -1;
-}
-
-/*
- * Provides a string length to use for converting datetime
- * objects with the given local and unit settings.
- */
-NPY_NO_EXPORT int
-get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base)
-{
- int len = 0;
-
- /* If no unit is provided, return the maximum length */
- if (base == -1) {
- return NPY_DATETIME_MAX_ISO8601_STRLEN;
- }
-
- switch (base) {
- /* Generic units can only be used to represent NaT */
- case NPY_FR_GENERIC:
- return 4;
- case NPY_FR_as:
- len += 3; /* "###" */
- case NPY_FR_fs:
- len += 3; /* "###" */
- case NPY_FR_ps:
- len += 3; /* "###" */
- case NPY_FR_ns:
- len += 3; /* "###" */
- case NPY_FR_us:
- len += 3; /* "###" */
- case NPY_FR_ms:
- len += 4; /* ".###" */
- case NPY_FR_s:
- len += 3; /* ":##" */
- case NPY_FR_m:
- len += 3; /* ":##" */
- case NPY_FR_h:
- len += 3; /* "T##" */
- case NPY_FR_D:
- case NPY_FR_W:
- len += 3; /* "-##" */
- case NPY_FR_M:
- len += 3; /* "-##" */
- case NPY_FR_Y:
- len += 21; /* 64-bit year */
- break;
- }
-
- if (base >= NPY_FR_h) {
- if (local) {
- len += 5; /* "+####" or "-####" */
- }
- else {
- len += 1; /* "Z" */
- }
- }
-
- len += 1; /* NULL terminator */
-
- return len;
-}
-
-/*
- * Converts an npy_datetimestruct to an (almost) ISO 8601
- * NULL-terminated string.
- *
- * If 'local' is non-zero, it produces a string in local time with
- * a +-#### timezone offset, otherwise it uses timezone Z (UTC).
- *
- * 'base' restricts the output to that unit. Set 'base' to
- * -1 to auto-detect a base after which all the values are zero.
- *
- * 'tzoffset' is used if 'local' is enabled, and 'tzoffset' is
- * set to a value other than -1. This is a manual override for
- * the local time zone to use, as an offset in minutes.
- *
- * Returns 0 on success, -1 on failure (for example if the output
- * string was too short).
- */
-NPY_NO_EXPORT int
-make_iso_8601_date(npy_datetimestruct *dts, char *outstr, int outlen,
- int local, NPY_DATETIMEUNIT base, int tzoffset)
-{
- npy_datetimestruct dts_local;
- int timezone_offset = 0;
-
- char *substr = outstr, sublen = outlen;
- int tmplen;
-
- /* Handle NaT, and treat a datetime with generic units as NaT */
- if (dts->year == NPY_DATETIME_NAT || base == NPY_FR_GENERIC) {
- if (outlen < 4) {
- goto string_too_short;
- }
- outstr[0] = 'N';
- outstr[0] = 'a';
- outstr[0] = 'T';
- outstr[0] = '\0';
-
- return 0;
- }
-
- /* Only do local time within a reasonable year range */
- if ((dts->year <= 1900 || dts->year >= 10000) && tzoffset == -1) {
- local = 0;
- }
-
- /* Automatically detect a good unit */
- if (base == -1) {
- if (dts->as % 1000 != 0) {
- base = NPY_FR_as;
- }
- else if (dts->as != 0) {
- base = NPY_FR_fs;
- }
- else if (dts->ps % 1000 != 0) {
- base = NPY_FR_ps;
- }
- else if (dts->ps != 0) {
- base = NPY_FR_ns;
- }
- else if (dts->us % 1000 != 0) {
- base = NPY_FR_us;
- }
- else if (dts->us != 0) {
- base = NPY_FR_ms;
- }
- else if (dts->sec != 0) {
- base = NPY_FR_s;
- }
- /*
- * hours and minutes don't get split up by default, and printing
- * in local time forces minutes
- */
- else if (local || dts->min != 0 || dts->hour != 0) {
- base = NPY_FR_m;
- }
- /* dates don't get split up by default */
- else {
- base = NPY_FR_D;
- }
- }
- /*
- * Print weeks with the same precision as days.
- *
- * TODO: Could print weeks with YYYY-Www format if the week
- * epoch is a Monday.
- */
- else if (base == NPY_FR_W) {
- base = NPY_FR_D;
- }
-
- /* Printed dates have no time zone */
- if (base < NPY_FR_h) {
- local = 0;
- }
-
- /* Use the C API to convert from UTC to local time */
- if (local && tzoffset == -1) {
- time_t rawtime = 0, localrawtime;
- struct tm tm_;
-
- /*
- * Convert everything in 'dts' to a time_t, to minutes precision.
- * This is POSIX time, which skips leap-seconds, but because
- * we drop the seconds value from the npy_datetimestruct, everything
- * is ok for this operation.
- */
- rawtime = (time_t)get_datetimestruct_days(dts) * 24 * 60 * 60;
- rawtime += dts->hour * 60 * 60;
- rawtime += dts->min * 60;
-
- /* localtime converts a 'time_t' into a local 'struct tm' */
-#if defined(_WIN32)
- if (localtime_s(&tm_, &rawtime) != 0) {
- PyErr_SetString(PyExc_OSError, "Failed to use localtime_s to "
- "get a local time");
- return -1;
- }
-#else
- /* Other platforms may require something else */
- if (localtime_r(&rawtime, &tm_) == NULL) {
- PyErr_SetString(PyExc_OSError, "Failed to use localtime_r to "
- "get a local time");
- return -1;
- }
-#endif
- /* Make a copy of the npy_datetimestruct we can modify */
- dts_local = *dts;
-
- /* Copy back all the values except seconds */
- dts_local.min = tm_.tm_min;
- dts_local.hour = tm_.tm_hour;
- dts_local.day = tm_.tm_mday;
- dts_local.month = tm_.tm_mon + 1;
- dts_local.year = tm_.tm_year + 1900;
-
- /* Extract the timezone offset that was applied */
- rawtime /= 60;
- localrawtime = (time_t)get_datetimestruct_days(&dts_local) * 24 * 60;
- localrawtime += dts_local.hour * 60;
- localrawtime += dts_local.min;
-
- timezone_offset = localrawtime - rawtime;
-
- /* Set dts to point to our local time instead of the UTC time */
- dts = &dts_local;
- }
- /* Use the manually provided tzoffset */
- else if (local) {
- /* Make a copy of the npy_datetimestruct we can modify */
- dts_local = *dts;
- dts = &dts_local;
-
- /* Set and apply the required timezone offset */
- timezone_offset = tzoffset;
- add_minutes_to_datetimestruct(dts, timezone_offset);
- }
-
- /* YEAR */
-#ifdef _WIN32
- tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year);
-#else
- tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year);
-#endif
- /* If it ran out of space or there isn't space for the NULL terminator */
- if (tmplen < 0 || tmplen >= sublen) {
- goto string_too_short;
- }
- substr += tmplen;
- sublen -= tmplen;
-
- /* Stop if the unit is years */
- if (base == NPY_FR_Y) {
- *substr = '\0';
- return 0;
- }
-
- /* MONTH */
- substr[0] = '-';
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((dts->month / 10) + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)((dts->month % 10) + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr += 3;
- sublen -= 3;
-
- /* Stop if the unit is months */
- if (base == NPY_FR_M) {
- *substr = '\0';
- return 0;
- }
-
- /* DAY */
- substr[0] = '-';
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((dts->day / 10) + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)((dts->day % 10) + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr += 3;
- sublen -= 3;
-
- /* Stop if the unit is days */
- if (base == NPY_FR_D) {
- *substr = '\0';
- return 0;
- }
-
- /* HOUR */
- substr[0] = 'T';
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((dts->hour / 10) + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)((dts->hour % 10) + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr += 3;
- sublen -= 3;
-
- /* Stop if the unit is hours */
- if (base == NPY_FR_h) {
- goto add_time_zone;
- }
-
- /* MINUTE */
- substr[0] = ':';
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((dts->min / 10) + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)((dts->min % 10) + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr += 3;
- sublen -= 3;
-
- /* Stop if the unit is minutes */
- if (base == NPY_FR_m) {
- goto add_time_zone;
- }
-
- /* SECOND */
- substr[0] = ':';
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((dts->sec / 10) + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)((dts->sec % 10) + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr += 3;
- sublen -= 3;
-
- /* Stop if the unit is seconds */
- if (base == NPY_FR_s) {
- goto add_time_zone;
- }
-
- /* MILLISECOND */
- substr[0] = '.';
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((dts->us / 100000) % 10 + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)((dts->us / 10000) % 10 + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr[3] = (char)((dts->us / 1000) % 10 + '0');
- if (sublen <= 4 ) {
- goto string_too_short;
- }
- substr += 4;
- sublen -= 4;
-
- /* Stop if the unit is milliseconds */
- if (base == NPY_FR_ms) {
- goto add_time_zone;
- }
-
- /* MICROSECOND */
- substr[0] = (char)((dts->us / 100) % 10 + '0');
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((dts->us / 10) % 10 + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)(dts->us % 10 + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr += 3;
- sublen -= 3;
-
- /* Stop if the unit is microseconds */
- if (base == NPY_FR_us) {
- goto add_time_zone;
- }
-
- /* NANOSECOND */
- substr[0] = (char)((dts->ps / 100000) % 10 + '0');
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((dts->ps / 10000) % 10 + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)((dts->ps / 1000) % 10 + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr += 3;
- sublen -= 3;
-
- /* Stop if the unit is nanoseconds */
- if (base == NPY_FR_ns) {
- goto add_time_zone;
- }
-
- /* PICOSECOND */
- substr[0] = (char)((dts->ps / 100) % 10 + '0');
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((dts->ps / 10) % 10 + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)(dts->ps % 10 + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr += 3;
- sublen -= 3;
-
- /* Stop if the unit is picoseconds */
- if (base == NPY_FR_ps) {
- goto add_time_zone;
- }
-
- /* FEMTOSECOND */
- substr[0] = (char)((dts->as / 100000) % 10 + '0');
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((dts->as / 10000) % 10 + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)((dts->as / 1000) % 10 + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr += 3;
- sublen -= 3;
-
- /* Stop if the unit is femtoseconds */
- if (base == NPY_FR_fs) {
- goto add_time_zone;
- }
-
- /* ATTOSECOND */
- substr[0] = (char)((dts->as / 100) % 10 + '0');
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((dts->as / 10) % 10 + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)(dts->as % 10 + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr += 3;
- sublen -= 3;
-
-add_time_zone:
- if (local) {
- /* Add the +/- sign */
- if (timezone_offset < 0) {
- substr[0] = '-';
- timezone_offset = -timezone_offset;
- }
- else {
- substr[0] = '+';
- }
- if (sublen <= 1) {
- goto string_too_short;
- }
- substr += 1;
- sublen -= 1;
-
- /* Add the timezone offset */
- substr[0] = (char)((timezone_offset / (10*60)) % 10 + '0');
- if (sublen <= 1 ) {
- goto string_too_short;
- }
- substr[1] = (char)((timezone_offset / 60) % 10 + '0');
- if (sublen <= 2 ) {
- goto string_too_short;
- }
- substr[2] = (char)(((timezone_offset % 60) / 10) % 10 + '0');
- if (sublen <= 3 ) {
- goto string_too_short;
- }
- substr[3] = (char)((timezone_offset % 60) % 10 + '0');
- if (sublen <= 4 ) {
- goto string_too_short;
- }
- substr += 4;
- sublen -= 4;
- }
- /* UTC "Zulu" time */
- else {
- substr[0] = 'Z';
- if (sublen <= 1) {
- goto string_too_short;
- }
- substr += 1;
- sublen -= 1;
- }
-
- /* Add a NULL terminator, and return */
- substr[0] = '\0';
-
- return 0;
-
-string_too_short:
- /* Put a NULL terminator on anyway */
- if (outlen > 0) {
- outstr[outlen-1] = '\0';
- }
-
- PyErr_Format(PyExc_RuntimeError,
- "The string provided for NumPy ISO datetime formatting "
- "was too short, with length %d",
- outlen);
- return -1;
-}
-
-/*
* Tests for and converts a Python datetime.datetime or datetime.date
* object into a NumPy npy_datetimestruct.
*
@@ -3707,7 +2514,7 @@ convert_pydatetime_to_datetimestruct(PyObject *obj, npy_datetimestruct *out,
goto invalid_date;
}
isleap = is_leapyear(out->year);
- if (out->day < 1 || out->day > days_in_month[isleap][out->month-1]) {
+ if (out->day < 1 || out->day > _days_per_month_table[isleap][out->month-1]) {
goto invalid_date;
}
@@ -3879,7 +2686,7 @@ convert_pyobject_to_datetime(PyArray_DatetimeMetaData *meta, PyObject *obj,
}
/* Parse the ISO date */
- if (parse_iso_8601_date(str, len, meta->base, NPY_SAFE_CASTING,
+ if (parse_iso_8601_datetime(str, len, meta->base, NPY_SAFE_CASTING,
&dts, NULL, &bestunit, NULL) < 0) {
Py_DECREF(bytes);
return -1;
@@ -4851,6 +3658,129 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step,
return ret;
}
+/*
+ * Examines all the strings in the given string array, and parses them
+ * to find the right datetime metadata.
+ *
+ * 'obj' is iterated as NPY_STRING data (unsafe casting is requested so
+ * that unicode arrays can be viewed as byte strings). Each element is
+ * parsed with parse_iso_8601_datetime using an unknown unit (-1), and
+ * the unit detected for it is merged into 'meta' with
+ * compute_datetime_metadata_greatest_common_divisor.
+ *
+ * A zero-sized array leaves 'meta' untouched.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+find_string_array_datetime64_type(PyObject *obj,
+                        PyArray_DatetimeMetaData *meta)
+{
+    NpyIter* iter;
+    NpyIter_IterNextFunc *iternext;
+    char **dataptr;
+    npy_intp *strideptr, *innersizeptr;
+    PyArray_Descr *string_dtype;
+    int maxlen, len;
+    char *tmp_buffer = NULL;
+
+    npy_datetimestruct dts;
+    PyArray_DatetimeMetaData tmp_meta;
+
+    /* Handle zero-sized arrays specially */
+    if (PyArray_SIZE(obj) == 0) {
+        return 0;
+    }
+
+    string_dtype = PyArray_DescrFromType(NPY_STRING);
+    if (string_dtype == NULL) {
+        return -1;
+    }
+
+    /* Use unsafe casting to allow unicode -> ascii string */
+    iter = NpyIter_New((PyArrayObject *)obj,
+                        NPY_ITER_READONLY|NPY_ITER_EXTERNAL_LOOP,
+                        NPY_KEEPORDER, NPY_UNSAFE_CASTING,
+                        string_dtype);
+    Py_DECREF(string_dtype);
+    if (iter == NULL) {
+        return -1;
+    }
+
+    iternext = NpyIter_GetIterNext(iter, NULL);
+    if (iternext == NULL) {
+        NpyIter_Deallocate(iter);
+        return -1;
+    }
+    dataptr = NpyIter_GetDataPtrArray(iter);
+    strideptr = NpyIter_GetInnerStrideArray(iter);
+    innersizeptr = NpyIter_GetInnerLoopSizePtr(iter);
+
+    /* Get the resulting string length */
+    maxlen = NpyIter_GetDescrArray(iter)[0]->elsize;
+
+    /*
+     * Allocate a buffer for strings which fill their element completely,
+     * and therefore have no NULL terminator in the array data.
+     */
+    tmp_buffer = PyArray_malloc(maxlen+1);
+    if (tmp_buffer == NULL) {
+        PyErr_NoMemory();
+        NpyIter_Deallocate(iter);
+        return -1;
+    }
+
+    /* The iteration loop */
+    do {
+        /* Get the inner loop data/stride/count values */
+        char* data = *dataptr;
+        npy_intp stride = *strideptr;
+        npy_intp count = *innersizeptr;
+
+        /* The inner loop */
+        while (count--) {
+            char *str, *terminator;
+
+            /*
+             * Find the string length with memchr rather than strnlen,
+             * because strnlen is a POSIX extension which is not
+             * available on every platform.
+             */
+            terminator = memchr(data, '\0', maxlen);
+            if (terminator == NULL) {
+                len = maxlen;
+            }
+            else {
+                len = (int)(terminator - data);
+            }
+
+            /* If the string is all full, parse a terminated copy */
+            if (len == maxlen) {
+                memcpy(tmp_buffer, data, len);
+                tmp_buffer[len] = '\0';
+                str = tmp_buffer;
+            }
+            /* Otherwise parse the data in place */
+            else {
+                str = data;
+            }
+
+            tmp_meta.base = -1;
+            if (parse_iso_8601_datetime(str, len, -1,
+                                NPY_UNSAFE_CASTING, &dts, NULL,
+                                &tmp_meta.base, NULL) < 0) {
+                goto fail;
+            }
+
+            tmp_meta.num = 1;
+            tmp_meta.events = 1;
+            /* Combine it with 'meta' */
+            if (compute_datetime_metadata_greatest_common_divisor(meta,
+                            &tmp_meta, meta, 0, 0) < 0) {
+                goto fail;
+            }
+
+            data += stride;
+        }
+    } while(iternext(iter));
+
+    PyArray_free(tmp_buffer);
+    NpyIter_Deallocate(iter);
+
+    return 0;
+
+fail:
+    if (tmp_buffer != NULL) {
+        PyArray_free(tmp_buffer);
+    }
+    if (iter != NULL) {
+        NpyIter_Deallocate(iter);
+    }
+
+    return -1;
+}
+
/*
* Recursively determines the metadata for an NPY_DATETIME dtype.
@@ -4864,8 +3794,13 @@ recursive_find_object_datetime64_type(PyObject *obj,
/* Array -> use its metadata */
if (PyArray_Check(obj)) {
PyArray_Descr *obj_dtype = PyArray_DESCR(obj);
+
+ if (obj_dtype->type_num == NPY_STRING ||
+ obj_dtype->type_num == NPY_UNICODE) {
+ return find_string_array_datetime64_type(obj, meta);
+ }
/* If the array has metadata, use it */
- if (obj_dtype->type_num == NPY_DATETIME ||
+ else if (obj_dtype->type_num == NPY_DATETIME ||
obj_dtype->type_num == NPY_TIMEDELTA) {
PyArray_DatetimeMetaData *tmp_meta;
@@ -5007,6 +3942,7 @@ recursive_find_object_timedelta64_type(PyObject *obj,
/* Array -> use its metadata */
if (PyArray_Check(obj)) {
PyArray_Descr *obj_dtype = PyArray_DESCR(obj);
+
/* If the array has metadata, use it */
if (obj_dtype->type_num == NPY_DATETIME ||
obj_dtype->type_num == NPY_TIMEDELTA) {
diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c
new file mode 100644
index 000000000..679906aee
--- /dev/null
+++ b/numpy/core/src/multiarray/datetime_strings.c
@@ -0,0 +1,1219 @@
+/*
+ * This file implements string parsing and creation for NumPy datetime.
+ *
+ * Written by Mark Wiebe (mwwiebe@gmail.com)
+ * Copyright (c) 2011 by Enthought, Inc.
+ *
+ * See LICENSE.txt for the license.
+ */
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#include <time.h>
+
+#define _MULTIARRAYMODULE
+#include <numpy/arrayobject.h>
+
+#include "npy_config.h"
+#include "numpy/npy_3kcompat.h"
+
+#include "numpy/arrayscalars.h"
+#include "methods.h"
+#include "_datetime.h"
+#include "datetime_strings.h"
+
+/*
+ * Parses (almost) standard ISO 8601 date strings. The differences are:
+ *
+ * + Leading whitespace is skipped, and trailing whitespace is accepted
+ *   once a time has been parsed.
+ * + The date "20100312" is parsed as the year 20100312, not as
+ *   equivalent to "2010-03-12". The '-' in the dates are not optional.
+ * + Only seconds may have a decimal point, with up to 18 digits after it
+ *   (maximum attoseconds precision).
+ * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate
+ *   the date and the time. Both are treated equivalently.
+ * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats.
+ * + Doesn't handle leap seconds (seconds value has 60 in these cases).
+ * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
+ * + Accepts special values "NaT" (not a time), "Today", (current
+ *   day according to local time) and "Now" (current time in UTC).
+ *
+ * 'str' must be a NULL-terminated string, and 'len' must be its length.
+ * 'unit' should contain -1 if the unit is unknown, or the unit
+ *      which will be used if it is.
+ * 'casting' controls how the detected unit from the string is allowed
+ *           to be cast to the 'unit' parameter.
+ *
+ * 'out' gets filled with the parsed date-time. A time given without
+ *      a 'Z' or a timezone offset is treated as local time and is
+ *      converted to UTC with mktime/gmtime, but only for years
+ *      strictly between 1900 and 10000.
+ * 'out_local' gets set to 1 if the parsed time was in local time,
+ *      to 0 otherwise. The values 'now' and 'today' don't get counted
+ *      as local, and neither do UTC +/-#### timezone offsets, because
+ *      they aren't using the computer's local timezone offset.
+ * 'out_bestunit' gives a suggested unit based on the amount of
+ *      resolution provided in the string, or NPY_FR_GENERIC for NaT.
+ * 'out_special' gets set to 1 if the parsed time was 'today',
+ *      'now', or ''/'NaT'. For 'today', the unit recommended is
+ *      'D', for 'now', the unit recommended is 's', and for 'NaT'
+ *      the unit recommended is generic.
+ *
+ * Any of 'out_local', 'out_bestunit' and 'out_special' may be NULL.
+ *
+ * Returns 0 on success, -1 on failure (with a Python exception set).
+ */
+NPY_NO_EXPORT int
+parse_iso_8601_datetime(char *str, int len,
+                    NPY_DATETIMEUNIT unit,
+                    NPY_CASTING casting,
+                    npy_datetimestruct *out,
+                    npy_bool *out_local,
+                    NPY_DATETIMEUNIT *out_bestunit,
+                    npy_bool *out_special)
+{
+    int year_leap = 0, year_neg = 0;
+    int i, numdigits;
+    char *substr;
+    /* Must be an int: a char cannot hold string lengths above 127 */
+    int sublen;
+    NPY_DATETIMEUNIT bestunit;
+
+    /* Initialize the output to all zeros */
+    memset(out, 0, sizeof(npy_datetimestruct));
+    out->month = 1;
+    out->day = 1;
+
+    /* The empty string and case-variants of "NaT" parse to not-a-time */
+    if (len <= 0 || (len == 3 &&
+                        tolower(str[0]) == 'n' &&
+                        tolower(str[1]) == 'a' &&
+                        tolower(str[2]) == 't')) {
+        out->year = NPY_DATETIME_NAT;
+
+        /*
+         * Indicate that this was a special value, and
+         * recommend generic units.
+         */
+        if (out_local != NULL) {
+            *out_local = 0;
+        }
+        if (out_bestunit != NULL) {
+            *out_bestunit = NPY_FR_GENERIC;
+        }
+        if (out_special != NULL) {
+            *out_special = 1;
+        }
+
+        return 0;
+    }
+
+    if (unit == NPY_FR_GENERIC) {
+        PyErr_SetString(PyExc_ValueError,
+                    "Cannot create a NumPy datetime other than NaT "
+                    "with generic units");
+        return -1;
+    }
+
+    /*
+     * The string "today" resolves to midnight of today's local date in UTC.
+     * This is perhaps a little weird, but done so that further truncation
+     * to a 'datetime64[D]' type produces the date you expect, rather than
+     * switching to an adjacent day depending on the current time and your
+     * timezone.
+     */
+    if (len == 5 && tolower(str[0]) == 't' &&
+                    tolower(str[1]) == 'o' &&
+                    tolower(str[2]) == 'd' &&
+                    tolower(str[3]) == 'a' &&
+                    tolower(str[4]) == 'y') {
+        time_t rawtime = 0;
+        struct tm tm_;
+
+        /* 'today' only works for units of days or larger */
+        if (unit != -1 && unit > NPY_FR_D) {
+            PyErr_SetString(PyExc_ValueError,
+                        "Special value 'today' can only be converted "
+                        "to a NumPy datetime with 'D' or larger units");
+            return -1;
+        }
+
+        time(&rawtime);
+#if defined(_WIN32)
+        if (localtime_s(&tm_, &rawtime) != 0) {
+            PyErr_SetString(PyExc_OSError, "Failed to use localtime_s to "
+                                        "get local time");
+            return -1;
+        }
+#else
+        /* Other platforms may require something else */
+        if (localtime_r(&rawtime, &tm_) == NULL) {
+            PyErr_SetString(PyExc_OSError, "Failed to use localtime_r to "
+                                        "get local time");
+            return -1;
+        }
+#endif
+        out->year = tm_.tm_year + 1900;
+        out->month = tm_.tm_mon + 1;
+        out->day = tm_.tm_mday;
+
+        bestunit = NPY_FR_D;
+
+        /*
+         * Indicate that this was a special value, and
+         * is a date (unit 'D').
+         */
+        if (out_local != NULL) {
+            *out_local = 0;
+        }
+        if (out_bestunit != NULL) {
+            *out_bestunit = bestunit;
+        }
+        if (out_special != NULL) {
+            *out_special = 1;
+        }
+
+        /* Check the casting rule */
+        if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
+                                                     casting)) {
+            PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit "
+                         "'%s' using casting rule %s",
+                         str, _datetime_strings[unit],
+                         npy_casting_to_string(casting));
+            return -1;
+        }
+
+        return 0;
+    }
+
+    /* The string "now" resolves to the current UTC time */
+    if (len == 3 && tolower(str[0]) == 'n' &&
+                    tolower(str[1]) == 'o' &&
+                    tolower(str[2]) == 'w') {
+        time_t rawtime = 0;
+        PyArray_DatetimeMetaData meta;
+
+        time(&rawtime);
+
+        /* Set up a dummy metadata for the conversion */
+        meta.base = NPY_FR_s;
+        meta.num = 1;
+        meta.events = 1;
+
+        bestunit = NPY_FR_s;
+
+        /*
+         * Indicate that this was a special value, and
+         * use 's' because the time() function has resolution
+         * seconds.
+         */
+        if (out_local != NULL) {
+            *out_local = 0;
+        }
+        if (out_bestunit != NULL) {
+            *out_bestunit = bestunit;
+        }
+        if (out_special != NULL) {
+            *out_special = 1;
+        }
+
+        /* Check the casting rule */
+        if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
+                                                     casting)) {
+            PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit "
+                         "'%s' using casting rule %s",
+                         str, _datetime_strings[unit],
+                         npy_casting_to_string(casting));
+            return -1;
+        }
+
+        return convert_datetime_to_datetimestruct(&meta, rawtime, out);
+    }
+
+    /* Anything else isn't a special value */
+    if (out_special != NULL) {
+        *out_special = 0;
+    }
+
+    substr = str;
+    sublen = len;
+
+    /* Skip leading whitespace */
+    while (sublen > 0 && isspace(*substr)) {
+        ++substr;
+        --sublen;
+    }
+
+    /*
+     * Leading '-' sign for negative year. Remember it in a flag,
+     * because after skipping whitespace it is not necessarily str[0].
+     */
+    if (*substr == '-') {
+        year_neg = 1;
+        ++substr;
+        --sublen;
+    }
+
+    if (sublen == 0) {
+        goto parse_error;
+    }
+
+    /* PARSE THE YEAR (digits until the '-' character) */
+    out->year = 0;
+    while (sublen > 0 && isdigit(*substr)) {
+        out->year = 10 * out->year + (*substr - '0');
+        ++substr;
+        --sublen;
+    }
+
+    /* Negate the year if necessary */
+    if (year_neg) {
+        out->year = -out->year;
+    }
+    /* Check whether it's a leap-year */
+    year_leap = is_leapyear(out->year);
+
+    /* Next character must be a '-' or the end of the string */
+    if (sublen == 0) {
+        if (out_local != NULL) {
+            *out_local = 0;
+        }
+        bestunit = NPY_FR_Y;
+        goto finish;
+    }
+    else if (*substr == '-') {
+        ++substr;
+        --sublen;
+    }
+    else {
+        goto parse_error;
+    }
+
+    /* Can't have a trailing '-' */
+    if (sublen == 0) {
+        goto parse_error;
+    }
+
+    /* PARSE THE MONTH (2 digits) */
+    if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
+        out->month = 10 * (substr[0] - '0') + (substr[1] - '0');
+
+        if (out->month < 1 || out->month > 12) {
+            PyErr_Format(PyExc_ValueError,
+                        "Month out of range in datetime string \"%s\"", str);
+            goto error;
+        }
+        substr += 2;
+        sublen -= 2;
+    }
+    else {
+        goto parse_error;
+    }
+
+    /* Next character must be a '-' or the end of the string */
+    if (sublen == 0) {
+        if (out_local != NULL) {
+            *out_local = 0;
+        }
+        bestunit = NPY_FR_M;
+        goto finish;
+    }
+    else if (*substr == '-') {
+        ++substr;
+        --sublen;
+    }
+    else {
+        goto parse_error;
+    }
+
+    /* Can't have a trailing '-' */
+    if (sublen == 0) {
+        goto parse_error;
+    }
+
+    /* PARSE THE DAY (2 digits) */
+    if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
+        out->day = 10 * (substr[0] - '0') + (substr[1] - '0');
+
+        if (out->day < 1 ||
+                    out->day > _days_per_month_table[year_leap][out->month-1]) {
+            PyErr_Format(PyExc_ValueError,
+                        "Day out of range in datetime string \"%s\"", str);
+            goto error;
+        }
+        substr += 2;
+        sublen -= 2;
+    }
+    else {
+        goto parse_error;
+    }
+
+    /* Next character must be a 'T', ' ', or end of string */
+    if (sublen == 0) {
+        if (out_local != NULL) {
+            *out_local = 0;
+        }
+        bestunit = NPY_FR_D;
+        goto finish;
+    }
+    else if (*substr != 'T' && *substr != ' ') {
+        goto parse_error;
+    }
+    else {
+        ++substr;
+        --sublen;
+    }
+
+    /* PARSE THE HOURS (2 digits) */
+    if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
+        out->hour = 10 * (substr[0] - '0') + (substr[1] - '0');
+
+        if (out->hour < 0 || out->hour >= 24) {
+            PyErr_Format(PyExc_ValueError,
+                        "Hours out of range in datetime string \"%s\"", str);
+            goto error;
+        }
+        substr += 2;
+        sublen -= 2;
+    }
+    else {
+        goto parse_error;
+    }
+
+    /* Next character must be a ':' or the end of the string */
+    if (sublen > 0 && *substr == ':') {
+        ++substr;
+        --sublen;
+    }
+    else {
+        bestunit = NPY_FR_h;
+        goto parse_timezone;
+    }
+
+    /* Can't have a trailing ':' */
+    if (sublen == 0) {
+        goto parse_error;
+    }
+
+    /* PARSE THE MINUTES (2 digits) */
+    if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
+        out->min = 10 * (substr[0] - '0') + (substr[1] - '0');
+
+        /* Range-check the minutes themselves (not the hours again) */
+        if (out->min < 0 || out->min >= 60) {
+            PyErr_Format(PyExc_ValueError,
+                        "Minutes out of range in datetime string \"%s\"", str);
+            goto error;
+        }
+        substr += 2;
+        sublen -= 2;
+    }
+    else {
+        goto parse_error;
+    }
+
+    /* Next character must be a ':' or the end of the string */
+    if (sublen > 0 && *substr == ':') {
+        ++substr;
+        --sublen;
+    }
+    else {
+        bestunit = NPY_FR_m;
+        goto parse_timezone;
+    }
+
+    /* Can't have a trailing ':' */
+    if (sublen == 0) {
+        goto parse_error;
+    }
+
+    /* PARSE THE SECONDS (2 digits) */
+    if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
+        out->sec = 10 * (substr[0] - '0') + (substr[1] - '0');
+
+        if (out->sec < 0 || out->sec >= 60) {
+            PyErr_Format(PyExc_ValueError,
+                        "Seconds out of range in datetime string \"%s\"", str);
+            goto error;
+        }
+        substr += 2;
+        sublen -= 2;
+    }
+    else {
+        goto parse_error;
+    }
+
+    /* Next character may be a '.' indicating fractional seconds */
+    if (sublen > 0 && *substr == '.') {
+        ++substr;
+        --sublen;
+    }
+    else {
+        bestunit = NPY_FR_s;
+        goto parse_timezone;
+    }
+
+    /*
+     * PARSE THE MICROSECONDS (0 to 6 digits)
+     *
+     * The multiply by 10 runs on all 6 iterations even when fewer
+     * digits are present, which scales a short fraction like ".5"
+     * up to 500000 microseconds.
+     */
+    numdigits = 0;
+    for (i = 0; i < 6; ++i) {
+        out->us *= 10;
+        if (sublen > 0 && isdigit(*substr)) {
+            out->us += (*substr - '0');
+            ++substr;
+            --sublen;
+            ++numdigits;
+        }
+    }
+
+    if (sublen == 0 || !isdigit(*substr)) {
+        if (numdigits > 3) {
+            bestunit = NPY_FR_us;
+        }
+        else {
+            bestunit = NPY_FR_ms;
+        }
+        goto parse_timezone;
+    }
+
+    /* PARSE THE PICOSECONDS (0 to 6 digits) */
+    numdigits = 0;
+    for (i = 0; i < 6; ++i) {
+        out->ps *= 10;
+        if (sublen > 0 && isdigit(*substr)) {
+            out->ps += (*substr - '0');
+            ++substr;
+            --sublen;
+            ++numdigits;
+        }
+    }
+
+    if (sublen == 0 || !isdigit(*substr)) {
+        if (numdigits > 3) {
+            bestunit = NPY_FR_ps;
+        }
+        else {
+            bestunit = NPY_FR_ns;
+        }
+        goto parse_timezone;
+    }
+
+    /* PARSE THE ATTOSECONDS (0 to 6 digits) */
+    numdigits = 0;
+    for (i = 0; i < 6; ++i) {
+        out->as *= 10;
+        if (sublen > 0 && isdigit(*substr)) {
+            out->as += (*substr - '0');
+            ++substr;
+            --sublen;
+            ++numdigits;
+        }
+    }
+
+    if (numdigits > 3) {
+        bestunit = NPY_FR_as;
+    }
+    else {
+        bestunit = NPY_FR_fs;
+    }
+
+parse_timezone:
+    if (sublen == 0) {
+        /*
+         * ISO 8601 states to treat date-times without a timezone offset
+         * or 'Z' for UTC as local time. The C standard library functions
+         * mktime and gmtime allow us to do this conversion.
+         *
+         * Only do this timezone adjustment for recent and future years.
+         */
+        if (out->year > 1900 && out->year < 10000) {
+            time_t rawtime = 0;
+            struct tm tm_;
+
+            tm_.tm_sec = out->sec;
+            tm_.tm_min = out->min;
+            tm_.tm_hour = out->hour;
+            tm_.tm_mday = out->day;
+            tm_.tm_mon = out->month - 1;
+            tm_.tm_year = out->year - 1900;
+            tm_.tm_isdst = -1;
+
+            /* mktime converts a local 'struct tm' into a time_t */
+            rawtime = mktime(&tm_);
+            if (rawtime == -1) {
+                PyErr_SetString(PyExc_OSError, "Failed to use mktime to "
+                                            "convert local time to UTC");
+                goto error;
+            }
+
+            /* gmtime converts a 'time_t' into a UTC 'struct tm' */
+#if defined(_WIN32)
+            if (gmtime_s(&tm_, &rawtime) != 0) {
+                PyErr_SetString(PyExc_OSError, "Failed to use gmtime_s to "
+                                            "get a UTC time");
+                goto error;
+            }
+#else
+            /* Other platforms may require something else */
+            if (gmtime_r(&rawtime, &tm_) == NULL) {
+                PyErr_SetString(PyExc_OSError, "Failed to use gmtime_r to "
+                                            "get a UTC time");
+                goto error;
+            }
+#endif
+            out->sec = tm_.tm_sec;
+            out->min = tm_.tm_min;
+            out->hour = tm_.tm_hour;
+            out->day = tm_.tm_mday;
+            out->month = tm_.tm_mon + 1;
+            out->year = tm_.tm_year + 1900;
+        }
+
+        /* Since neither "Z" nor a time-zone was specified, it's local */
+        if (out_local != NULL) {
+            *out_local = 1;
+        }
+
+        goto finish;
+    }
+
+    /* UTC specifier */
+    if (*substr == 'Z') {
+        /* "Z" means not local */
+        if (out_local != NULL) {
+            *out_local = 0;
+        }
+
+        if (sublen == 1) {
+            goto finish;
+        }
+        else {
+            ++substr;
+            --sublen;
+        }
+    }
+    /* Time zone offset */
+    else if (*substr == '-' || *substr == '+') {
+        int offset_neg = 0, offset_hour = 0, offset_minute = 0;
+
+        /*
+         * Since "local" means local with respect to the current
+         * machine, we say this is non-local.
+         */
+        if (out_local != NULL) {
+            *out_local = 0;
+        }
+
+        if (*substr == '-') {
+            offset_neg = 1;
+        }
+        ++substr;
+        --sublen;
+
+        /* The hours offset */
+        if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
+            offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0');
+            substr += 2;
+            sublen -= 2;
+            if (offset_hour >= 24) {
+                PyErr_Format(PyExc_ValueError,
+                            "Timezone hours offset out of range "
+                            "in datetime string \"%s\"", str);
+                goto error;
+            }
+        }
+        else {
+            goto parse_error;
+        }
+
+        /* The minutes offset is optional */
+        if (sublen > 0) {
+            /* Optional ':' */
+            if (*substr == ':') {
+                ++substr;
+                --sublen;
+            }
+
+            /* The minutes offset (at the end of the string) */
+            if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
+                offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0');
+                substr += 2;
+                sublen -= 2;
+                if (offset_minute >= 60) {
+                    PyErr_Format(PyExc_ValueError,
+                                "Timezone minutes offset out of range "
+                                "in datetime string \"%s\"", str);
+                    goto error;
+                }
+            }
+            else {
+                goto parse_error;
+            }
+        }
+
+        /* Apply the time zone offset (subtracting it yields UTC) */
+        if (offset_neg) {
+            offset_hour = -offset_hour;
+            offset_minute = -offset_minute;
+        }
+        add_minutes_to_datetimestruct(out, -60 * offset_hour - offset_minute);
+    }
+
+    /* Skip trailing whitespace */
+    while (sublen > 0 && isspace(*substr)) {
+        ++substr;
+        --sublen;
+    }
+
+    if (sublen != 0) {
+        goto parse_error;
+    }
+
+finish:
+    if (out_bestunit != NULL) {
+        *out_bestunit = bestunit;
+    }
+
+    /* Check the casting rule */
+    if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
+                                                 casting)) {
+        PyErr_Format(PyExc_ValueError, "Cannot parse \"%s\" as unit "
+                     "'%s' using casting rule %s",
+                     str, _datetime_strings[unit],
+                     npy_casting_to_string(casting));
+        return -1;
+    }
+
+    return 0;
+
+parse_error:
+    PyErr_Format(PyExc_ValueError,
+            "Error parsing datetime string \"%s\" at position %d",
+            str, (int)(substr-str));
+    return -1;
+
+error:
+    return -1;
+}
+
+/*
+ * Computes a buffer length, including the NULL terminator, to use
+ * when converting datetime objects to strings with the given
+ * 'local' and 'base' (unit) settings.
+ *
+ * A 'base' of -1 (no unit provided) gives the maximum length,
+ * NPY_DATETIME_MAX_ISO8601_STRLEN.
+ */
+NPY_NO_EXPORT int
+get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base)
+{
+    /* Begin with the room needed for the NULL terminator */
+    int nchars = 1;
+
+    /* Without a unit, fall back on the maximum length */
+    if (base == -1) {
+        return NPY_DATETIME_MAX_ISO8601_STRLEN;
+    }
+
+    /* Generic units can only be used to represent NaT */
+    if (base == NPY_FR_GENERIC) {
+        return 4;
+    }
+
+    /* Units of hours or finer carry a timezone suffix */
+    if (base >= NPY_FR_h) {
+        /* Either "+####"/"-####", or "Z" */
+        nchars += local ? 5 : 1;
+    }
+
+    /*
+     * Each unit adds the width of its own field, then falls through
+     * to accumulate the fields of all the coarser units.
+     */
+    switch (base) {
+        case NPY_FR_as:
+            nchars += 3;  /* "###" */
+            /* fall through */
+        case NPY_FR_fs:
+            nchars += 3;  /* "###" */
+            /* fall through */
+        case NPY_FR_ps:
+            nchars += 3;  /* "###" */
+            /* fall through */
+        case NPY_FR_ns:
+            nchars += 3;  /* "###" */
+            /* fall through */
+        case NPY_FR_us:
+            nchars += 3;  /* "###" */
+            /* fall through */
+        case NPY_FR_ms:
+            nchars += 4;  /* ".###" */
+            /* fall through */
+        case NPY_FR_s:
+            nchars += 3;  /* ":##" */
+            /* fall through */
+        case NPY_FR_m:
+            nchars += 3;  /* ":##" */
+            /* fall through */
+        case NPY_FR_h:
+            nchars += 3;  /* "T##" */
+            /* fall through */
+        case NPY_FR_D:
+        case NPY_FR_W:
+            nchars += 3;  /* "-##" */
+            /* fall through */
+        case NPY_FR_M:
+            nchars += 3;  /* "-##" */
+            /* fall through */
+        case NPY_FR_Y:
+            nchars += 21; /* 64-bit year */
+            break;
+        default:
+            /* Any other unit value contributes no date/time fields */
+            break;
+    }
+
+    return nchars;
+}
+
+/*
+ * Converts an npy_datetimestruct to an (almost) ISO 8601
+ * NULL-terminated string.
+ *
+ * If 'local' is non-zero, it produces a string in local time with
+ * a +-#### timezone offset, otherwise it uses timezone Z (UTC).
+ *
+ * 'base' restricts the output to that unit. Set 'base' to
+ * -1 to auto-detect a base after which all the values are zero.
+ *
+ * 'tzoffset' is used if 'local' is enabled, and 'tzoffset' is
+ * set to a value other than -1. This is a manual override for
+ * the local time zone to use, as an offset in minutes.
+ *
+ * Returns 0 on success, -1 on failure (for example if the output
+ * string was too short).
+ */
+NPY_NO_EXPORT int
+make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
+ int local, NPY_DATETIMEUNIT base, int tzoffset)
+{
+ npy_datetimestruct dts_local;
+ int timezone_offset = 0;
+
+ char *substr = outstr, sublen = outlen;
+ int tmplen;
+
+ /* Handle NaT, and treat a datetime with generic units as NaT */
+ if (dts->year == NPY_DATETIME_NAT || base == NPY_FR_GENERIC) {
+ if (outlen < 4) {
+ goto string_too_short;
+ }
+ outstr[0] = 'N';
+ outstr[0] = 'a';
+ outstr[0] = 'T';
+ outstr[0] = '\0';
+
+ return 0;
+ }
+
+ /* Only do local time within a reasonable year range */
+ if ((dts->year <= 1900 || dts->year >= 10000) && tzoffset == -1) {
+ local = 0;
+ }
+
+ /* Automatically detect a good unit */
+ if (base == -1) {
+ if (dts->as % 1000 != 0) {
+ base = NPY_FR_as;
+ }
+ else if (dts->as != 0) {
+ base = NPY_FR_fs;
+ }
+ else if (dts->ps % 1000 != 0) {
+ base = NPY_FR_ps;
+ }
+ else if (dts->ps != 0) {
+ base = NPY_FR_ns;
+ }
+ else if (dts->us % 1000 != 0) {
+ base = NPY_FR_us;
+ }
+ else if (dts->us != 0) {
+ base = NPY_FR_ms;
+ }
+ else if (dts->sec != 0) {
+ base = NPY_FR_s;
+ }
+ /*
+ * hours and minutes don't get split up by default, and printing
+ * in local time forces minutes
+ */
+ else if (local || dts->min != 0 || dts->hour != 0) {
+ base = NPY_FR_m;
+ }
+ /* dates don't get split up by default */
+ else {
+ base = NPY_FR_D;
+ }
+ }
+ /*
+ * Print weeks with the same precision as days.
+ *
+ * TODO: Could print weeks with YYYY-Www format if the week
+ * epoch is a Monday.
+ */
+ else if (base == NPY_FR_W) {
+ base = NPY_FR_D;
+ }
+
+ /* Printed dates have no time zone */
+ if (base < NPY_FR_h) {
+ local = 0;
+ }
+
+ /* Use the C API to convert from UTC to local time */
+ if (local && tzoffset == -1) {
+ time_t rawtime = 0, localrawtime;
+ struct tm tm_;
+
+ /*
+ * Convert everything in 'dts' to a time_t, to minutes precision.
+ * This is POSIX time, which skips leap-seconds, but because
+ * we drop the seconds value from the npy_datetimestruct, everything
+ * is ok for this operation.
+ */
+ rawtime = (time_t)get_datetimestruct_days(dts) * 24 * 60 * 60;
+ rawtime += dts->hour * 60 * 60;
+ rawtime += dts->min * 60;
+
+ /* localtime converts a 'time_t' into a local 'struct tm' */
+#if defined(_WIN32)
+ if (localtime_s(&tm_, &rawtime) != 0) {
+ PyErr_SetString(PyExc_OSError, "Failed to use localtime_s to "
+ "get a local time");
+ return -1;
+ }
+#else
+ /* Other platforms may require something else */
+ if (localtime_r(&rawtime, &tm_) == NULL) {
+ PyErr_SetString(PyExc_OSError, "Failed to use localtime_r to "
+ "get a local time");
+ return -1;
+ }
+#endif
+ /* Make a copy of the npy_datetimestruct we can modify */
+ dts_local = *dts;
+
+ /* Copy back all the values except seconds */
+ dts_local.min = tm_.tm_min;
+ dts_local.hour = tm_.tm_hour;
+ dts_local.day = tm_.tm_mday;
+ dts_local.month = tm_.tm_mon + 1;
+ dts_local.year = tm_.tm_year + 1900;
+
+ /* Extract the timezone offset that was applied */
+ rawtime /= 60;
+ localrawtime = (time_t)get_datetimestruct_days(&dts_local) * 24 * 60;
+ localrawtime += dts_local.hour * 60;
+ localrawtime += dts_local.min;
+
+ timezone_offset = localrawtime - rawtime;
+
+ /* Set dts to point to our local time instead of the UTC time */
+ dts = &dts_local;
+ }
+ /* Use the manually provided tzoffset */
+ else if (local) {
+ /* Make a copy of the npy_datetimestruct we can modify */
+ dts_local = *dts;
+ dts = &dts_local;
+
+ /* Set and apply the required timezone offset */
+ timezone_offset = tzoffset;
+ add_minutes_to_datetimestruct(dts, timezone_offset);
+ }
+
+ /* YEAR */
+#ifdef _WIN32
+ tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year);
+#else
+ tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year);
+#endif
+ /* If it ran out of space or there isn't space for the NULL terminator */
+ if (tmplen < 0 || tmplen >= sublen) {
+ goto string_too_short;
+ }
+ substr += tmplen;
+ sublen -= tmplen;
+
+ /* Stop if the unit is years */
+ if (base == NPY_FR_Y) {
+ *substr = '\0';
+ return 0;
+ }
+
+ /* MONTH */
+ substr[0] = '-';
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((dts->month / 10) + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)((dts->month % 10) + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr += 3;
+ sublen -= 3;
+
+ /* Stop if the unit is months */
+ if (base == NPY_FR_M) {
+ *substr = '\0';
+ return 0;
+ }
+
+ /* DAY */
+ substr[0] = '-';
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((dts->day / 10) + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)((dts->day % 10) + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr += 3;
+ sublen -= 3;
+
+ /* Stop if the unit is days */
+ if (base == NPY_FR_D) {
+ *substr = '\0';
+ return 0;
+ }
+
+ /* HOUR */
+ substr[0] = 'T';
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((dts->hour / 10) + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)((dts->hour % 10) + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr += 3;
+ sublen -= 3;
+
+ /* Stop if the unit is hours */
+ if (base == NPY_FR_h) {
+ goto add_time_zone;
+ }
+
+ /* MINUTE */
+ substr[0] = ':';
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((dts->min / 10) + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)((dts->min % 10) + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr += 3;
+ sublen -= 3;
+
+ /* Stop if the unit is minutes */
+ if (base == NPY_FR_m) {
+ goto add_time_zone;
+ }
+
+ /* SECOND */
+ substr[0] = ':';
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((dts->sec / 10) + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)((dts->sec % 10) + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr += 3;
+ sublen -= 3;
+
+ /* Stop if the unit is seconds */
+ if (base == NPY_FR_s) {
+ goto add_time_zone;
+ }
+
+ /* MILLISECOND */
+ substr[0] = '.';
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((dts->us / 100000) % 10 + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)((dts->us / 10000) % 10 + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr[3] = (char)((dts->us / 1000) % 10 + '0');
+ if (sublen <= 4 ) {
+ goto string_too_short;
+ }
+ substr += 4;
+ sublen -= 4;
+
+ /* Stop if the unit is milliseconds */
+ if (base == NPY_FR_ms) {
+ goto add_time_zone;
+ }
+
+ /* MICROSECOND */
+ substr[0] = (char)((dts->us / 100) % 10 + '0');
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((dts->us / 10) % 10 + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)(dts->us % 10 + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr += 3;
+ sublen -= 3;
+
+ /* Stop if the unit is microseconds */
+ if (base == NPY_FR_us) {
+ goto add_time_zone;
+ }
+
+ /* NANOSECOND */
+ substr[0] = (char)((dts->ps / 100000) % 10 + '0');
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((dts->ps / 10000) % 10 + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)((dts->ps / 1000) % 10 + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr += 3;
+ sublen -= 3;
+
+ /* Stop if the unit is nanoseconds */
+ if (base == NPY_FR_ns) {
+ goto add_time_zone;
+ }
+
+ /* PICOSECOND */
+ substr[0] = (char)((dts->ps / 100) % 10 + '0');
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((dts->ps / 10) % 10 + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)(dts->ps % 10 + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr += 3;
+ sublen -= 3;
+
+ /* Stop if the unit is picoseconds */
+ if (base == NPY_FR_ps) {
+ goto add_time_zone;
+ }
+
+ /* FEMTOSECOND */
+ substr[0] = (char)((dts->as / 100000) % 10 + '0');
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((dts->as / 10000) % 10 + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)((dts->as / 1000) % 10 + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr += 3;
+ sublen -= 3;
+
+ /* Stop if the unit is femtoseconds */
+ if (base == NPY_FR_fs) {
+ goto add_time_zone;
+ }
+
+ /* ATTOSECOND */
+ substr[0] = (char)((dts->as / 100) % 10 + '0');
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((dts->as / 10) % 10 + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)(dts->as % 10 + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr += 3;
+ sublen -= 3;
+
+add_time_zone:
+ if (local) {
+ /* Add the +/- sign */
+ if (timezone_offset < 0) {
+ substr[0] = '-';
+ timezone_offset = -timezone_offset;
+ }
+ else {
+ substr[0] = '+';
+ }
+ if (sublen <= 1) {
+ goto string_too_short;
+ }
+ substr += 1;
+ sublen -= 1;
+
+ /* Add the timezone offset */
+ substr[0] = (char)((timezone_offset / (10*60)) % 10 + '0');
+ if (sublen <= 1 ) {
+ goto string_too_short;
+ }
+ substr[1] = (char)((timezone_offset / 60) % 10 + '0');
+ if (sublen <= 2 ) {
+ goto string_too_short;
+ }
+ substr[2] = (char)(((timezone_offset % 60) / 10) % 10 + '0');
+ if (sublen <= 3 ) {
+ goto string_too_short;
+ }
+ substr[3] = (char)((timezone_offset % 60) % 10 + '0');
+ if (sublen <= 4 ) {
+ goto string_too_short;
+ }
+ substr += 4;
+ sublen -= 4;
+ }
+ /* UTC "Zulu" time */
+ else {
+ substr[0] = 'Z';
+ if (sublen <= 1) {
+ goto string_too_short;
+ }
+ substr += 1;
+ sublen -= 1;
+ }
+
+ /* Add a NULL terminator, and return */
+ substr[0] = '\0';
+
+ return 0;
+
+string_too_short:
+ /* Put a NULL terminator on anyway */
+ if (outlen > 0) {
+ outstr[outlen-1] = '\0';
+ }
+
+ PyErr_Format(PyExc_RuntimeError,
+ "The string provided for NumPy ISO datetime formatting "
+ "was too short, with length %d",
+ outlen);
+ return -1;
+}
+
+
diff --git a/numpy/core/src/multiarray/datetime_strings.h b/numpy/core/src/multiarray/datetime_strings.h
new file mode 100644
index 000000000..2b48f49b9
--- /dev/null
+++ b/numpy/core/src/multiarray/datetime_strings.h
@@ -0,0 +1,77 @@
+#ifndef _NPY_PRIVATE__DATETIME_STRINGS_H_
+#define _NPY_PRIVATE__DATETIME_STRINGS_H_
+
+/*
+ * Parses (almost) standard ISO 8601 date strings. The differences are:
+ *
+ * + After the date and time, may place a ' ' followed by an event number.
+ * + The date "20100312" is parsed as the year 20100312, not as
+ * equivalent to "2010-03-12". The '-' in the dates are not optional.
+ * + Only seconds may have a decimal point, with up to 18 digits after it
+ * (maximum attoseconds precision).
+ * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate
+ * the date and the time. Both are treated equivalently.
+ * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats.
+ * + Doesn't handle leap seconds (seconds value has 60 in these cases).
+ * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
+ * + Accepts special values "NaT" (not a time), "Today" (current
+ * day according to local time) and "Now" (current time in UTC).
+ *
+ * 'str' must be a NULL-terminated string, and 'len' must be its length.
+ * 'unit' should contain -1 if the unit is unknown, or the unit
+ * which will be used if it is known.
+ * 'casting' controls how the detected unit from the string is allowed
+ * to be cast to the 'unit' parameter.
+ *
+ * 'out' gets filled with the parsed date-time.
+ * 'out_local' gets set to 1 if the parsed time was in local time,
+ * to 0 otherwise. The values 'now' and 'today' don't get counted
+ * as local, and neither do UTC +/-#### timezone offsets, because
+ * they aren't using the computer's local timezone offset.
+ * 'out_bestunit' gives a suggested unit based on the amount of
+ * resolution provided in the string, or -1 for NaT.
+ * 'out_special' gets set to 1 if the parsed time was 'today',
+ * 'now', or ''/'NaT'. For 'today', the unit recommended is
+ * 'D', for 'now', the unit recommended is 's', and for 'NaT'
+ * the unit recommended is 'Y'.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+NPY_NO_EXPORT int
+parse_iso_8601_datetime(char *str, int len,
+ NPY_DATETIMEUNIT unit,
+ NPY_CASTING casting,
+ npy_datetimestruct *out,
+ npy_bool *out_local,
+ NPY_DATETIMEUNIT *out_bestunit,
+ npy_bool *out_special);
+
+/*
+ * Provides a string length to use for converting datetime
+ * objects with the given local and unit settings.
+ */
+NPY_NO_EXPORT int
+get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
+
+/*
+ * Converts an npy_datetimestruct to an (almost) ISO 8601
+ * NULL-terminated string.
+ *
+ * If 'local' is non-zero, it produces a string in local time with
+ * a +-#### timezone offset, otherwise it uses timezone Z (UTC).
+ *
+ * 'base' restricts the output to that unit. Set 'base' to
+ * -1 to auto-detect a base after which all the values are zero.
+ *
+ * 'tzoffset' is used if 'local' is enabled, and 'tzoffset' is
+ * set to a value other than -1. This is a manual override for
+ * the local time zone to use, as an offset in minutes.
+ *
+ * Returns 0 on success, -1 on failure (for example if the output
+ * string was too short).
+ */
+NPY_NO_EXPORT int
+make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
+ int local, NPY_DATETIMEUNIT base, int tzoffset);
+
+#endif
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 3b136e072..72b1a5a65 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -45,6 +45,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
#include "convert_datatype.h"
#include "nditer_pywrap.h"
#include "_datetime.h"
+#include "datetime_strings.h"
#include "datetime_busday.h"
#include "datetime_busdaycal.h"
@@ -2939,7 +2940,7 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args,
/* Zero the destination string completely */
memset(dataptr[1], 0, strsize);
/* Convert that into a string */
- if (make_iso_8601_date(&dts, (char *)dataptr[1], strsize,
+ if (make_iso_8601_datetime(&dts, (char *)dataptr[1], strsize,
local, unit, tzoffset) < 0) {
goto fail;
}
diff --git a/numpy/core/src/multiarray/multiarraymodule_onefile.c b/numpy/core/src/multiarray/multiarraymodule_onefile.c
index 4459e6b4c..bcfe73e0f 100644
--- a/numpy/core/src/multiarray/multiarraymodule_onefile.c
+++ b/numpy/core/src/multiarray/multiarraymodule_onefile.c
@@ -11,6 +11,7 @@
#include "scalarapi.c"
#include "datetime.c"
+#include "datetime_strings.c"
#include "datetime_busday.c"
#include "datetime_busdaycal.c"
#include "arraytypes.c"
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 380ec3493..04ae37ffd 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -22,6 +22,7 @@
#include "common.h"
#include "scalartypes.h"
#include "_datetime.h"
+#include "datetime_strings.h"
NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[] = {
{PyObject_HEAD_INIT(&PyBoolArrType_Type) 0},
@@ -618,7 +619,7 @@ datetimetype_repr(PyObject *self)
return NULL;
}
- if (make_iso_8601_date(&dts, iso, sizeof(iso), 1,
+ if (make_iso_8601_datetime(&dts, iso, sizeof(iso), 1,
scal->obmeta.base, -1) < 0) {
return NULL;
}
@@ -679,7 +680,7 @@ datetimetype_str(PyObject *self)
return NULL;
}
- if (make_iso_8601_date(&dts, iso, sizeof(iso), 1,
+ if (make_iso_8601_datetime(&dts, iso, sizeof(iso), 1,
scal->obmeta.base, -1) < 0) {
return NULL;
}