diff options
-rw-r--r-- | numpy/core/arrayprint.py | 13 | ||||
-rw-r--r-- | numpy/core/src/multiarray/common.c | 50 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert_datatype.c | 155 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert_datatype.h | 12 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 70 | ||||
-rw-r--r-- | numpy/core/src/multiarray/datetime_strings.c | 166 | ||||
-rw-r--r-- | numpy/core/src/multiarray/dtype_transfer.c | 287 | ||||
-rw-r--r-- | numpy/core/src/multiarray/methods.c | 24 | ||||
-rw-r--r-- | numpy/core/src/multiarray/nditer.c.src | 36 | ||||
-rw-r--r-- | numpy/core/tests/test_datetime.py | 17 |
10 files changed, 571 insertions, 259 deletions
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 556d4da04..567573af5 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -245,7 +245,7 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ', 'complexfloat' : ComplexFormat(data, precision, suppress_small), 'longcomplexfloat' : LongComplexFormat(precision), - 'datetime' : DatetimeFormat(True, None, -1), + 'datetime' : DatetimeFormat(), 'timedelta' : TimedeltaFormat(data), 'numpystr' : repr, 'str' : str} @@ -698,16 +698,17 @@ class ComplexFormat(object): return r + i class DatetimeFormat(object): - def __init__(self, uselocaltime=True, overrideunit=None, tzoffset=-1): - self.local = uselocaltime + def __init__(self, overrideunit=None, + timezone='local', casting='same_kind'): + self.timezone = timezone self.unit = overrideunit - self.tzoffset = -1 + self.casting = casting def __call__(self, x): return "'%s'" % datetime_as_string(x, - local=self.local, unit=self.unit, - tzoffset=self.tzoffset) + timezone=self.timezone, + casting=self.casting) class TimedeltaFormat(object): def __init__(self, data): diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 28846462d..2b0a89d2a 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -309,7 +309,7 @@ _array_typedescr_fromstr(char *str) switch (typechar) { case 'b': if (size == sizeof(Bool)) { - type_num = PyArray_BOOL; + type_num = NPY_BOOL; } else { PyErr_SetString(PyExc_ValueError, msg); @@ -318,22 +318,22 @@ _array_typedescr_fromstr(char *str) break; case 'u': if (size == sizeof(uintp)) { - type_num = PyArray_UINTP; + type_num = NPY_UINTP; } else if (size == sizeof(char)) { - type_num = PyArray_UBYTE; + type_num = NPY_UBYTE; } else if (size == sizeof(short)) { - type_num = PyArray_USHORT; + type_num = NPY_USHORT; } else if (size == sizeof(ulong)) { - type_num = PyArray_ULONG; + type_num = NPY_ULONG; } else if (size == sizeof(int)) { - type_num = PyArray_UINT; + type_num = NPY_UINT; } else if (size == sizeof(ulonglong)) { - type_num = PyArray_ULONGLONG; + type_num = NPY_ULONGLONG; } else { PyErr_SetString(PyExc_ValueError, msg); @@ -342,22 +342,22 @@ _array_typedescr_fromstr(char *str) break; case 'i': if (size == sizeof(intp)) { - type_num = PyArray_INTP; + type_num = NPY_INTP; } else if (size == sizeof(char)) { - type_num = PyArray_BYTE; + type_num = NPY_BYTE; } else if (size == sizeof(short)) { - type_num = PyArray_SHORT; + type_num = NPY_SHORT; } else if (size == sizeof(long)) { - type_num = PyArray_LONG; + type_num = NPY_LONG; } else if (size == sizeof(int)) { - type_num = PyArray_INT; + type_num = NPY_INT; } else if (size == sizeof(longlong)) { - type_num = PyArray_LONGLONG; + type_num = NPY_LONGLONG; } else { PyErr_SetString(PyExc_ValueError, msg); @@ -366,13 +366,13 @@ _array_typedescr_fromstr(char *str) break; case 'f': if (size == sizeof(float)) { - type_num = PyArray_FLOAT; + type_num = NPY_FLOAT; } else if (size == sizeof(double)) { - type_num = PyArray_DOUBLE; + type_num = NPY_DOUBLE; } else if (size == sizeof(longdouble)) { - type_num = PyArray_LONGDOUBLE; + type_num = NPY_LONGDOUBLE; } else { PyErr_SetString(PyExc_ValueError, msg); @@ -381,13 +381,13 @@ _array_typedescr_fromstr(char *str) break; case 'c': if (size == sizeof(float)*2) { - type_num = PyArray_CFLOAT; + type_num = NPY_CFLOAT; } else if (size == sizeof(double)*2) { - type_num = PyArray_CDOUBLE; + type_num = NPY_CDOUBLE; } else if (size == sizeof(longdouble)*2) { - type_num = PyArray_CLONGDOUBLE; + type_num = NPY_CLONGDOUBLE; } else { PyErr_SetString(PyExc_ValueError, msg); @@ -396,22 +396,22 @@ _array_typedescr_fromstr(char *str) break; case 'O': if (size == sizeof(PyObject *)) { - type_num = PyArray_OBJECT; + type_num = NPY_OBJECT; } else { PyErr_SetString(PyExc_ValueError, msg); return NULL; } break; - case PyArray_STRINGLTR: - type_num = PyArray_STRING; + case NPY_STRINGLTR: + type_num = NPY_STRING; break; - case PyArray_UNICODELTR: - type_num = PyArray_UNICODE; + case NPY_UNICODELTR: + type_num = NPY_UNICODE; size <<= 2; break; case 'V': - type_num = PyArray_VOID; + type_num = NPY_VOID; break; default: PyErr_SetString(PyExc_ValueError, msg); diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index a1864c2ac..68029f17d 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -17,6 +17,7 @@ #include "convert_datatype.h" #include "_datetime.h" +#include "datetime_strings.h" /*NUMPY_API * For backward compatibility @@ -31,30 +32,11 @@ NPY_NO_EXPORT PyObject * PyArray_CastToType(PyArrayObject *arr, PyArray_Descr *dtype, int fortran) { PyObject *out; - PyArray_Descr *arr_dtype; - arr_dtype = PyArray_DESCR(arr); - - if (dtype->elsize == 0) { - PyArray_DESCR_REPLACE(dtype); - if (dtype == NULL) { - return NULL; - } - - if (arr_dtype->type_num == dtype->type_num) { - dtype->elsize = arr_dtype->elsize; - } - else if (arr_dtype->type_num == NPY_STRING && - dtype->type_num == NPY_UNICODE) { - dtype->elsize = arr_dtype->elsize * 4; - } - else if (arr_dtype->type_num == NPY_UNICODE && - dtype->type_num == NPY_STRING) { - dtype->elsize = arr_dtype->elsize / 4; - } - else if (dtype->type_num == NPY_VOID) { - dtype->elsize = arr_dtype->elsize; - } + /* If the requested dtype is flexible, adapt it */ + PyArray_AdaptFlexibleType((PyObject *)arr, PyArray_DESCR(arr), &dtype); + if (dtype == NULL) { + return NULL; } out = PyArray_NewFromDescr(Py_TYPE(arr), dtype, @@ -137,6 +119,133 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num) } /* + * This function calls Py_DECREF on flex_dtype, and replaces it with + * a new dtype that has been adapted based on the values in data_dtype + * and data_obj. If the flex_dtype is not flexible, it leaves it as is. + * + * The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID, + * and NPY_DATETIME with generic units. + */ +NPY_NO_EXPORT void +PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype, + PyArray_Descr **flex_dtype) +{ + PyArray_DatetimeMetaData *meta; + + /* Flexible types with expandable size */ + if ((*flex_dtype)->elsize == 0) { + /* First replace the flex dtype */ + PyArray_DESCR_REPLACE(*flex_dtype); + if (*flex_dtype == NULL) { + return; + } + + if (data_dtype->type_num == (*flex_dtype)->type_num || + (*flex_dtype)->type_num == NPY_VOID) { + (*flex_dtype)->elsize = data_dtype->elsize; + } + else { + npy_intp size = 8; + + /* Get a string-size estimate of the input */ + switch (data_dtype->type_num) { + case NPY_BOOL: + size = 5; + break; + case NPY_UBYTE: + size = 3; + break; + case NPY_BYTE: + size = 4; + break; + case NPY_USHORT: + size = 5; + break; + case NPY_SHORT: + size = 6; + break; + case NPY_UINT: + size = 10; + break; + case NPY_INT: + size = 6; + break; + case NPY_ULONG: + size = 20; + break; + case NPY_LONG: + size = 21; + break; + case NPY_ULONGLONG: + size = 20; + break; + case NPY_LONGLONG: + size = 21; + break; + case NPY_HALF: + case NPY_FLOAT: + case NPY_DOUBLE: + case NPY_LONGDOUBLE: + size = 32; + break; + case NPY_CFLOAT: + case NPY_CDOUBLE: + case NPY_CLONGDOUBLE: + size = 64; + break; + case NPY_OBJECT: + size = 64; + break; + case NPY_STRING: + case NPY_VOID: + size = data_dtype->elsize; + break; + case NPY_UNICODE: + size = data_dtype->elsize / 4; + break; + case NPY_DATETIME: + meta = get_datetime_metadata_from_dtype(data_dtype); + if (meta == NULL) { + Py_DECREF(*flex_dtype); + *flex_dtype = NULL; + return; + } + size = get_datetime_iso_8601_strlen(0, meta->base); + break; + case NPY_TIMEDELTA: + size = 21; + break; + } + + if ((*flex_dtype)->type_num == NPY_STRING) { + (*flex_dtype)->elsize = size; + } + else if ((*flex_dtype)->type_num == NPY_UNICODE) { + (*flex_dtype)->elsize = size * 4; + } + } + } + /* Flexible type with generic time unit that adapts */ + else if ((*flex_dtype)->type_num == NPY_DATETIME || + (*flex_dtype)->type_num == NPY_TIMEDELTA) { + meta = get_datetime_metadata_from_dtype(*flex_dtype); + if (meta == NULL) { + Py_DECREF(*flex_dtype); + *flex_dtype = NULL; + return; + } + + if (meta->base == NPY_FR_GENERIC) { + /* Detect the unit from the input's data */ + PyArray_Descr *dtype = find_object_datetime_type(data_obj, + (*flex_dtype)->type_num); + Py_DECREF(*flex_dtype); + *flex_dtype = dtype; + } + } +} + +/* * Must be broadcastable. * This code is very similar to PyArray_CopyInto/PyArray_MoveInto * except casting is done --- PyArray_BUFSIZE is used diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index 844cce0c9..5e0f31f50 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ b/numpy/core/src/multiarray/convert_datatype.h @@ -13,4 +13,16 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn); NPY_NO_EXPORT int PyArray_ValidType(int type); +/* + * This function calls Py_DECREF on flex_dtype, and replaces it with + * a new dtype that has been adapted based on the values in data_dtype + * and data_obj. If the flex_dtype is not flexible, it leaves it as is. + * + * The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID, + * and NPY_DATETIME with generic units. + */ +NPY_NO_EXPORT void +PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype, + PyArray_Descr **flex_dtype); + #endif diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 8d4a7430b..cdcf6b77a 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -15,11 +15,13 @@ #include "common.h" #include "ctors.h" +#include "convert_datatype.h" #include "shape.h" #include "buffer.h" #include "numpymemoryview.h" #include "lowlevel_strided_loops.h" #include "_datetime.h" +#include "datetime_strings.h" /* * Reading from a file or a string. @@ -1653,72 +1655,14 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, 0, &dtype, &ndim, dims, &arr, context) < 0) { Py_XDECREF(newtype); - ret = NULL; return NULL; } - /* If the requested dtype is flexible, adjust its size */ - if (newtype != NULL && newtype->elsize == 0) { - PyArray_DESCR_REPLACE(newtype); - if (newtype == NULL) { - ret = NULL; - return NULL; - } - if (arr != NULL) { - dtype = PyArray_DESCR(arr); - } - - if (newtype->type_num == dtype->type_num) { - newtype->elsize = dtype->elsize; - } - else { - switch(newtype->type_num) { - case NPY_STRING: - if (dtype->type_num == NPY_UNICODE) { - newtype->elsize = dtype->elsize >> 2; - } - else { - newtype->elsize = dtype->elsize; - } - break; - case NPY_UNICODE: - newtype->elsize = dtype->elsize << 2; - break; - case NPY_VOID: - newtype->elsize = dtype->elsize; - break; - } - } - } - /* - * Treat datetime generic units with the same idea as flexible strings. - * - * Flexible strings, for example the dtype 'str', use size zero as a - * signal indicating that they represent a "generic string type" instead - * of a string type with the size already baked in. The generic unit - * plays the same role, indicating that it's a "generic datetime type", - * and the actual unit should be filled in when needed just like the - * actual string size should be filled in when needed. - */ - else if (newtype != NULL && newtype->type_num == NPY_DATETIME) { - PyArray_DatetimeMetaData *meta = - get_datetime_metadata_from_dtype(newtype); - if (meta == NULL) { - Py_DECREF(newtype); - return NULL; - } - - if (meta->base == NPY_FR_GENERIC) { - /* Detect the unit from the input's data */ - PyArray_Descr *dtype = find_object_datetime_type(op, - newtype->type_num); - if (dtype == NULL) { - Py_DECREF(newtype); - return NULL; - } - Py_DECREF(newtype); - newtype = dtype; - } + /* If the requested dtype is flexible, adapt it */ + if (newtype != NULL) { + PyArray_AdaptFlexibleType(op, + (dtype == NULL) ? PyArray_DESCR(arr) : dtype, + &newtype); } /* If we got dimensions and dtype instead of an array */ diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c index b75920920..ead52830d 100644 --- a/numpy/core/src/multiarray/datetime_strings.c +++ b/numpy/core/src/multiarray/datetime_strings.c @@ -781,7 +781,8 @@ lossless_unit_from_datetimestruct(npy_datetimestruct *dts) /* * Converts an npy_datetimestruct to an (almost) ISO 8601 - * NULL-terminated string. + * NULL-terminated string. If the string fits in the space exactly, + * it leaves out the NULL terminator and returns success. * * If 'local' is non-zero, it produces a string in local time with * a +-#### timezone offset, otherwise it uses timezone Z (UTC). @@ -814,13 +815,15 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, /* Handle NaT, and treat a datetime with generic units as NaT */ if (dts->year == NPY_DATETIME_NAT || base == NPY_FR_GENERIC) { - if (outlen < 4) { + if (outlen < 3) { goto string_too_short; } outstr[0] = 'N'; outstr[1] = 'a'; outstr[2] = 'T'; - outstr[3] = '\0'; + if (outlen > 3) { + outstr[3] = '\0'; + } return 0; } @@ -955,7 +958,7 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year); #endif /* If it ran out of space or there isn't space for the NULL terminator */ - if (tmplen < 0 || tmplen >= sublen) { + if (tmplen < 0 || tmplen > sublen) { goto string_too_short; } substr += tmplen; @@ -963,67 +966,73 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, /* Stop if the unit is years */ if (base == NPY_FR_Y) { - *substr = '\0'; + if (sublen > 0) { + *substr = '\0'; + } return 0; } /* MONTH */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = '-'; - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((dts->month / 10) + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)((dts->month % 10) + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } substr += 3; sublen -= 3; /* Stop if the unit is months */ if (base == NPY_FR_M) { - *substr = '\0'; + if (sublen > 0) { + *substr = '\0'; + } return 0; } /* DAY */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = '-'; - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((dts->day / 10) + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)((dts->day % 10) + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } substr += 3; sublen -= 3; /* Stop if the unit is days */ if (base == NPY_FR_D) { - *substr = '\0'; + if (sublen > 0) { + *substr = '\0'; + } return 0; } /* HOUR */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = 'T'; - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((dts->hour / 10) + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)((dts->hour % 10) + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } substr += 3; sublen -= 3; @@ -1033,18 +1042,18 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, } /* MINUTE */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = ':'; - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((dts->min / 10) + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)((dts->min % 10) + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } substr += 3; sublen -= 3; @@ -1054,18 +1063,18 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, } /* SECOND */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = ':'; - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((dts->sec / 10) + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)((dts->sec % 10) + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } substr += 3; sublen -= 3; @@ -1075,22 +1084,22 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, } /* MILLISECOND */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = '.'; - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((dts->us / 100000) % 10 + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)((dts->us / 10000) % 10 + '0'); - if (sublen <= 3 ) { + if (sublen < 4 ) { goto string_too_short; } substr[3] = (char)((dts->us / 1000) % 10 + '0'); - if (sublen <= 4 ) { - goto string_too_short; - } substr += 4; sublen -= 4; @@ -1100,18 +1109,18 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, } /* MICROSECOND */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = (char)((dts->us / 100) % 10 + '0'); - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((dts->us / 10) % 10 + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)(dts->us % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } substr += 3; sublen -= 3; @@ -1121,18 +1130,18 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, } /* NANOSECOND */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = (char)((dts->ps / 100000) % 10 + '0'); - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((dts->ps / 10000) % 10 + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)((dts->ps / 1000) % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } substr += 3; sublen -= 3; @@ -1142,18 +1151,18 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, } /* PICOSECOND */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = (char)((dts->ps / 100) % 10 + '0'); - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((dts->ps / 10) % 10 + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)(dts->ps % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } substr += 3; sublen -= 3; @@ -1163,18 +1172,18 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, } /* FEMTOSECOND */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = (char)((dts->as / 100000) % 10 + '0'); - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((dts->as / 10000) % 10 + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)((dts->as / 1000) % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } substr += 3; sublen -= 3; @@ -1184,24 +1193,27 @@ make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, } /* ATTOSECOND */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = (char)((dts->as / 100) % 10 + '0'); - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((dts->as / 10) % 10 + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)(dts->as % 10 + '0'); - if (sublen <= 3 ) { - goto string_too_short; - } substr += 3; sublen -= 3; add_time_zone: if (local) { /* Add the +/- sign */ + if (sublen < 1) { + goto string_too_short; + } if (timezone_offset < 0) { substr[0] = '-'; timezone_offset = -timezone_offset; @@ -1209,53 +1221,47 @@ add_time_zone: else { substr[0] = '+'; } - if (sublen <= 1) { - goto string_too_short; - } substr += 1; sublen -= 1; /* Add the timezone offset */ + if (sublen < 1 ) { + goto string_too_short; + } substr[0] = (char)((timezone_offset / (10*60)) % 10 + '0'); - if (sublen <= 1 ) { + if (sublen < 2 ) { goto string_too_short; } substr[1] = (char)((timezone_offset / 60) % 10 + '0'); - if (sublen <= 2 ) { + if (sublen < 3 ) { goto string_too_short; } substr[2] = (char)(((timezone_offset % 60) / 10) % 10 + '0'); - if (sublen <= 3 ) { + if (sublen < 4 ) { goto string_too_short; } substr[3] = (char)((timezone_offset % 60) % 10 + '0'); - if (sublen <= 4 ) { - goto string_too_short; - } substr += 4; sublen -= 4; } /* UTC "Zulu" time */ else { - substr[0] = 'Z'; - if (sublen <= 1) { + if (sublen < 1) { goto string_too_short; } + substr[0] = 'Z'; substr += 1; sublen -= 1; } /* Add a NULL terminator, and return */ - substr[0] = '\0'; + if (sublen > 0) { + substr[0] = '\0'; + } return 0; string_too_short: - /* Put a NULL terminator on anyway */ - if (outlen > 0) { - outstr[outlen-1] = '\0'; - } - PyErr_Format(PyExc_RuntimeError, "The string provided for NumPy ISO datetime formatting " "was too short, with length %d", diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index c17593afe..f2dd39141 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -21,6 +21,7 @@ #include "numpy/npy_3kcompat.h" #include "_datetime.h" +#include "datetime_strings.h" #include "lowlevel_strided_loops.h" @@ -699,12 +700,23 @@ get_nbo_cast_numeric_transfer_function(int aligned, return NPY_SUCCEED; } -/* Does a datetime->datetime or timedelta->timedelta cast */ +/* + * Does a datetime->datetime, timedelta->timedelta, + * datetime->ascii, or ascii->datetime cast + */ typedef struct { free_strided_transfer_data freefunc; copy_strided_transfer_data copyfunc; /* The conversion fraction */ npy_int64 num, denom; + /* For the datetime -> string conversion, the dst string length */ + npy_intp src_itemsize, dst_itemsize; + /* + * A buffer of size 'src_itemsize + 1', for when the input + * string is exactly of length src_itemsize with no NULL + * terminator. + */ + char *tmp_buffer; /* * The metadata for when dealing with Months or Years * which behave non-linearly with respect to the other @@ -713,7 +725,17 @@ typedef struct { PyArray_DatetimeMetaData src_meta, dst_meta; } _strided_datetime_cast_data; -/* strided cast data copy function */ +/* strided datetime cast data free function */ +void _strided_datetime_cast_data_free(void *data) +{ + _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)data; + if (d->tmp_buffer != NULL) { + PyArray_free(d->tmp_buffer); + } + PyArray_free(data); +} + +/* strided datetime cast data copy function */ void *_strided_datetime_cast_data_copy(void *data) { _strided_datetime_cast_data *newdata = @@ -724,6 +746,13 @@ void *_strided_datetime_cast_data_copy(void *data) } memcpy(newdata, data, sizeof(_strided_datetime_cast_data)); + if (newdata->tmp_buffer != NULL) { + newdata->tmp_buffer = PyArray_malloc(newdata->src_itemsize + 1); + if (newdata->tmp_buffer == NULL) { + PyArray_free(newdata); + return NULL; + } + } return (void *)newdata; } @@ -823,6 +852,93 @@ _aligned_strided_to_strided_datetime_cast(char *dst, } } +static void +_strided_to_strided_datetime_to_string(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + void *data) +{ + _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)data; + npy_intp dst_itemsize = d->dst_itemsize; + npy_int64 dt; + npy_datetimestruct dts; + + while (N > 0) { + memcpy(&dt, src, sizeof(dt)); + + if (convert_datetime_to_datetimestruct(&d->src_meta, + dt, &dts) < 0) { + /* For an error, produce a 'NaT' string */ + dts.year = NPY_DATETIME_NAT; + } + + /* Initialize the destination to all zeros */ + memset(dst, 0, dst_itemsize); + + /* + * This may also raise an error, but the caller needs + * to use PyErr_Occurred(). + */ + make_iso_8601_datetime(&dts, dst, dst_itemsize, + 0, d->src_meta.base, -1, + NPY_UNSAFE_CASTING); + + dst += dst_stride; + src += src_stride; + --N; + } +} + +static void +_strided_to_strided_string_to_datetime(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + void *data) +{ + _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)data; + npy_int64 dt; + npy_datetimestruct dts; + char *tmp_buffer = d->tmp_buffer; + npy_intp len; + + while (N > 0) { + len = strnlen(src, src_itemsize); + + /* If the string is all full, use the buffer */ + if (len == src_itemsize) { + memcpy(tmp_buffer, src, src_itemsize); + tmp_buffer[src_itemsize] = '\0'; + + if (parse_iso_8601_datetime(tmp_buffer, len, + d->dst_meta.base, NPY_SAME_KIND_CASTING, + &dts, NULL, NULL, NULL) < 0) { + dt = NPY_DATETIME_NAT; + } + } + /* Otherwise parse the data in place */ + else { + if (parse_iso_8601_datetime(src, len, + d->dst_meta.base, NPY_SAME_KIND_CASTING, + &dts, NULL, NULL, NULL) < 0) { + dt = NPY_DATETIME_NAT; + } + } + + /* Convert to the datetime */ + if (dt != NPY_DATETIME_NAT && + convert_datetimestruct_to_datetime(&d->dst_meta, + &dts, &dt) < 0) { + dt = NPY_DATETIME_NAT; + } + + memcpy(dst, &dt, sizeof(dt)); + + dst += dst_stride; + src += src_stride; + --N; + } +} + /* * Assumes src_dtype and dst_dtype are both datetimes or both timedeltas */ @@ -861,10 +977,11 @@ get_nbo_cast_datetime_transfer_function(int aligned, *out_transferdata = NULL; return NPY_FAIL; } - data->freefunc = &PyArray_free; + data->freefunc = &_strided_datetime_cast_data_free; data->copyfunc = &_strided_datetime_cast_data_copy; data->num = num; data->denom = denom; + data->tmp_buffer = NULL; /* * Special case the datetime (but not timedelta) with the nonlinear @@ -902,6 +1019,105 @@ get_nbo_cast_datetime_transfer_function(int aligned, } static int +get_nbo_datetime_to_string_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedTransferFn **out_stransfer, + void **out_transferdata) +{ + PyArray_DatetimeMetaData *src_meta; + _strided_datetime_cast_data *data; + + src_meta = get_datetime_metadata_from_dtype(src_dtype); + if (src_meta == NULL) { + return NPY_FAIL; + } + + /* Allocate the data for the casting */ + data = (_strided_datetime_cast_data *)PyArray_malloc( + sizeof(_strided_datetime_cast_data)); + if (data == NULL) { + PyErr_NoMemory(); + *out_stransfer = NULL; + *out_transferdata = NULL; + return NPY_FAIL; + } + data->freefunc = &_strided_datetime_cast_data_free; + data->copyfunc = &_strided_datetime_cast_data_copy; + data->dst_itemsize = dst_dtype->elsize; + data->tmp_buffer = NULL; + + memcpy(&data->src_meta, src_meta, sizeof(data->src_meta)); + + *out_stransfer = &_strided_to_strided_datetime_to_string; + *out_transferdata = data; + +#if NPY_DT_DBG_TRACING + printf("Dtype transfer from "); + PyObject_Print((PyObject *)src_dtype, stdout, 0); + printf(" to "); + PyObject_Print((PyObject *)dst_dtype, stdout, 0); + printf("\n"); + printf("has conversion fraction %lld/%lld\n", num, denom); +#endif + + return NPY_SUCCEED; +} + +static int +get_nbo_string_to_datetime_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedTransferFn **out_stransfer, + void **out_transferdata) +{ + PyArray_DatetimeMetaData *dst_meta; + _strided_datetime_cast_data *data; + + dst_meta = get_datetime_metadata_from_dtype(dst_dtype); + if (dst_meta == NULL) { + return NPY_FAIL; + } + + /* Allocate the data for the casting */ + data = (_strided_datetime_cast_data *)PyArray_malloc( + sizeof(_strided_datetime_cast_data)); + if (data == NULL) { + PyErr_NoMemory(); + *out_stransfer = NULL; + *out_transferdata = NULL; + return NPY_FAIL; + } + data->freefunc = &_strided_datetime_cast_data_free; + data->copyfunc = &_strided_datetime_cast_data_copy; + data->src_itemsize = src_dtype->elsize; + data->tmp_buffer = PyArray_malloc(data->src_itemsize + 1); + if (data->tmp_buffer == NULL) { + PyErr_NoMemory(); + PyArray_free(data); + *out_stransfer = NULL; + *out_transferdata = NULL; + return NPY_FAIL; + } + + memcpy(&data->dst_meta, dst_meta, sizeof(data->dst_meta)); + + *out_stransfer = &_strided_to_strided_string_to_datetime; + *out_transferdata = data; + +#if NPY_DT_DBG_TRACING + printf("Dtype transfer from "); + PyObject_Print((PyObject *)src_dtype, stdout, 0); + printf(" to "); + PyObject_Print((PyObject *)dst_dtype, stdout, 0); + printf("\n"); + printf("has conversion fraction %lld/%lld\n", num, denom); +#endif + + return NPY_SUCCEED; +} + +static int get_nbo_cast_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -927,17 +1143,60 @@ get_nbo_cast_transfer_function(int aligned, out_stransfer, out_transferdata); } - /* As a parameterized type, datetime->datetime sometimes needs casting */ - if ((src_dtype->type_num == NPY_DATETIME && - dst_dtype->type_num == NPY_DATETIME) || - (src_dtype->type_num == NPY_TIMEDELTA && - dst_dtype->type_num == NPY_TIMEDELTA)) { - *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || - !PyArray_ISNBO(dst_dtype->byteorder); - return get_nbo_cast_datetime_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata); + if (src_dtype->type_num == NPY_DATETIME || + src_dtype->type_num == NPY_TIMEDELTA || + dst_dtype->type_num == NPY_DATETIME || + dst_dtype->type_num == NPY_TIMEDELTA) { + /* A parameterized type, datetime->datetime sometimes needs casting */ + if ((src_dtype->type_num == NPY_DATETIME && + dst_dtype->type_num == NPY_DATETIME) || + (src_dtype->type_num == NPY_TIMEDELTA && + dst_dtype->type_num == NPY_TIMEDELTA)) { + *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || + !PyArray_ISNBO(dst_dtype->byteorder); + return get_nbo_cast_datetime_transfer_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + out_stransfer, out_transferdata); + } + + /* + * Datetime <-> string conversions can be handled specially. + * The functions may raise an error if the strings have no + * space, or can't be parsed properly. + */ + if (src_dtype->type_num == NPY_DATETIME) { + switch (dst_dtype->type_num) { + case NPY_STRING: + *out_needs_api = 1; + *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder); + return get_nbo_datetime_to_string_transfer_function( + aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + out_stransfer, out_transferdata); + + case NPY_UNICODE: + *out_needs_api = 1; + break; + } + } + else if (dst_dtype->type_num == NPY_DATETIME) { + switch (src_dtype->type_num) { + case NPY_STRING: + *out_needs_api = 1; + *out_needs_wrap = !PyArray_ISNBO(dst_dtype->byteorder); + return get_nbo_string_to_datetime_transfer_function( + aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + out_stransfer, out_transferdata); + + case NPY_UNICODE: + *out_needs_api = 1; + break; + } + } } *out_needs_wrap = !aligned || diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index b9954e7f0..af53df435 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -17,6 +17,7 @@ #include "calculation.h" #include "methods.h" +#include "convert_datatype.h" /* NpyArg_ParseKeywords @@ -831,24 +832,11 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds) else if (PyArray_CanCastArrayTo(self, dtype, casting)) { PyArrayObject *ret; - if (dtype->elsize == 0) { - PyArray_DESCR_REPLACE(dtype); - if (dtype == NULL) { - return NULL; - } - - if (dtype->type_num == PyArray_DESCR(self)->type_num || - dtype->type_num == NPY_VOID) { - dtype->elsize = PyArray_DESCR(self)->elsize; - } - else if (PyArray_DESCR(self)->type_num == NPY_STRING && - dtype->type_num == NPY_UNICODE) { - dtype->elsize = PyArray_DESCR(self)->elsize * 4; - } - else if (PyArray_DESCR(self)->type_num == NPY_UNICODE && - dtype->type_num == NPY_STRING) { - dtype->elsize = PyArray_DESCR(self)->elsize / 4; - } + /* If the requested dtype is flexible, adapt it */ + PyArray_AdaptFlexibleType((PyObject *)self, PyArray_DESCR(self), + &dtype); + if (dtype == NULL) { + return NULL; } /* This steals the reference to dtype, so no DECREF of dtype */ diff --git a/numpy/core/src/multiarray/nditer.c.src b/numpy/core/src/multiarray/nditer.c.src index 3d7be4137..d8de7b5c5 100644 --- a/numpy/core/src/multiarray/nditer.c.src +++ b/numpy/core/src/multiarray/nditer.c.src @@ -3145,37 +3145,13 @@ npyiter_prepare_one_operand(PyArrayObject **op, if (op_request_dtype != NULL) { /* We just have a borrowed reference to op_request_dtype */ Py_INCREF(op_request_dtype); - /* If it's a data type without a size, set the size */ - if (op_request_dtype->elsize == 0) { - PyArray_DESCR_REPLACE(op_request_dtype); - if (op_request_dtype == NULL) { - return 0; - } - - if (op_request_dtype->type_num == NPY_STRING) { - switch((*op_dtype)->type_num) { - case NPY_STRING: - op_request_dtype->elsize = (*op_dtype)->elsize; - break; - case NPY_UNICODE: - op_request_dtype->elsize = (*op_dtype)->elsize >> 2; - break; - } - } - else if (op_request_dtype->type_num == NPY_UNICODE) { - switch((*op_dtype)->type_num) { - case NPY_STRING: - op_request_dtype->elsize = (*op_dtype)->elsize << 2; - break; - case NPY_UNICODE: - op_request_dtype->elsize = (*op_dtype)->elsize; - break; - } - } - else if (op_request_dtype->type_num == NPY_VOID) { - op_request_dtype->elsize = (*op_dtype)->elsize; - } + /* If the requested dtype is flexible, adapt it */ + PyArray_AdaptFlexibleType((PyObject *)(*op), PyArray_DESCR(*op), + &op_request_dtype); + if (op_request_dtype == NULL) { + return 0; } + /* Store the requested dtype */ Py_DECREF(*op_dtype); *op_dtype = op_request_dtype; diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index 207aee27e..a6dca9714 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -444,6 +444,23 @@ class TestDateTime(TestCase): assert_raises(TypeError, np.array, datetime.date(1960, 3, 12), dtype='M8[s]') + def test_datetime_string_conversion(self): + a = ['2011-03-16', '1920-01-01', '2013-05-19'] + str_a = np.array(a, dtype='S0') + dt_a = np.array(a, dtype='M') + str_b = np.empty_like(str_a) + dt_b = np.empty_like(dt_a) + + # String to datetime + assert_equal(dt_a, str_a.astype('M')) + assert_equal(dt_a.dtype, str_a.astype('M').dtype) + dt_b[...] = str_a + assert_equal(dt_a, dt_b) + # Datetime to string + assert_equal(str_a, dt_a.astype('S0')) + str_b[...] = dt_a + assert_equal(str_a, str_b) + def test_pickle(self): # Check that pickle roundtripping works dt = np.dtype('M8[7D]') |