diff options
-rw-r--r-- | numpy/core/include/numpy/ndarraytypes.h | 28 | ||||
-rw-r--r-- | numpy/core/src/multiarray/_datetime.h | 17 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert_datatype.c | 56 | ||||
-rw-r--r-- | numpy/core/src/multiarray/datetime.c | 196 | ||||
-rw-r--r-- | numpy/core/tests/test_datetime.py | 37 |
5 files changed, 304 insertions, 30 deletions
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 95bc78d47..9dd060aac 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -214,20 +214,20 @@ typedef enum { } NPY_CLIPMODE; typedef enum { - NPY_FR_Y, - NPY_FR_M, - NPY_FR_W, - NPY_FR_B, - NPY_FR_D, - NPY_FR_h, - NPY_FR_m, - NPY_FR_s, - NPY_FR_ms, - NPY_FR_us, - NPY_FR_ns, - NPY_FR_ps, - NPY_FR_fs, - NPY_FR_as + NPY_FR_Y, /* Years */ + NPY_FR_M, /* Months */ + NPY_FR_W, /* Weeks */ + NPY_FR_B, /* Business days (weekdays, doesn't account for holidays) */ + NPY_FR_D, /* Days */ + NPY_FR_h, /* hours */ + NPY_FR_m, /* minutes */ + NPY_FR_s, /* seconds */ + NPY_FR_ms,/* milliseconds */ + NPY_FR_us,/* microseconds */ + NPY_FR_ns,/* nanoseconds */ + NPY_FR_ps,/* picoseconds */ + NPY_FR_fs,/* femtoseconds */ + NPY_FR_as /* attoseconds */ } NPY_DATETIMEUNIT; #define NPY_DATETIME_NUMUNITS (NPY_FR_as + 1) diff --git a/numpy/core/src/multiarray/_datetime.h b/numpy/core/src/multiarray/_datetime.h index 7f97fd416..77cf10e48 100644 --- a/numpy/core/src/multiarray/_datetime.h +++ b/numpy/core/src/multiarray/_datetime.h @@ -23,7 +23,7 @@ NPY_NO_EXPORT PyArray_DatetimeMetaData * get_datetime_metadata_from_dtype(PyArray_Descr *dtype); /* - * This function returns a reference to a PyCObject/Capsule + * This function returns a reference to a capsule * which contains the datetime metadata parsed from a metadata * string. 'metastr' should be NULL-terminated, and len should * contain its string length. @@ -59,9 +59,20 @@ parse_datetime_unit_from_string(char *str, Py_ssize_t len, char *metastr); NPY_NO_EXPORT int convert_datetime_divisor_to_multiple(PyArray_DatetimeMetaData *meta, int den, char *metastr); +/* + * Computes the GCD of the two date-time metadata values. Raises + * an exception if there is no reasonable GCD, such as with + * years and days. + * + * Returns a capsule with the GCD metadata. 
+ */ +NPY_NO_EXPORT PyObject * +compute_datetime_metadata_greatest_common_divisor( + PyArray_Descr *type1, + PyArray_Descr *type2); /* - * Given an the CObject/Capsule datetime metadata object, + * Given an the capsule datetime metadata object, * returns a tuple for pickling and other purposes. */ NPY_NO_EXPORT PyObject * @@ -69,7 +80,7 @@ convert_datetime_metadata_to_tuple(PyArray_DatetimeMetaData *meta); /* * Given a tuple representing datetime metadata, - * returns a CObject/Capsule datetime metadata object. + * returns a capsule datetime metadata object. */ NPY_NO_EXPORT PyObject * convert_datetime_metadata_tuple_to_metacobj(PyObject *tuple); diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index b033e9587..50ea8d711 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -759,35 +759,67 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) return NULL; } } - /* 'M[A]','m[B]' -> 'M[A]', but only when A divides into B */ + /* 'M[A]','m[B]' -> 'M[A]' */ else if (type_num2 == NPY_TIMEDELTA) { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(type1); - if (meta1 == NULL) { - return NULL; - } - meta2 = get_datetime_metadata_from_dtype(type2); - if (meta2 == NULL) { - return NULL; - } - Py_INCREF(type1); return type1; } break; case NPY_TIMEDELTA: + /* 'm[A]','M[B]' -> 'M[B]' */ if (type_num2 == NPY_DATETIME) { + Py_INCREF(type2); + return type2; } + /* 'm[A]','m[B]' -> 'm[gcd(A,B)]' */ else if (type_num2 == NPY_TIMEDELTA) { + PyObject *gcdmeta; + PyArray_Descr *dtype; + + /* Get the metadata GCD */ + gcdmeta = compute_datetime_metadata_greatest_common_divisor( + type1, type2); + if (gcdmeta == NULL) { + return NULL; + } + + /* Create a TIMEDELTA dtype */ + dtype = PyArray_DescrNewFromType(PyArray_TIMEDELTA); + if (dtype == NULL) { + Py_DECREF(gcdmeta); + return NULL; + } + + /* Replace the metadata 
dictionary */ + Py_XDECREF(dtype->metadata); + dtype->metadata = PyDict_New(); + if (dtype->metadata == NULL) { + Py_DECREF(dtype); + Py_DECREF(gcdmeta); + return NULL; + } + + /* Set the metadata object in the dictionary. */ + if (PyDict_SetItemString(dtype->metadata, NPY_METADATA_DTSTR, + gcdmeta) < 0) { + Py_DECREF(dtype); + Py_DECREF(gcdmeta); + return NULL; + } + Py_DECREF(gcdmeta); + + return dtype; } else if (PyTypeNum_ISINTEGER(type_num2) || PyTypeNum_ISFLOAT(type_num2)) { + Py_INCREF(type1); + return type1; } break; } switch (type_num2) { - /* BOOL can convert to anything */ + /* BOOL can convert to almost anything */ case NPY_BOOL: if (type_num1 != NPY_DATETIME && type_num1 != NPY_TIMEDELTA && type_num1 != NPY_VOID) { diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index eb35ce88e..1eb100756 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -1159,7 +1159,7 @@ parse_dtype_from_datetime_typestr(char *typestr, Py_ssize_t len) return NULL; } - /* Parse the metadata string into a metadata CObject */ + /* Parse the metadata string into a metadata capsule */ metacobj = parse_datetime_metacobj_from_metastr(metastr, metalen); if (metacobj == NULL) { Py_DECREF(dtype); @@ -1267,6 +1267,200 @@ convert_datetime_divisor_to_multiple(PyArray_DatetimeMetaData *meta, } /* + * Lookup table for factors between datetime units, except + * for years, months, and business days. + */ +static npy_uint32 +_datetime_factors[] = { + 1, /* Years - not used */ + 1, /* Months - not used */ + 7, /* Weeks -> Days */ + 1, /* Business days - not used */ + 24, /* Days -> Hours */ + 60, /* Hours -> Minutes */ + 60, /* Minutes -> Seconds */ + 1000, + 1000, + 1000, + 1000, + 1000, + 1000, + 1 /* Attoseconds are the smallest base unit */ +}; + +/* + * Returns the scale factor between the units. Does not validate + * that bigbase represents larger units than littlebase. 
+ * + * Returns 0 if there is an overflow. + */ +static npy_uint64 +get_datetime_units_factor(NPY_DATETIMEUNIT bigbase, NPY_DATETIMEUNIT littlebase) +{ + npy_uint64 factor = 1; + int unit = (int)bigbase; + while (littlebase > unit) { + factor *= _datetime_factors[unit]; + /* + * Detect overflow by disallowing the top 8 bits to be 1 + * (the mask below), i.e. the factor must stay below 2**56, + * leaving headroom under the 64-bit limit. + */ + if (factor&0xff00000000000000ULL) { + return 0; + } + ++unit; + } + return factor; +} + +/* Euclidean algorithm on two positive numbers */ +static npy_uint64 +_uint64_euclidean_gcd(npy_uint64 x, npy_uint64 y) +{ + npy_uint64 tmp; + + if (x > y) { + tmp = x; + x = y; + y = tmp; + } + while (x != y && y != 0) { + tmp = x % y; + x = y; + y = tmp; + } + + return x; +} + +NPY_NO_EXPORT PyObject * +compute_datetime_metadata_greatest_common_divisor( + PyArray_Descr *type1, + PyArray_Descr *type2) +{ + PyArray_DatetimeMetaData *meta1, *meta2, *dt_data; + NPY_DATETIMEUNIT base; + npy_uint64 num1, num2, num; + int events = 1; + + if ((type1->type_num != NPY_DATETIME && + type1->type_num != NPY_TIMEDELTA) || + (type2->type_num != NPY_DATETIME && + type2->type_num != NPY_TIMEDELTA)) { + PyErr_SetString(PyExc_TypeError, + "Require datetime types for metadata " + "greatest common divisor operation"); + return NULL; + } + + meta1 = get_datetime_metadata_from_dtype(type1); + if (meta1 == NULL) { + return NULL; + } + meta2 = get_datetime_metadata_from_dtype(type2); + if (meta2 == NULL) { + return NULL; + } + + if (meta1->events != 1 || meta2->events != 1) { + /* + * When there are events specified, both the units + * base and the events must match. 
+ */ + if (meta1->base != meta2->base || meta1->events != meta2->events) { + goto incompatible_units; + } + events = meta1->events; + } + + num1 = (npy_uint64)meta1->num; + num2 = (npy_uint64)meta2->num; + + /* First validate that the units have a reasonable GCD */ + if (meta1->base == meta2->base) { + base = meta1->base; + } + else { + /* + * Years, Months, and Business days are incompatible with + * all other units. + */ + if (meta1->base == NPY_FR_Y || meta1->base == NPY_FR_M || + meta1->base == NPY_FR_B || + meta2->base == NPY_FR_Y || + meta2->base == NPY_FR_M || + meta2->base == NPY_FR_B) { + goto incompatible_units; + } + + /* Take the greater base (unit sizes are decreasing in enum) */ + if (meta1->base > meta2->base) { + base = meta1->base; + num2 *= get_datetime_units_factor(meta2->base, meta1->base); + if (num2 == 0) { + goto units_overflow; + } + } + else { + base = meta2->base; + num1 *= get_datetime_units_factor(meta1->base, meta2->base); + if (num1 == 0) { + goto units_overflow; + } + } + } + + /* Compute the GCD of the resulting multipliers */ + num = _uint64_euclidean_gcd(num1, num2); + /* Range-check before allocating so the error path leaks nothing */ + if ((int)num <= 0 || num != (npy_uint64)(int)num) { + goto units_overflow; + } + + /* Create and return the metadata capsule */ + dt_data = PyArray_malloc(sizeof(PyArray_DatetimeMetaData)); + if (dt_data == NULL) { + return PyErr_NoMemory(); + } + + dt_data->base = base; + dt_data->num = (int)num; + dt_data->events = events; + return NpyCapsule_FromVoidPtr((void *)dt_data, simple_capsule_dtor); + +incompatible_units: { + PyObject *errmsg; + errmsg = PyUString_FromString("Cannot get " + "a common metadata divisor for types "); + PyUString_ConcatAndDel(&errmsg, + PyObject_Repr((PyObject *)type1)); + PyUString_ConcatAndDel(&errmsg, + PyUString_FromString(" and ")); + PyUString_ConcatAndDel(&errmsg, + PyObject_Repr((PyObject *)type2)); + PyUString_ConcatAndDel(&errmsg, + PyUString_FromString(" because they have " + "incompatible base units or events")); + 
PyErr_SetObject(PyExc_TypeError, errmsg); + return NULL; + } +units_overflow: { + PyObject *errmsg; + errmsg = PyUString_FromString("Integer overflow " + "getting a common metadata divisor for types "); + PyUString_ConcatAndDel(&errmsg, + PyObject_Repr((PyObject *)type1)); + PyUString_ConcatAndDel(&errmsg, + PyUString_FromString(" and ")); + PyUString_ConcatAndDel(&errmsg, + PyObject_Repr((PyObject *)type2)); + PyErr_SetObject(PyExc_OverflowError, errmsg); + return NULL; + } +} + +/* * Converts a substring given by 'str' and 'len' into * a date time unit enum value. The 'metastr' parameter * is used for error messages, and may be NULL. diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index 0728654ee..6c37b01f8 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -26,10 +26,47 @@ class TestDateTime(TestCase): assert_raises(TypeError, np.dtype, 'm16') def test_dtype_promotion(self): + # datetime <op> datetime requires matching units assert_equal(np.promote_types(np.dtype('M8[Y]'), np.dtype('M8[Y]')), np.dtype('M8[Y]')) assert_raises(TypeError, np.promote_types, np.dtype('M8[Y]'), np.dtype('M8[M]')) + # timedelta <op> timedelta computes the metadata gcd + assert_equal( + np.promote_types(np.dtype('m8[2Y]'), np.dtype('m8[2Y]')), + np.dtype('m8[2Y]')) + assert_equal( + np.promote_types(np.dtype('m8[12Y]'), np.dtype('m8[15Y]')), + np.dtype('m8[3Y]')) + assert_equal( + np.promote_types(np.dtype('m8[62M]'), np.dtype('m8[24M]')), + np.dtype('m8[2M]')) + assert_equal( + np.promote_types(np.dtype('m8[1W]'), np.dtype('m8[2D]')), + np.dtype('m8[1D]')) + assert_equal( + np.promote_types(np.dtype('m8[W]'), np.dtype('m8[13s]')), + np.dtype('m8[s]')) + assert_equal( + np.promote_types(np.dtype('m8[13W]'), np.dtype('m8[49s]')), + np.dtype('m8[7s]')) + # timedelta <op> timedelta raises when there is no reasonable gcd + assert_raises(TypeError, np.promote_types, + np.dtype('m8[Y]'), np.dtype('m8[M]')) + 
assert_raises(TypeError, np.promote_types, + np.dtype('m8[Y]'), np.dtype('m8[D]')) + assert_raises(TypeError, np.promote_types, + np.dtype('m8[Y]'), np.dtype('m8[B]')) + assert_raises(TypeError, np.promote_types, + np.dtype('m8[D]'), np.dtype('m8[B]')) + assert_raises(TypeError, np.promote_types, + np.dtype('m8[M]'), np.dtype('m8[W]')) + # timedelta <op> timedelta may overflow with big unit ranges + assert_raises(OverflowError, np.promote_types, + np.dtype('m8[W]'), np.dtype('m8[fs]')) + assert_raises(OverflowError, np.promote_types, + np.dtype('m8[s]'), np.dtype('m8[as]')) + def test_hours(self): t = np.ones(3, dtype='M8[s]') |