summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Wiebe <mwiebe@enthought.com>2011-05-20 17:45:12 -0500
committerMark Wiebe <mwiebe@enthought.com>2011-05-20 17:58:46 -0500
commit19b71adb99587eab6e63509b01ff09c8ca544b5b (patch)
treee619a37399a42e82edd75a1e62d28217179d540b
parent8727a806b34a412bce086df9288ee18cdafe365c (diff)
downloadnumpy-19b71adb99587eab6e63509b01ff09c8ca544b5b.tar.gz
ENH: promote_types applies the datetime promotion rules
-rw-r--r--numpy/core/include/numpy/ndarraytypes.h28
-rw-r--r--numpy/core/src/multiarray/_datetime.h17
-rw-r--r--numpy/core/src/multiarray/convert_datatype.c56
-rw-r--r--numpy/core/src/multiarray/datetime.c196
-rw-r--r--numpy/core/tests/test_datetime.py37
5 files changed, 304 insertions, 30 deletions
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index 95bc78d47..9dd060aac 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -214,20 +214,20 @@ typedef enum {
} NPY_CLIPMODE;
typedef enum {
- NPY_FR_Y,
- NPY_FR_M,
- NPY_FR_W,
- NPY_FR_B,
- NPY_FR_D,
- NPY_FR_h,
- NPY_FR_m,
- NPY_FR_s,
- NPY_FR_ms,
- NPY_FR_us,
- NPY_FR_ns,
- NPY_FR_ps,
- NPY_FR_fs,
- NPY_FR_as
+ NPY_FR_Y, /* Years */
+ NPY_FR_M, /* Months */
+ NPY_FR_W, /* Weeks */
+ NPY_FR_B, /* Business days (weekdays, doesn't account for holidays) */
+ NPY_FR_D, /* Days */
+ NPY_FR_h, /* hours */
+ NPY_FR_m, /* minutes */
+ NPY_FR_s, /* seconds */
+ NPY_FR_ms,/* milliseconds */
+ NPY_FR_us,/* microseconds */
+ NPY_FR_ns,/* nanoseconds */
+ NPY_FR_ps,/* picoseconds */
+ NPY_FR_fs,/* femtoseconds */
+ NPY_FR_as /* attoseconds */
} NPY_DATETIMEUNIT;
#define NPY_DATETIME_NUMUNITS (NPY_FR_as + 1)
diff --git a/numpy/core/src/multiarray/_datetime.h b/numpy/core/src/multiarray/_datetime.h
index 7f97fd416..77cf10e48 100644
--- a/numpy/core/src/multiarray/_datetime.h
+++ b/numpy/core/src/multiarray/_datetime.h
@@ -23,7 +23,7 @@ NPY_NO_EXPORT PyArray_DatetimeMetaData *
get_datetime_metadata_from_dtype(PyArray_Descr *dtype);
/*
- * This function returns a reference to a PyCObject/Capsule
+ * This function returns a reference to a capsule
* which contains the datetime metadata parsed from a metadata
* string. 'metastr' should be NULL-terminated, and len should
* contain its string length.
@@ -59,9 +59,20 @@ parse_datetime_unit_from_string(char *str, Py_ssize_t len, char *metastr);
NPY_NO_EXPORT int
convert_datetime_divisor_to_multiple(PyArray_DatetimeMetaData *meta,
int den, char *metastr);
+/*
+ * Computes the GCD of the two date-time metadata values. Raises
+ * an exception if there is no reasonable GCD, such as with
+ * years and days.
+ *
+ * Returns a capsule with the GCD metadata.
+ */
+NPY_NO_EXPORT PyObject *
+compute_datetime_metadata_greatest_common_divisor(
+ PyArray_Descr *type1,
+ PyArray_Descr *type2);
/*
- * Given an the CObject/Capsule datetime metadata object,
+ * Given the capsule datetime metadata object,
* returns a tuple for pickling and other purposes.
*/
NPY_NO_EXPORT PyObject *
@@ -69,7 +80,7 @@ convert_datetime_metadata_to_tuple(PyArray_DatetimeMetaData *meta);
/*
* Given a tuple representing datetime metadata,
- * returns a CObject/Capsule datetime metadata object.
+ * returns a capsule datetime metadata object.
*/
NPY_NO_EXPORT PyObject *
convert_datetime_metadata_tuple_to_metacobj(PyObject *tuple);
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index b033e9587..50ea8d711 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -759,35 +759,67 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
return NULL;
}
}
- /* 'M[A]','m[B]' -> 'M[A]', but only when A divides into B */
+ /* 'M[A]','m[B]' -> 'M[A]' */
else if (type_num2 == NPY_TIMEDELTA) {
- PyArray_DatetimeMetaData *meta1, *meta2;
- meta1 = get_datetime_metadata_from_dtype(type1);
- if (meta1 == NULL) {
- return NULL;
- }
- meta2 = get_datetime_metadata_from_dtype(type2);
- if (meta2 == NULL) {
- return NULL;
- }
-
Py_INCREF(type1);
return type1;
}
break;
case NPY_TIMEDELTA:
+ /* 'm[A]','M[B]' -> 'M[B]' */
if (type_num2 == NPY_DATETIME) {
+ Py_INCREF(type2);
+ return type2;
}
+ /* 'm[A]','m[B]' -> 'm[gcd(A,B)]' */
else if (type_num2 == NPY_TIMEDELTA) {
+ PyObject *gcdmeta;
+ PyArray_Descr *dtype;
+
+ /* Get the metadata GCD */
+ gcdmeta = compute_datetime_metadata_greatest_common_divisor(
+ type1, type2);
+ if (gcdmeta == NULL) {
+ return NULL;
+ }
+
+ /* Create a TIMEDELTA dtype */
+ dtype = PyArray_DescrNewFromType(PyArray_TIMEDELTA);
+ if (dtype == NULL) {
+ Py_DECREF(gcdmeta);
+ return NULL;
+ }
+
+ /* Replace the metadata dictionary */
+ Py_XDECREF(dtype->metadata);
+ dtype->metadata = PyDict_New();
+ if (dtype->metadata == NULL) {
+ Py_DECREF(dtype);
+ Py_DECREF(gcdmeta);
+ return NULL;
+ }
+
+ /* Set the metadata object in the dictionary. */
+ if (PyDict_SetItemString(dtype->metadata, NPY_METADATA_DTSTR,
+ gcdmeta) < 0) {
+ Py_DECREF(dtype);
+ Py_DECREF(gcdmeta);
+ return NULL;
+ }
+ Py_DECREF(gcdmeta);
+
+ return dtype;
}
else if (PyTypeNum_ISINTEGER(type_num2) ||
PyTypeNum_ISFLOAT(type_num2)) {
+ Py_INCREF(type1);
+ return type1;
}
break;
}
switch (type_num2) {
- /* BOOL can convert to anything */
+ /* BOOL can convert to almost anything */
case NPY_BOOL:
if (type_num1 != NPY_DATETIME && type_num1 != NPY_TIMEDELTA &&
type_num1 != NPY_VOID) {
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index eb35ce88e..1eb100756 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -1159,7 +1159,7 @@ parse_dtype_from_datetime_typestr(char *typestr, Py_ssize_t len)
return NULL;
}
- /* Parse the metadata string into a metadata CObject */
+ /* Parse the metadata string into a metadata capsule */
metacobj = parse_datetime_metacobj_from_metastr(metastr, metalen);
if (metacobj == NULL) {
Py_DECREF(dtype);
@@ -1267,6 +1267,200 @@ convert_datetime_divisor_to_multiple(PyArray_DatetimeMetaData *meta,
}
/*
+ * Lookup table for factors between datetime units, except
+ * for years, months, and business days.
+ */
+static const npy_uint32
+_datetime_factors[] = {
+    1,    /* Years - not used */
+    1,    /* Months - not used */
+    7,    /* Weeks -> Days */
+    1,    /* Business days - not used */
+    24,   /* Days -> Hours */
+    60,   /* Hours -> Minutes */
+    60,   /* Minutes -> Seconds */
+    1000, /* Seconds -> Milliseconds */
+    1000, /* Milliseconds -> Microseconds */
+    1000, /* Microseconds -> Nanoseconds */
+    1000, /* Nanoseconds -> Picoseconds */
+    1000, /* Picoseconds -> Femtoseconds */
+    1000, /* Femtoseconds -> Attoseconds */
+    1     /* Attoseconds are the smallest base unit */
+};
+
+/*
+ * Returns the scale factor between the units, i.e. the product of
+ * the _datetime_factors entries from 'bigbase' up to, but not
+ * including, 'littlebase'. Does not validate that bigbase represents
+ * larger units than littlebase; if it does not, the loop never runs
+ * and 1 is returned.
+ *
+ * Returns 0 if there is an overflow. Overflow is declared as soon as
+ * the product would need any of the top 8 bits of the 64-bit result.
+ */
+static npy_uint64
+get_datetime_units_factor(NPY_DATETIMEUNIT bigbase, NPY_DATETIMEUNIT littlebase)
+{
+    /* Largest accepted product: the top 8 bits must stay clear */
+    const npy_uint64 max_factor = 0x00ffffffffffffffULL;
+    npy_uint64 factor = 1;
+    int unit = (int)bigbase;
+
+    while (littlebase > unit) {
+        npy_uint64 step = _datetime_factors[unit];
+
+        /*
+         * Test before multiplying. An 8-bit margin (256) is smaller
+         * than a single step of 1000, so checking the bits after the
+         * multiplication could be fooled by a product that has
+         * already wrapped around 64 bits.
+         */
+        if (step == 0 || factor > max_factor / step) {
+            return 0;
+        }
+        factor *= step;
+        ++unit;
+    }
+    return factor;
+}
+
+/*
+ * Greatest common divisor of two unsigned 64-bit values, computed
+ * with the Euclidean algorithm. When one argument is zero the other
+ * argument is returned.
+ */
+static npy_uint64
+_uint64_euclidean_gcd(npy_uint64 x, npy_uint64 y)
+{
+    /*
+     * No up-front ordering is required: when x < y the first pass
+     * simply exchanges the two values, and from then on x > y holds.
+     */
+    while (y != 0) {
+        npy_uint64 remainder = x % y;
+
+        x = y;
+        y = remainder;
+    }
+
+    return x;
+}
+
+/*
+ * Computes the greatest common divisor of the datetime metadata
+ * attached to 'type1' and 'type2'.
+ *
+ * Both dtypes must be NPY_DATETIME or NPY_TIMEDELTA, otherwise a
+ * TypeError is set. When the base units differ, the multiplier of the
+ * coarser unit is rescaled into the finer unit before the GCD of the
+ * two multipliers is taken. A TypeError is set when the units cannot
+ * be combined: years, months and business days mixed with any other
+ * unit, or, when either side has events != 1, any mismatch in base
+ * unit or events. An OverflowError is set when the rescaling
+ * overflows or the resulting multiplier does not fit in a positive
+ * int.
+ *
+ * Returns a capsule wrapping a newly allocated
+ * PyArray_DatetimeMetaData, or NULL on failure.
+ */
+NPY_NO_EXPORT PyObject *
+compute_datetime_metadata_greatest_common_divisor(
+        PyArray_Descr *type1,
+        PyArray_Descr *type2)
+{
+    PyArray_DatetimeMetaData *meta1, *meta2, *dt_data;
+    NPY_DATETIMEUNIT base;
+    npy_uint64 num1, num2, num;
+    int gcd_num;
+    int events = 1;
+
+    /* Both operands (not just the first) must be datetime types */
+    if ((type1->type_num != NPY_DATETIME &&
+                type1->type_num != NPY_TIMEDELTA) ||
+            (type2->type_num != NPY_DATETIME &&
+                type2->type_num != NPY_TIMEDELTA)) {
+        PyErr_SetString(PyExc_TypeError,
+                "Require datetime types for metadata "
+                "greatest common divisor operation");
+        return NULL;
+    }
+
+    meta1 = get_datetime_metadata_from_dtype(type1);
+    if (meta1 == NULL) {
+        return NULL;
+    }
+    meta2 = get_datetime_metadata_from_dtype(type2);
+    if (meta2 == NULL) {
+        return NULL;
+    }
+
+    if (meta1->events != 1 || meta2->events != 1) {
+        /*
+         * When there are events specified, both the units
+         * base and the events must match.
+         */
+        if (meta1->base != meta2->base || meta1->events != meta2->events) {
+            goto incompatible_units;
+        }
+        events = meta1->events;
+    }
+
+    num1 = (npy_uint64)meta1->num;
+    num2 = (npy_uint64)meta2->num;
+
+    /* First validate that the units have a reasonable GCD */
+    if (meta1->base == meta2->base) {
+        base = meta1->base;
+    }
+    else {
+        NPY_DATETIMEUNIT coarse_base;
+        npy_uint64 *coarse_num;
+        npy_uint64 factor, scaled;
+
+        /*
+         * Years, Months, and Business days are incompatible with
+         * all other units.
+         */
+        if (meta1->base == NPY_FR_Y || meta1->base == NPY_FR_M ||
+                meta1->base == NPY_FR_B ||
+                meta2->base == NPY_FR_Y ||
+                meta2->base == NPY_FR_M ||
+                meta2->base == NPY_FR_B) {
+            goto incompatible_units;
+        }
+
+        /*
+         * Take the greater base (unit sizes are decreasing in enum)
+         * and remember which multiplier belongs to the coarser unit,
+         * since that is the one which has to be rescaled.
+         */
+        if (meta1->base > meta2->base) {
+            base = meta1->base;
+            coarse_base = meta2->base;
+            coarse_num = &num2;
+        }
+        else {
+            base = meta2->base;
+            coarse_base = meta1->base;
+            coarse_num = &num1;
+        }
+
+        /* A zero factor is the helper's overflow signal */
+        factor = get_datetime_units_factor(coarse_base, base);
+        if (factor == 0) {
+            goto units_overflow;
+        }
+
+        /*
+         * The multiplication itself may wrap around 64 bits; dividing
+         * the product back out detects that. A zero product is
+         * rejected as well.
+         */
+        scaled = (*coarse_num) * factor;
+        if (scaled == 0 || scaled / factor != *coarse_num) {
+            goto units_overflow;
+        }
+        *coarse_num = scaled;
+    }
+
+    /* Compute the GCD of the resulting multipliers */
+    num = _uint64_euclidean_gcd(num1, num2);
+
+    /*
+     * Range-check the result before allocating, so that the overflow
+     * path never has an allocation to release.
+     */
+    gcd_num = (int)num;
+    if (gcd_num <= 0 || num != (npy_uint64)gcd_num) {
+        goto units_overflow;
+    }
+
+    /* Create and return the metadata capsule */
+    dt_data = PyArray_malloc(sizeof(PyArray_DatetimeMetaData));
+    if (dt_data == NULL) {
+        return PyErr_NoMemory();
+    }
+
+    dt_data->base = base;
+    dt_data->num = gcd_num;
+    dt_data->events = events;
+
+    return NpyCapsule_FromVoidPtr((void *)dt_data, simple_capsule_dtor);
+
+incompatible_units: {
+        PyObject *errmsg;
+        errmsg = PyUString_FromString("Cannot get "
+                    "a common metadata divisor for types ");
+        PyUString_ConcatAndDel(&errmsg,
+                PyObject_Repr((PyObject *)type1));
+        PyUString_ConcatAndDel(&errmsg,
+                PyUString_FromString(" and "));
+        PyUString_ConcatAndDel(&errmsg,
+                PyObject_Repr((PyObject *)type2));
+        PyUString_ConcatAndDel(&errmsg,
+                PyUString_FromString(" because they have "
+                    "incompatible base units or events"));
+        PyErr_SetObject(PyExc_TypeError, errmsg);
+        /* PyErr_SetObject does not steal the reference to errmsg */
+        Py_XDECREF(errmsg);
+        return NULL;
+    }
+units_overflow: {
+        PyObject *errmsg;
+        errmsg = PyUString_FromString("Integer overflow "
+                    "getting a common metadata divisor for types ");
+        PyUString_ConcatAndDel(&errmsg,
+                PyObject_Repr((PyObject *)type1));
+        PyUString_ConcatAndDel(&errmsg,
+                PyUString_FromString(" and "));
+        PyUString_ConcatAndDel(&errmsg,
+                PyObject_Repr((PyObject *)type2));
+        PyErr_SetObject(PyExc_OverflowError, errmsg);
+        /* PyErr_SetObject does not steal the reference to errmsg */
+        Py_XDECREF(errmsg);
+        return NULL;
+    }
+}
+
+/*
* Converts a substring given by 'str' and 'len' into
* a date time unit enum value. The 'metastr' parameter
* is used for error messages, and may be NULL.
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index 0728654ee..6c37b01f8 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -26,10 +26,47 @@ class TestDateTime(TestCase):
assert_raises(TypeError, np.dtype, 'm16')
def test_dtype_promotion(self):
+ # datetime <op> datetime requires matching units
assert_equal(np.promote_types(np.dtype('M8[Y]'), np.dtype('M8[Y]')),
np.dtype('M8[Y]'))
assert_raises(TypeError, np.promote_types,
np.dtype('M8[Y]'), np.dtype('M8[M]'))
+ # timedelta <op> timedelta computes the metadata gcd
+ assert_equal(
+ np.promote_types(np.dtype('m8[2Y]'), np.dtype('m8[2Y]')),
+ np.dtype('m8[2Y]'))
+ assert_equal(
+ np.promote_types(np.dtype('m8[12Y]'), np.dtype('m8[15Y]')),
+ np.dtype('m8[3Y]'))
+ assert_equal(
+ np.promote_types(np.dtype('m8[62M]'), np.dtype('m8[24M]')),
+ np.dtype('m8[2M]'))
+ assert_equal(
+ np.promote_types(np.dtype('m8[1W]'), np.dtype('m8[2D]')),
+ np.dtype('m8[1D]'))
+ assert_equal(
+ np.promote_types(np.dtype('m8[W]'), np.dtype('m8[13s]')),
+ np.dtype('m8[s]'))
+ assert_equal(
+ np.promote_types(np.dtype('m8[13W]'), np.dtype('m8[49s]')),
+ np.dtype('m8[7s]'))
+ # timedelta <op> timedelta raises when there is no reasonable gcd
+ assert_raises(TypeError, np.promote_types,
+ np.dtype('m8[Y]'), np.dtype('m8[M]'))
+ assert_raises(TypeError, np.promote_types,
+ np.dtype('m8[Y]'), np.dtype('m8[D]'))
+ assert_raises(TypeError, np.promote_types,
+ np.dtype('m8[Y]'), np.dtype('m8[B]'))
+ assert_raises(TypeError, np.promote_types,
+ np.dtype('m8[D]'), np.dtype('m8[B]'))
+ assert_raises(TypeError, np.promote_types,
+ np.dtype('m8[M]'), np.dtype('m8[W]'))
+ # timedelta <op> timedelta may overflow with big unit ranges
+ assert_raises(OverflowError, np.promote_types,
+ np.dtype('m8[W]'), np.dtype('m8[fs]'))
+ assert_raises(OverflowError, np.promote_types,
+ np.dtype('m8[s]'), np.dtype('m8[as]'))
+
def test_hours(self):
t = np.ones(3, dtype='M8[s]')