summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2021-10-19 17:48:35 +0300
committerGitHub <noreply@github.com>2021-10-19 17:48:35 +0300
commit405c6eec7937721bd2e284dceaf4f49d5e6e39e8 (patch)
tree219cfaf9b01b5a99c7aaf13b427618225c2e059f
parentdd2eaaabdb0451631e95376ae2b4d319082b438e (diff)
parent27f3b03dcc4fa2570825f9233f2e593c3c320673 (diff)
downloadnumpy-405c6eec7937721bd2e284dceaf4f49d5e6e39e8.tar.gz
Merge pull request #18905 from seberg/ufunc-refactor-2021
MAINT: Refactor reductions to use NEP 43 style dispatching/promotion
-rw-r--r--numpy/core/src/multiarray/array_method.c7
-rw-r--r--numpy/core/src/multiarray/array_method.h11
-rw-r--r--numpy/core/src/umath/_scaled_float_dtype.c52
-rw-r--r--numpy/core/src/umath/dispatching.c177
-rw-r--r--numpy/core/src/umath/dispatching.h4
-rw-r--r--numpy/core/src/umath/legacy_array_method.c19
-rw-r--r--numpy/core/src/umath/reduction.c57
-rw-r--r--numpy/core/src/umath/reduction.h110
-rw-r--r--numpy/core/src/umath/ufunc_object.c773
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.c22
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.h4
-rw-r--r--numpy/core/src/umath/umathmodule.c29
-rw-r--r--numpy/core/tests/test_custom_dtypes.py36
-rw-r--r--numpy/core/tests/test_datetime.py15
-rw-r--r--numpy/core/tests/test_ufunc.py69
-rw-r--r--numpy/ma/core.py2
16 files changed, 866 insertions, 521 deletions
diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c
index 406b0c6ff..d93dac506 100644
--- a/numpy/core/src/multiarray/array_method.c
+++ b/numpy/core/src/multiarray/array_method.c
@@ -780,6 +780,13 @@ _masked_stridedloop_data_free(NpyAuxData *auxdata)
* This function wraps a regular unmasked strided-loop as a
* masked strided-loop, only calling the function for elements
* where the mask is True.
+ *
+ * TODO: Reductions also use this code to implement masked reductions.
+ * Before consolidating them, reductions had a special case for
+ * broadcasts: when the mask stride was 0 the code does not check all
+ * elements as `npy_memchr` currently does.
+ * It may be worthwhile to add such an optimization again if broadcasted
+ * masks are common enough.
*/
static int
generic_masked_strided_loop(PyArrayMethod_Context *context,
diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h
index b29c7c077..7b7372bd0 100644
--- a/numpy/core/src/multiarray/array_method.h
+++ b/numpy/core/src/multiarray/array_method.h
@@ -21,6 +21,17 @@ typedef enum {
NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2,
/* Whether the method supports unaligned access (not runtime) */
NPY_METH_SUPPORTS_UNALIGNED = 1 << 3,
+ /*
+ * Private flag for now for *logic* functions. The logical functions
+ * `logical_or` and `logical_and` can always cast the inputs to booleans
+ * "safely" (because that is how the cast to bool is defined).
+ * @seberg: I am not sure this is the best way to handle this, so its
+ * private for now (also it is very limited anyway).
+ * There is one "exception". NA aware dtypes cannot cast to bool
+ * (hopefully), so the `??->?` loop should error even with this flag.
+ * But a second NA fallback loop will be necessary.
+ */
+ _NPY_METH_FORCE_CAST_INPUTS = 1 << 17,
/* All flags which can change at runtime */
NPY_METH_RUNTIME_FLAGS = (
diff --git a/numpy/core/src/umath/_scaled_float_dtype.c b/numpy/core/src/umath/_scaled_float_dtype.c
index eeef33a3d..b6c19362a 100644
--- a/numpy/core/src/umath/_scaled_float_dtype.c
+++ b/numpy/core/src/umath/_scaled_float_dtype.c
@@ -398,6 +398,42 @@ float_to_from_sfloat_resolve_descriptors(
}
+/*
+ * Cast to boolean (for testing the logical functions a bit better).
+ */
+static int
+cast_sfloat_to_bool(PyArrayMethod_Context *NPY_UNUSED(context),
+ char *const data[], npy_intp const dimensions[],
+ npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
+{
+ npy_intp N = dimensions[0];
+ char *in = data[0];
+ char *out = data[1];
+ for (npy_intp i = 0; i < N; i++) {
+ *(npy_bool *)out = *(double *)in != 0;
+ in += strides[0];
+ out += strides[1];
+ }
+ return 0;
+}
+
+static NPY_CASTING
+sfloat_to_bool_resolve_descriptors(
+ PyArrayMethodObject *NPY_UNUSED(self),
+ PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]),
+ PyArray_Descr *given_descrs[2],
+ PyArray_Descr *loop_descrs[2])
+{
+ Py_INCREF(given_descrs[0]);
+ loop_descrs[0] = given_descrs[0];
+ if (loop_descrs[0] == NULL) {
+ return -1;
+ }
+ loop_descrs[1] = PyArray_DescrFromType(NPY_BOOL); /* cannot fail */
+ return NPY_UNSAFE_CASTING;
+}
+
+
static int
init_casts(void)
{
@@ -453,6 +489,22 @@ init_casts(void)
return -1;
}
+ slots[0].slot = NPY_METH_resolve_descriptors;
+ slots[0].pfunc = &sfloat_to_bool_resolve_descriptors;
+ slots[1].slot = NPY_METH_strided_loop;
+ slots[1].pfunc = &cast_sfloat_to_bool;
+ slots[2].slot = 0;
+ slots[2].pfunc = NULL;
+
+ spec.name = "sfloat_to_bool_cast";
+ dtypes[0] = &PyArray_SFloatDType;
+ dtypes[1] = PyArray_DTypeFromTypeNum(NPY_BOOL);
+ Py_DECREF(dtypes[1]); /* immortal anyway */
+
+ if (PyArray_AddCastingImplementation_FromSpec(&spec, 0)) {
+ return -1;
+ }
+
return 0;
}
diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c
index 40de28754..9c76b40e0 100644
--- a/numpy/core/src/umath/dispatching.c
+++ b/numpy/core/src/umath/dispatching.c
@@ -267,8 +267,39 @@ resolve_implementation_info(PyUFuncObject *ufunc,
* the subclass should be considered a better match
* (subclasses are always more specific).
*/
+ /* Whether this (normally output) dtype was specified at all */
+ if (op_dtypes[i] == NULL) {
+ /*
+ * When DType is completely unspecified, prefer abstract
+ * over concrete, assuming it will resolve.
+ * Furthermore, we cannot decide which abstract/None
+ * is "better", only concrete ones which are subclasses
+ * of Abstract ones are defined as worse.
+ */
+ npy_bool prev_is_concrete = NPY_FALSE;
+ npy_bool new_is_concrete = NPY_FALSE;
+ if ((prev_dtype != Py_None) &&
+ !NPY_DT_is_abstract((PyArray_DTypeMeta *)prev_dtype)) {
+ prev_is_concrete = NPY_TRUE;
+ }
+ if ((new_dtype != Py_None) &&
+ !NPY_DT_is_abstract((PyArray_DTypeMeta *)new_dtype)) {
+ new_is_concrete = NPY_TRUE;
+ }
+ if (prev_is_concrete == new_is_concrete) {
+ best = -1;
+ }
+ else if (prev_is_concrete) {
+ unambiguously_equally_good = 0;
+ best = 1;
+ }
+ else {
+ unambiguously_equally_good = 0;
+ best = 0;
+ }
+ }
/* If either is None, the other is strictly more specific */
- if (prev_dtype == Py_None) {
+ else if (prev_dtype == Py_None) {
unambiguously_equally_good = 0;
best = 1;
}
@@ -289,13 +320,29 @@ resolve_implementation_info(PyUFuncObject *ufunc,
*/
best = -1;
}
+ else if (!NPY_DT_is_abstract((PyArray_DTypeMeta *)prev_dtype)) {
+ /* old is not abstract, so better (both not possible) */
+ unambiguously_equally_good = 0;
+ best = 0;
+ }
+ else if (!NPY_DT_is_abstract((PyArray_DTypeMeta *)new_dtype)) {
+ /* new is not abstract, so better (both not possible) */
+ unambiguously_equally_good = 0;
+ best = 1;
+ }
/*
- * TODO: Unreachable, but we will need logic for abstract
- * DTypes to decide if one is a subclass of the other
- * (And their subclass relation is well defined.)
+ * TODO: This will need logic for abstract DTypes to decide if
+ * one is a subclass of the other (And their subclass
+ * relation is well defined). For now, we bail out
+ * in cas someone manages to get here.
*/
else {
- assert(0);
+ PyErr_SetString(PyExc_NotImplementedError,
+ "deciding which one of two abstract dtypes is "
+ "a better match is not yet implemented. This "
+ "will pick the better (or bail) in the future.");
+ *out_info = NULL;
+ return -1;
}
if ((current_best != -1) && (current_best != best)) {
@@ -612,6 +659,35 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
}
return info;
}
+ else if (info == NULL && op_dtypes[0] == NULL) {
+ /*
+ * If we have a reduction, fill in the unspecified input/array
+ * assuming it should have the same dtype as the operand input
+ * (or the output one if given).
+ * Then, try again. In some cases, this will choose different
+ * paths, such as `ll->?` instead of an `??->?` loop for `np.equal`
+ * when the input is `.l->.` (`.` meaning undefined). This will
+ * then cause an error. But cast to `?` would always lose
+ * information, and in many cases important information:
+ *
+ * ```python
+ * from operator import eq
+ * from functools import reduce
+ *
+ * reduce(eq, [1, 2, 3]) != reduce(eq, [True, True, True])
+ * ```
+ *
+ * The special cases being `logical_(and|or|xor)` which can always
+ * cast to boolean ahead of time and still give the right answer
+ * (unsafe cast to bool is fine here). We special case these at
+ * the time of this comment (NumPy 1.21).
+ */
+ assert(ufunc->nin == 2 && ufunc->nout == 1);
+ op_dtypes[0] = op_dtypes[2] != NULL ? op_dtypes[2] : op_dtypes[1];
+ Py_INCREF(op_dtypes[0]);
+ return promote_and_get_info_and_ufuncimpl(ufunc,
+ ops, signature, op_dtypes, allow_legacy_promotion, 1);
+ }
}
/*
@@ -743,3 +819,94 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
return method;
}
+
+
+/*
+ * Special promoter for the logical ufuncs. The logical ufuncs can always
+ * use the ??->? and still get the correct output (as long as the output
+ * is not supposed to be `object`).
+ */
+static int
+logical_ufunc_promoter(PyUFuncObject *NPY_UNUSED(ufunc),
+ PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
+ PyArray_DTypeMeta *new_op_dtypes[])
+{
+ /*
+ * If we find any object DType at all, we currently force to object.
+ * However, if the output is specified and not object, there is no point,
+ * it should be just as well to cast the input rather than doing the
+ * unsafe out cast.
+ */
+ int force_object = 0;
+
+ for (int i = 0; i < 3; i++) {
+ PyArray_DTypeMeta *item;
+ if (signature[i] != NULL) {
+ item = signature[i];
+ Py_INCREF(item);
+ if (item->type_num == NPY_OBJECT) {
+ force_object = 1;
+ }
+ }
+ else {
+ /* Always override to boolean */
+ item = PyArray_DTypeFromTypeNum(NPY_BOOL);
+ if (op_dtypes[i] != NULL && op_dtypes[i]->type_num == NPY_OBJECT) {
+ force_object = 1;
+ }
+ }
+ new_op_dtypes[i] = item;
+ }
+
+ if (!force_object || (op_dtypes[2] != NULL
+ && op_dtypes[2]->type_num != NPY_OBJECT)) {
+ return 0;
+ }
+ /*
+ * Actually, we have to use the OBJECT loop after all, set all we can
+ * to object (that might not work out, but try).
+ *
+ * NOTE: Change this to check for `op_dtypes[0] == NULL` to STOP
+ * returning `object` for `np.logical_and.reduce(obj_arr)`
+ * which will also affect `np.all` and `np.any`!
+ */
+ for (int i = 0; i < 3; i++) {
+ if (signature[i] != NULL) {
+ continue;
+ }
+ Py_SETREF(new_op_dtypes[i], PyArray_DTypeFromTypeNum(NPY_OBJECT));
+ }
+ return 0;
+}
+
+
+NPY_NO_EXPORT int
+install_logical_ufunc_promoter(PyObject *ufunc)
+{
+ if (PyObject_Type(ufunc) != (PyObject *)&PyUFunc_Type) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "internal numpy array, logical ufunc was not a ufunc?!");
+ return -1;
+ }
+ PyObject *dtype_tuple = PyTuple_Pack(3,
+ &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type, NULL);
+ if (dtype_tuple == NULL) {
+ return -1;
+ }
+ PyObject *promoter = PyCapsule_New(&logical_ufunc_promoter,
+ "numpy._ufunc_promoter", NULL);
+ if (promoter == NULL) {
+ Py_DECREF(dtype_tuple);
+ return -1;
+ }
+
+ PyObject *info = PyTuple_Pack(2, dtype_tuple, promoter);
+ Py_DECREF(dtype_tuple);
+ Py_DECREF(promoter);
+ if (info == NULL) {
+ return -1;
+ }
+
+ return PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0);
+}
+
diff --git a/numpy/core/src/umath/dispatching.h b/numpy/core/src/umath/dispatching.h
index 8d116873c..2f314615d 100644
--- a/numpy/core/src/umath/dispatching.h
+++ b/numpy/core/src/umath/dispatching.h
@@ -26,4 +26,8 @@ NPY_NO_EXPORT PyObject *
add_and_return_legacy_wrapping_ufunc_loop(PyUFuncObject *ufunc,
PyArray_DTypeMeta *operation_dtypes[], int ignore_duplicate);
+NPY_NO_EXPORT int
+install_logical_ufunc_promoter(PyObject *ufunc);
+
+
#endif /*_NPY_DISPATCHING_H */
diff --git a/numpy/core/src/umath/legacy_array_method.c b/numpy/core/src/umath/legacy_array_method.c
index 77b1b9013..a423823d4 100644
--- a/numpy/core/src/umath/legacy_array_method.c
+++ b/numpy/core/src/umath/legacy_array_method.c
@@ -217,6 +217,25 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc,
*/
int any_output_flexible = 0;
NPY_ARRAYMETHOD_FLAGS flags = 0;
+ if (ufunc->nargs == 3 &&
+ signature[0]->type_num == NPY_BOOL &&
+ signature[1]->type_num == NPY_BOOL &&
+ signature[2]->type_num == NPY_BOOL && (
+ strcmp(ufunc->name, "logical_or") == 0 ||
+ strcmp(ufunc->name, "logical_and") == 0 ||
+ strcmp(ufunc->name, "logical_xor") == 0)) {
+ /*
+ * This is a logical ufunc, and the `??->?` loop`. It is always OK
+ * to cast any input to bool, because that cast is defined by
+ * truthiness.
+ * This allows to ensure two things:
+ * 1. `np.all`/`np.any` know that force casting the input is OK
+ * (they must do this since there are no `?l->?`, etc. loops)
+ * 2. The logical functions automatically work for any DType
+ * implementing a cast to boolean.
+ */
+ flags = _NPY_METH_FORCE_CAST_INPUTS;
+ }
for (int i = 0; i < ufunc->nin+ufunc->nout; i++) {
if (signature[i]->singleton->flags & (
diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c
index d5a251368..c28c8abd8 100644
--- a/numpy/core/src/umath/reduction.c
+++ b/numpy/core/src/umath/reduction.c
@@ -145,14 +145,12 @@ PyArray_CopyInitialReduceValues(
* boilerplate code, just calling the appropriate inner loop function where
* necessary.
*
+ * context : The ArrayMethod context (with ufunc, method, and descriptors).
* operand : The array to be reduced.
* out : NULL, or the array into which to place the result.
* wheremask : NOT YET SUPPORTED, but this parameter is placed here
* so that support can be added in the future without breaking
* API compatibility. Pass in NULL.
- * operand_dtype : The dtype the inner loop expects for the operand.
- * result_dtype : The dtype the inner loop expects for the result.
- * casting : The casting rule to apply to the operands.
* axis_flags : Flags indicating the reduction axes of 'operand'.
* reorderable : If True, the reduction being done is reorderable, which
* means specifying multiple axes of reduction at once is ok,
@@ -182,10 +180,8 @@ PyArray_CopyInitialReduceValues(
* generalized ufuncs!)
*/
NPY_NO_EXPORT PyArrayObject *
-PyUFunc_ReduceWrapper(
+PyUFunc_ReduceWrapper(PyArrayMethod_Context *context,
PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask,
- PyArray_Descr *operand_dtype, PyArray_Descr *result_dtype,
- NPY_CASTING casting,
npy_bool *axis_flags, int reorderable, int keepdims,
PyObject *identity, PyArray_ReduceLoopFunc *loop,
void *data, npy_intp buffersize, const char *funcname, int errormask)
@@ -199,6 +195,8 @@ PyUFunc_ReduceWrapper(
PyArrayObject *op[3];
PyArray_Descr *op_dtypes[3];
npy_uint32 it_flags, op_flags[3];
+ /* Loop auxdata (must be freed on error) */
+ NpyAuxData *auxdata = NULL;
/* More than one axis means multiple orders are possible */
if (!reorderable && count_axes(PyArray_NDIM(operand), axis_flags) > 1) {
@@ -221,8 +219,8 @@ PyUFunc_ReduceWrapper(
/* Set up the iterator */
op[0] = out;
op[1] = operand;
- op_dtypes[0] = result_dtype;
- op_dtypes[1] = operand_dtype;
+ op_dtypes[0] = context->descriptors[0];
+ op_dtypes[1] = context->descriptors[1];
it_flags = NPY_ITER_BUFFERED |
NPY_ITER_EXTERNAL_LOOP |
@@ -291,7 +289,7 @@ PyUFunc_ReduceWrapper(
}
iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, it_flags,
- NPY_KEEPORDER, casting,
+ NPY_KEEPORDER, NPY_UNSAFE_CASTING,
op_flags,
op_dtypes,
PyArray_NDIM(operand), op_axes, NULL, buffersize);
@@ -301,9 +299,29 @@ PyUFunc_ReduceWrapper(
result = NpyIter_GetOperandArray(iter)[0];
- int needs_api = NpyIter_IterationNeedsAPI(iter);
- /* Start with the floating-point exception flags cleared */
- npy_clear_floatstatus_barrier((char*)&iter);
+ PyArrayMethod_StridedLoop *strided_loop;
+ NPY_ARRAYMETHOD_FLAGS flags = 0;
+ npy_intp fixed_strides[3];
+ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
+ if (wheremask != NULL) {
+ if (PyArrayMethod_GetMaskedStridedLoop(context,
+ 1, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
+ goto fail;
+ }
+ }
+ else {
+ if (context->method->get_strided_loop(context,
+ 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
+ goto fail;
+ }
+ }
+
+ int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ needs_api |= NpyIter_IterationNeedsAPI(iter);
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* Start with the floating-point exception flags cleared */
+ npy_clear_floatstatus_barrier((char*)&iter);
+ }
/*
* Initialize the result to the reduction unit if possible,
@@ -345,16 +363,18 @@ PyUFunc_ReduceWrapper(
strideptr = NpyIter_GetInnerStrideArray(iter);
countptr = NpyIter_GetInnerLoopSizePtr(iter);
- if (loop(iter, dataptr, strideptr, countptr,
- iternext, needs_api, skip_first_count, data) < 0) {
+ if (loop(context, strided_loop, auxdata,
+ iter, dataptr, strideptr, countptr, iternext,
+ needs_api, skip_first_count) < 0) {
goto fail;
}
}
- /* Check whether any errors occurred during the loop */
- if (PyErr_Occurred() ||
- _check_ufunc_fperr(errormask, NULL, "reduce") < 0) {
- goto fail;
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* NOTE: We could check float errors even on error */
+ if (_check_ufunc_fperr(errormask, NULL, "reduce") < 0) {
+ goto fail;
+ }
}
if (out != NULL) {
@@ -369,6 +389,7 @@ PyUFunc_ReduceWrapper(
return result;
fail:
+ NPY_AUXDATA_FREE(auxdata);
if (iter != NULL) {
NpyIter_Deallocate(iter);
}
diff --git a/numpy/core/src/umath/reduction.h b/numpy/core/src/umath/reduction.h
index 372605dba..2170e27a7 100644
--- a/numpy/core/src/umath/reduction.h
+++ b/numpy/core/src/umath/reduction.h
@@ -19,93 +19,17 @@ typedef int (PyArray_AssignReduceIdentityFunc)(PyArrayObject *result,
void *data);
/*
- * This is a function for the reduce loop.
+ * Inner definition of the reduce loop, only used for a static function.
+ * At some point around NumPy 1.6, there was probably an intention to make
+ * the reduce loop customizable at this level (per ufunc?).
*
- * The needs_api parameter indicates whether it's ok to release the GIL during
- * the loop, such as when the iternext() function never calls
- * a function which could raise a Python exception.
- *
- * The skip_first_count parameter indicates how many elements need to be
- * skipped based on NpyIter_IsFirstVisit checks. This can only be positive
- * when the 'assign_identity' parameter was NULL when calling
- * PyArray_ReduceWrapper.
- *
- * The loop gets two data pointers and two strides, and should
- * look roughly like this:
- * {
- * NPY_BEGIN_THREADS_DEF;
- * if (!needs_api) {
- * NPY_BEGIN_THREADS;
- * }
- * // This first-visit loop can be skipped if 'assign_identity' was non-NULL
- * if (skip_first_count > 0) {
- * do {
- * char *data0 = dataptr[0], *data1 = dataptr[1];
- * npy_intp stride0 = strideptr[0], stride1 = strideptr[1];
- * npy_intp count = *countptr;
- *
- * // Skip any first-visit elements
- * if (NpyIter_IsFirstVisit(iter, 0)) {
- * if (stride0 == 0) {
- * --count;
- * --skip_first_count;
- * data1 += stride1;
- * }
- * else {
- * skip_first_count -= count;
- * count = 0;
- * }
- * }
- *
- * while (count--) {
- * *(result_t *)data0 = my_reduce_op(*(result_t *)data0,
- * *(operand_t *)data1);
- * data0 += stride0;
- * data1 += stride1;
- * }
- *
- * // Jump to the faster loop when skipping is done
- * if (skip_first_count == 0) {
- * if (iternext(iter)) {
- * break;
- * }
- * else {
- * goto finish_loop;
- * }
- * }
- * } while (iternext(iter));
- * }
- * do {
- * char *data0 = dataptr[0], *data1 = dataptr[1];
- * npy_intp stride0 = strideptr[0], stride1 = strideptr[1];
- * npy_intp count = *countptr;
- *
- * while (count--) {
- * *(result_t *)data0 = my_reduce_op(*(result_t *)data0,
- * *(operand_t *)data1);
- * data0 += stride0;
- * data1 += stride1;
- * }
- * } while (iternext(iter));
- * finish_loop:
- * if (!needs_api) {
- * NPY_END_THREADS;
- * }
- * return (needs_api && PyErr_Occurred()) ? -1 : 0;
- * }
- *
- * If needs_api is True, this function should call PyErr_Occurred()
- * to check if an error occurred during processing, and return -1 for
- * error, 0 for success.
+ * TODO: This should be refactored/removed.
*/
-typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter,
- char **dataptr,
- npy_intp const *strideptr,
- npy_intp const *countptr,
- NpyIter_IterNextFunc *iternext,
- int needs_api,
- npy_intp skip_first_count,
- void *data);
+typedef int (PyArray_ReduceLoopFunc)(PyArrayMethod_Context *context,
+ PyArrayMethod_StridedLoop *strided_loop, NpyAuxData *auxdata,
+ NpyIter *iter, char **dataptrs, npy_intp const *strides,
+ npy_intp const *countptr, NpyIter_IterNextFunc *iternext,
+ int needs_api, npy_intp skip_first_count);
/*
* This function executes all the standard NumPy reduction function
@@ -138,16 +62,10 @@ typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter,
* errormask : forwarded from _get_bufsize_errmask
*/
NPY_NO_EXPORT PyArrayObject *
-PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
- PyArrayObject *wheremask,
- PyArray_Descr *operand_dtype,
- PyArray_Descr *result_dtype,
- NPY_CASTING casting,
- npy_bool *axis_flags, int reorderable,
- int keepdims,
- PyObject *identity,
- PyArray_ReduceLoopFunc *loop,
- void *data, npy_intp buffersize, const char *funcname,
- int errormask);
+PyUFunc_ReduceWrapper(PyArrayMethod_Context *context,
+ PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask,
+ npy_bool *axis_flags, int reorderable, int keepdims,
+ PyObject *identity, PyArray_ReduceLoopFunc *loop,
+ void *data, npy_intp buffersize, const char *funcname, int errormask);
#endif
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 42290e8c9..228e67576 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -990,6 +990,7 @@ convert_ufunc_arguments(PyUFuncObject *ufunc,
}
/* Convert and fill in output arguments */
+ memset(out_op_DTypes + nin, 0, nout * sizeof(*out_op_DTypes));
if (full_args.out != NULL) {
for (int i = 0; i < nout; i++) {
obj = PyTuple_GET_ITEM(full_args.out, i);
@@ -1047,6 +1048,7 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl,
PyArrayObject **op, PyArray_Descr **dtypes,
NPY_CASTING casting, npy_intp buffersize)
{
+ int force_cast_input = ufuncimpl->flags & _NPY_METH_FORCE_CAST_INPUTS;
int i, nin = ufuncimpl->nin, nop = nin + ufuncimpl->nout;
for (i = 0; i < nop; ++i) {
@@ -1070,7 +1072,13 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl,
must_copy = 1;
}
- if (PyArray_MinCastSafety(safety, casting) != casting) {
+ if (force_cast_input && i < nin) {
+ /*
+ * ArrayMethod flagged to ignore casting (logical funcs
+ * can force cast to bool)
+ */
+ }
+ else if (PyArray_MinCastSafety(safety, casting) != casting) {
return 0; /* the cast is not safe enough */
}
}
@@ -1360,8 +1368,15 @@ validate_casting(PyArrayMethodObject *method, PyUFuncObject *ufunc,
*/
return 0;
}
- if (PyUFunc_ValidateCasting(ufunc, casting, ops, descriptors) < 0) {
- return -1;
+ if (method->flags & _NPY_METH_FORCE_CAST_INPUTS) {
+ if (PyUFunc_ValidateOutCasting(ufunc, casting, ops, descriptors) < 0) {
+ return -1;
+ }
+ }
+ else {
+ if (PyUFunc_ValidateCasting(ufunc, casting, ops, descriptors) < 0) {
+ return -1;
+ }
}
return 0;
}
@@ -2470,9 +2485,9 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
/* Final preparation of the arraymethod call */
PyArrayMethod_Context context = {
- .caller = (PyObject *)ufunc,
- .method = ufuncimpl,
- .descriptors = operation_descrs,
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = operation_descrs,
};
PyArrayMethod_StridedLoop *strided_loop;
NPY_ARRAYMETHOD_FLAGS flags = 0;
@@ -2592,9 +2607,9 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc,
/* Final preparation of the arraymethod call */
PyArrayMethod_Context context = {
- .caller = (PyObject *)ufunc,
- .method = ufuncimpl,
- .descriptors = operation_descrs,
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = operation_descrs,
};
/* Do the ufunc loop */
@@ -2661,195 +2676,129 @@ PyUFunc_GenericFunction(PyUFuncObject *NPY_UNUSED(ufunc),
/*
- * Given the output type, finds the specified binary op. The
- * ufunc must have nin==2 and nout==1. The function may modify
- * otype if the given type isn't found.
+ * Promote and resolve a reduction like operation.
*
- * Returns 0 on success, -1 on failure.
+ * @param ufunc
+ * @param arr The operation array
+ * @param out The output array or NULL if not provided. Note that NumPy always
+ * used out to mean the same as `dtype=out.dtype` and never passed
+ * the array itself to the type-resolution.
+ * @param signature The DType signature, which may already be set due to the
+ * dtype passed in by the user, or the special cases (add, multiply).
+ * (Contains strong references and may be modified.)
+ * @param enforce_uniform_args If `NPY_TRUE` fully uniform dtypes/descriptors
+ * are enforced as required for accumulate and (currently) reduceat.
+ * @param out_descrs New references to the resolved descriptors (on success).
+ * @param method The ufunc method, "reduce", "reduceat", or "accumulate".
+
+ * @returns ufuncimpl The `ArrayMethod` implemention to use. Or NULL if an
+ * error occurred.
*/
-static int
-get_binary_op_function(PyUFuncObject *ufunc, int *otype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+static PyArrayMethodObject *
+reducelike_promote_and_resolve(PyUFuncObject *ufunc,
+ PyArrayObject *arr, PyArrayObject *out,
+ PyArray_DTypeMeta *signature[3],
+ npy_bool enforce_uniform_args, PyArray_Descr *out_descrs[3],
+ char *method)
{
- int i;
-
- NPY_UF_DBG_PRINT1("Getting binary op function for type number %d\n",
- *otype);
-
- /* If the type is custom and there are userloops, search for it here */
- if (ufunc->userloops != NULL && PyTypeNum_ISUSERDEF(*otype)) {
- PyObject *key, *obj;
- key = PyLong_FromLong(*otype);
- if (key == NULL) {
- return -1;
- }
- obj = PyDict_GetItemWithError(ufunc->userloops, key);
- Py_DECREF(key);
- if (obj == NULL && PyErr_Occurred()) {
- return -1;
- }
- else if (obj != NULL) {
- PyUFunc_Loop1d *funcdata = PyCapsule_GetPointer(obj, NULL);
- if (funcdata == NULL) {
- return -1;
- }
- while (funcdata != NULL) {
- int *types = funcdata->arg_types;
-
- if (types[0] == *otype && types[1] == *otype &&
- types[2] == *otype) {
- *out_innerloop = funcdata->func;
- *out_innerloopdata = funcdata->data;
- return 0;
- }
+ /*
+ * Note that the `ops` is not realy correct. But legacy resolution
+ * cannot quite handle the correct ops (e.g. a NULL first item if `out`
+ * is NULL), and it should only matter in very strange cases.
+ */
+ PyArrayObject *ops[3] = {arr, arr, NULL};
+ /*
+ * TODO: If `out` is not provided, arguably `initial` could define
+ * the first DType (and maybe also the out one), that way
+ * `np.add.reduce([1, 2, 3], initial=3.4)` would return a float
+ * value. As of 1.20, it returned an integer, so that should
+ * probably go to an error/warning first.
+ */
+ PyArray_DTypeMeta *operation_DTypes[3] = {
+ NULL, NPY_DTYPE(PyArray_DESCR(arr)), NULL};
+ Py_INCREF(operation_DTypes[1]);
- funcdata = funcdata->next;
- }
- }
+ if (out != NULL) {
+ operation_DTypes[0] = NPY_DTYPE(PyArray_DESCR(out));
+ Py_INCREF(operation_DTypes[0]);
+ operation_DTypes[2] = operation_DTypes[0];
+ Py_INCREF(operation_DTypes[2]);
}
- /* Search for a function with compatible inputs */
- for (i = 0; i < ufunc->ntypes; ++i) {
- char *types = ufunc->types + i*ufunc->nargs;
-
- NPY_UF_DBG_PRINT3("Trying loop with signature %d %d -> %d\n",
- types[0], types[1], types[2]);
-
- if (PyArray_CanCastSafely(*otype, types[0]) &&
- types[0] == types[1] &&
- (*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) {
- /* If the signature is "xx->x", we found the loop */
- if (types[2] == types[0]) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- *otype = types[0];
- return 0;
- }
- /*
- * Otherwise, we found the natural type of the reduction,
- * replace otype and search again
- */
- else {
- *otype = types[2];
- break;
- }
- }
+ PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc,
+ ops, signature, operation_DTypes, NPY_FALSE, NPY_TRUE);
+ Py_DECREF(operation_DTypes[1]);
+ if (out != NULL) {
+ Py_DECREF(operation_DTypes[0]);
+ Py_DECREF(operation_DTypes[2]);
}
-
- /* Search for the exact function */
- for (i = 0; i < ufunc->ntypes; ++i) {
- char *types = ufunc->types + i*ufunc->nargs;
-
- if (PyArray_CanCastSafely(*otype, types[0]) &&
- types[0] == types[1] &&
- types[1] == types[2] &&
- (*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) {
- /* Since the signature is "xx->x", we found the loop */
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- *otype = types[0];
- return 0;
- }
+ if (ufuncimpl == NULL) {
+ return NULL;
}
- return -1;
-}
-
-static int
-reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr,
- PyArray_Descr *odtype, PyArray_Descr **out_dtype)
-{
- int i, retcode;
- PyArrayObject *op[3] = {arr, arr, NULL};
- PyArray_Descr *dtypes[3] = {NULL, NULL, NULL};
- const char *ufunc_name = ufunc_get_name_cstr(ufunc);
- PyObject *type_tup = NULL;
-
- *out_dtype = NULL;
-
/*
- * If odtype is specified, make a type tuple for the type
- * resolution.
+ * Find the correct descriptors for the operation. We use unsafe casting
+ * for historic reasons: The logic ufuncs required it to cast everything to
+ * boolean. However, we now special case the logical ufuncs, so that the
+ * casting safety could in principle be set to the default same-kind.
+ * (although this should possibly happen through a deprecation)
*/
- if (odtype != NULL) {
- type_tup = PyTuple_Pack(3, odtype, odtype, Py_None);
- if (type_tup == NULL) {
- return -1;
- }
- }
-
- /* Use the type resolution function to find our loop */
- retcode = ufunc->type_resolver(
- ufunc, NPY_UNSAFE_CASTING,
- op, type_tup, dtypes);
- Py_DECREF(type_tup);
- if (retcode == -1) {
- return -1;
- }
- else if (retcode == -2) {
- PyErr_Format(PyExc_RuntimeError,
- "type resolution returned NotImplemented to "
- "reduce ufunc %s", ufunc_name);
- return -1;
+ if (resolve_descriptors(3, ufunc, ufuncimpl,
+ ops, out_descrs, signature, NPY_UNSAFE_CASTING) < 0) {
+ return NULL;
}
/*
- * The first two type should be equivalent. Because of how
- * reduce has historically behaved in NumPy, the return type
- * could be different, and it is the return type on which the
- * reduction occurs.
+ * The first operand and output should be the same array, so they should
+ * be identical. The second argument can be different for reductions,
+ * but is checked to be identical for accumulate and reduceat.
*/
- if (!PyArray_EquivTypes(dtypes[0], dtypes[1])) {
- for (i = 0; i < 3; ++i) {
- Py_DECREF(dtypes[i]);
- }
- PyErr_Format(PyExc_RuntimeError,
- "could not find a type resolution appropriate for "
- "reduce ufunc %s", ufunc_name);
- return -1;
+ if (out_descrs[0] != out_descrs[2] || (
+ enforce_uniform_args && out_descrs[0] != out_descrs[1])) {
+ PyErr_Format(PyExc_TypeError,
+ "the resolved dtypes are not compatible with %s.%s",
+ ufunc_get_name_cstr(ufunc), method);
+ goto fail;
+ }
+ /* TODO: This really should _not_ be unsafe casting (same above)! */
+ if (validate_casting(ufuncimpl,
+ ufunc, ops, out_descrs, NPY_UNSAFE_CASTING) < 0) {
+ goto fail;
}
- Py_DECREF(dtypes[0]);
- Py_DECREF(dtypes[1]);
- *out_dtype = dtypes[2];
+ return ufuncimpl;
- return 0;
+ fail:
+ for (int i = 0; i < 3; ++i) {
+ Py_DECREF(out_descrs[i]);
+ }
+ return NULL;
}
+
static int
-reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides,
- npy_intp const *countptr, NpyIter_IterNextFunc *iternext,
- int needs_api, npy_intp skip_first_count, void *data)
+reduce_loop(PyArrayMethod_Context *context,
+ PyArrayMethod_StridedLoop *strided_loop, NpyAuxData *auxdata,
+ NpyIter *iter, char **dataptrs, npy_intp const *strides,
+ npy_intp const *countptr, NpyIter_IterNextFunc *iternext,
+ int needs_api, npy_intp skip_first_count)
{
- PyArray_Descr *dtypes[3], **iter_dtypes;
- PyUFuncObject *ufunc = (PyUFuncObject *)data;
- char *dataptrs_copy[3];
- npy_intp strides_copy[3];
+ int retval;
+ char *dataptrs_copy[4];
+ npy_intp strides_copy[4];
npy_bool masked;
- /* The normal selected inner loop */
- PyUFuncGenericFunction innerloop = NULL;
- void *innerloopdata = NULL;
-
NPY_BEGIN_THREADS_DEF;
/* Get the number of operands, to determine whether "where" is used */
masked = (NpyIter_GetNOp(iter) == 3);
- /* Get the inner loop */
- iter_dtypes = NpyIter_GetDescrArray(iter);
- dtypes[0] = iter_dtypes[0];
- dtypes[1] = iter_dtypes[1];
- dtypes[2] = iter_dtypes[0];
- if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
- &innerloop, &innerloopdata, &needs_api) < 0) {
- return -1;
+ if (!needs_api) {
+ NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter));
}
- NPY_BEGIN_THREADS_NDITER(iter);
-
if (skip_first_count > 0) {
- do {
+ assert(!masked); /* Path currently not available for masked */
+ while (1) {
npy_intp count = *countptr;
/* Skip any first-visit elements */
@@ -2872,27 +2821,23 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides,
strides_copy[0] = strides[0];
strides_copy[1] = strides[1];
strides_copy[2] = strides[0];
- innerloop(dataptrs_copy, &count,
- strides_copy, innerloopdata);
- if (needs_api && PyErr_Occurred()) {
+ retval = strided_loop(context,
+ dataptrs_copy, &count, strides_copy, auxdata);
+ if (retval < 0) {
goto finish_loop;
}
- /* Jump to the faster loop when skipping is done */
- if (skip_first_count == 0) {
- if (iternext(iter)) {
- break;
- }
- else {
- goto finish_loop;
- }
+ /* Advance loop, and abort on error (or finish) */
+ if (!iternext(iter)) {
+ goto finish_loop;
}
- } while (iternext(iter));
- }
- if (needs_api && PyErr_Occurred()) {
- goto finish_loop;
+ /* When skipping is done break and continue with faster loop */
+ if (skip_first_count == 0) {
+ break;
+ }
+ }
}
do {
@@ -2903,42 +2848,23 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides,
strides_copy[0] = strides[0];
strides_copy[1] = strides[1];
strides_copy[2] = strides[0];
-
- if (!masked) {
- innerloop(dataptrs_copy, countptr,
- strides_copy, innerloopdata);
+ if (masked) {
+ dataptrs_copy[3] = dataptrs[2];
+ strides_copy[3] = strides[2];
}
- else {
- npy_intp count = *countptr;
- char *maskptr = dataptrs[2];
- npy_intp mask_stride = strides[2];
- /* Optimization for when the mask is broadcast */
- npy_intp n = mask_stride == 0 ? count : 1;
- while (count) {
- char mask = *maskptr;
- maskptr += mask_stride;
- while (n < count && mask == *maskptr) {
- n++;
- maskptr += mask_stride;
- }
- /* If mask set, apply inner loop on this contiguous region */
- if (mask) {
- innerloop(dataptrs_copy, &n,
- strides_copy, innerloopdata);
- }
- dataptrs_copy[0] += n * strides[0];
- dataptrs_copy[1] += n * strides[1];
- dataptrs_copy[2] = dataptrs_copy[0];
- count -= n;
- n = 1;
- }
+
+ retval = strided_loop(context,
+ dataptrs_copy, countptr, strides_copy, auxdata);
+ if (retval < 0) {
+ goto finish_loop;
}
- } while (!(needs_api && PyErr_Occurred()) && iternext(iter));
+
+ } while (iternext(iter));
finish_loop:
NPY_END_THREADS;
- return (needs_api && PyErr_Occurred()) ? -1 : 0;
+ return retval;
}
/*
@@ -2959,15 +2885,14 @@ finish_loop:
* this function does not validate them.
*/
static PyArrayObject *
-PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
- int naxes, int *axes, PyArray_Descr *odtype, int keepdims,
+PyUFunc_Reduce(PyUFuncObject *ufunc,
+ PyArrayObject *arr, PyArrayObject *out,
+ int naxes, int *axes, PyArray_DTypeMeta *signature[3], int keepdims,
PyObject *initial, PyArrayObject *wheremask)
{
int iaxes, ndim;
npy_bool reorderable;
npy_bool axis_flags[NPY_MAXDIMS];
- PyArray_Descr *dtype;
- PyArrayObject *result;
PyObject *identity;
const char *ufunc_name = ufunc_get_name_cstr(ufunc);
/* These parameters come from a TLS global */
@@ -2994,6 +2919,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
}
/* Get the identity */
+ /* TODO: Both of these should be provided by the ArrayMethod! */
identity = _get_identity(ufunc, &reorderable);
if (identity == NULL) {
return NULL;
@@ -3017,21 +2943,27 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
Py_INCREF(initial); /* match the reference count in the if above */
}
- /* Get the reduction dtype */
- if (reduce_type_resolver(ufunc, arr, odtype, &dtype) < 0) {
+ PyArray_Descr *descrs[3];
+ PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
+ arr, out, signature, NPY_FALSE, descrs, "reduce");
+ if (ufuncimpl == NULL) {
Py_DECREF(initial);
return NULL;
}
- result = PyUFunc_ReduceWrapper(arr, out, wheremask, dtype, dtype,
- NPY_UNSAFE_CASTING,
- axis_flags, reorderable,
- keepdims,
- initial,
- reduce_loop,
- ufunc, buffersize, ufunc_name, errormask);
+ PyArrayMethod_Context context = {
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = descrs,
+ };
- Py_DECREF(dtype);
+ PyArrayObject *result = PyUFunc_ReduceWrapper(&context,
+ arr, out, wheremask, axis_flags, reorderable, keepdims,
+ initial, reduce_loop, ufunc, buffersize, ufunc_name, errormask);
+
+ for (int i = 0; i < 3; i++) {
+ Py_DECREF(descrs[i]);
+ }
Py_DECREF(initial);
return result;
}
@@ -3039,23 +2971,21 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
static PyObject *
PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
- int axis, int otype)
+ int axis, PyArray_DTypeMeta *signature[3])
{
PyArrayObject *op[2];
- PyArray_Descr *op_dtypes[2] = {NULL, NULL};
int op_axes_arrays[2][NPY_MAXDIMS];
int *op_axes[2] = {op_axes_arrays[0], op_axes_arrays[1]};
npy_uint32 op_flags[2];
- int idim, ndim, otype_final;
+ int idim, ndim;
int needs_api, need_outer_iterator;
- NpyIter *iter = NULL;
+ int res = 0;
- /* The selected inner loop */
- PyUFuncGenericFunction innerloop = NULL;
- void *innerloopdata = NULL;
+ PyArrayMethod_StridedLoop *strided_loop;
+ NpyAuxData *auxdata = NULL;
- const char *ufunc_name = ufunc_get_name_cstr(ufunc);
+ NpyIter *iter = NULL;
/* These parameters come from extobj= or from a TLS global */
int buffersize = 0, errormask = 0;
@@ -3077,42 +3007,32 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
/* Take a reference to out for later returning */
Py_XINCREF(out);
- otype_final = otype;
- if (get_binary_op_function(ufunc, &otype_final,
- &innerloop, &innerloopdata) < 0) {
- PyArray_Descr *dtype = PyArray_DescrFromType(otype);
- PyErr_Format(PyExc_ValueError,
- "could not find a matching type for %s.accumulate, "
- "requested type has type code '%c'",
- ufunc_name, dtype ? dtype->type : '-');
- Py_XDECREF(dtype);
- goto fail;
+ PyArray_Descr *descrs[3];
+ PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
+ arr, out, signature, NPY_TRUE, descrs, "accumulate");
+ if (ufuncimpl == NULL) {
+ return NULL;
}
- ndim = PyArray_NDIM(arr);
+ /* The below code assumes that all descriptors are identical: */
+ assert(descrs[0] == descrs[1] && descrs[0] == descrs[2]);
- /*
- * Set up the output data type, using the input's exact
- * data type if the type number didn't change to preserve
- * metadata
- */
- if (PyArray_DESCR(arr)->type_num == otype_final) {
- if (PyArray_ISNBO(PyArray_DESCR(arr)->byteorder)) {
- op_dtypes[0] = PyArray_DESCR(arr);
- Py_INCREF(op_dtypes[0]);
- }
- else {
- op_dtypes[0] = PyArray_DescrNewByteorder(PyArray_DESCR(arr),
- NPY_NATIVE);
- }
- }
- else {
- op_dtypes[0] = PyArray_DescrFromType(otype_final);
- }
- if (op_dtypes[0] == NULL) {
+ if (PyDataType_REFCHK(descrs[2]) && descrs[2]->type_num != NPY_OBJECT) {
+ /* This can be removed, but the initial element copy needs fixing */
+ PyErr_SetString(PyExc_TypeError,
+ "accumulation currently only supports `object` dtype with "
+ "references");
goto fail;
}
+ PyArrayMethod_Context context = {
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = descrs,
+ };
+
+ ndim = PyArray_NDIM(arr);
+
#if NPY_UF_DBG_TRACING
printf("Found %s.accumulate inner loop with dtype : ", ufunc_name);
PyObject_Print((PyObject *)op_dtypes[0], stdout, 0);
@@ -3138,9 +3058,9 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
need_outer_iterator = (ndim > 1);
/* We can't buffer, so must do UPDATEIFCOPY */
if (!PyArray_ISALIGNED(arr) || (out && !PyArray_ISALIGNED(out)) ||
- !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(arr)) ||
+ !PyArray_EquivTypes(descrs[1], PyArray_DESCR(arr)) ||
(out &&
- !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(out)))) {
+ !PyArray_EquivTypes(descrs[0], PyArray_DESCR(out)))) {
need_outer_iterator = 1;
}
/* If input and output overlap in memory, use iterator to figure it out */
@@ -3153,7 +3073,6 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
npy_uint32 flags = NPY_ITER_ZEROSIZE_OK|
NPY_ITER_REFS_OK|
NPY_ITER_COPY_IF_OVERLAP;
- PyArray_Descr **op_dtypes_param = NULL;
/*
* The way accumulate is set up, we can't do buffering,
@@ -3170,13 +3089,11 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
*/
op_flags[0] |= NPY_ITER_UPDATEIFCOPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
op_flags[1] |= NPY_ITER_COPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
- op_dtypes_param = op_dtypes;
- op_dtypes[1] = op_dtypes[0];
+
NPY_UF_DBG_PRINT("Allocating outer iterator\n");
iter = NpyIter_AdvancedNew(2, op, flags,
NPY_KEEPORDER, NPY_UNSAFE_CASTING,
- op_flags,
- op_dtypes_param,
+ op_flags, descrs,
ndim_iter, op_axes, NULL, 0);
if (iter == NULL) {
goto fail;
@@ -3194,14 +3111,14 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
}
}
- /* Get the output */
+ /* Get the output from the iterator if it was allocated */
if (out == NULL) {
if (iter) {
op[0] = out = NpyIter_GetOperandArray(iter)[0];
Py_INCREF(out);
}
else {
- PyArray_Descr *dtype = op_dtypes[0];
+ PyArray_Descr *dtype = descrs[0];
Py_INCREF(dtype);
op[0] = out = (PyArrayObject *)PyArray_NewFromDescr(
&PyArray_Type, dtype,
@@ -3210,10 +3127,31 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
if (out == NULL) {
goto fail;
}
-
}
}
+ npy_intp fixed_strides[3];
+ if (need_outer_iterator) {
+ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
+ }
+ else {
+ fixed_strides[0] = PyArray_STRIDES(op[0])[axis];
+ fixed_strides[1] = PyArray_STRIDES(op[1])[axis];
+ fixed_strides[2] = fixed_strides[0];
+ }
+
+
+ NPY_ARRAYMETHOD_FLAGS flags = 0;
+ if (ufuncimpl->get_strided_loop(&context,
+ 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
+ goto fail;
+ }
+ needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* Start with the floating-point exception flags cleared */
+ npy_clear_floatstatus_barrier((char*)&iter);
+ }
+
/*
* If the reduction axis has size zero, either return the reduction
* unit for UFUNC_REDUCE, or return the zero-sized output array
@@ -3234,7 +3172,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
NpyIter_IterNextFunc *iternext;
char **dataptr;
- int itemsize = op_dtypes[0]->elsize;
+ int itemsize = descrs[0]->elsize;
/* Get the variables needed for the loop */
iternext = NpyIter_GetIterNext(iter, NULL);
@@ -3242,8 +3180,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
goto fail;
}
dataptr = NpyIter_GetDataPtrArray(iter);
- needs_api = NpyIter_IterationNeedsAPI(iter);
-
+ needs_api |= NpyIter_IterationNeedsAPI(iter);
/* Execute the loop with just the outer iterator */
count_m1 = PyArray_DIM(op[1], axis)-1;
@@ -3257,7 +3194,9 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
stride_copy[1] = stride1;
stride_copy[2] = stride0;
- NPY_BEGIN_THREADS_NDITER(iter);
+ if (!needs_api) {
+ NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter));
+ }
do {
dataptr_copy[0] = dataptr[0];
@@ -3270,7 +3209,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
* Output (dataptr[0]) and input (dataptr[1]) may point to
* the same memory, e.g. np.add.accumulate(a, out=a).
*/
- if (otype == NPY_OBJECT) {
+ if (descrs[2]->type_num == NPY_OBJECT) {
/*
* Incref before decref to avoid the possibility of the
* reference count being zero temporarily.
@@ -3290,18 +3229,17 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
dataptr_copy[2] += stride0;
NPY_UF_DBG_PRINT1("iterator loop count %d\n",
(int)count_m1);
- innerloop(dataptr_copy, &count_m1,
- stride_copy, innerloopdata);
+ res = strided_loop(&context,
+ dataptr_copy, &count_m1, stride_copy, auxdata);
}
- } while (!(needs_api && PyErr_Occurred()) && iternext(iter));
+ } while (res == 0 && iternext(iter));
NPY_END_THREADS;
}
else if (iter == NULL) {
char *dataptr_copy[3];
- npy_intp stride_copy[3];
- int itemsize = op_dtypes[0]->elsize;
+ int itemsize = descrs[0]->elsize;
/* Execute the loop with no iterators */
npy_intp count = PyArray_DIM(op[1], axis);
@@ -3315,15 +3253,11 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
PyArray_NDIM(op[0]))) {
PyErr_SetString(PyExc_ValueError,
"provided out is the wrong size "
- "for the reduction");
+ "for the accumulation.");
goto fail;
}
stride0 = PyArray_STRIDE(op[0], axis);
- stride_copy[0] = stride0;
- stride_copy[1] = stride1;
- stride_copy[2] = stride0;
-
/* Turn the two items into three for the inner loop */
dataptr_copy[0] = PyArray_BYTES(op[0]);
dataptr_copy[1] = PyArray_BYTES(op[1]);
@@ -3335,7 +3269,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
* Output (dataptr[0]) and input (dataptr[1]) may point to the
* same memory, e.g. np.add.accumulate(a, out=a).
*/
- if (otype == NPY_OBJECT) {
+ if (descrs[2]->type_num == NPY_OBJECT) {
/*
* Incref before decref to avoid the possibility of the
* reference count being zero temporarily.
@@ -3356,25 +3290,34 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count);
- needs_api = PyDataType_REFCHK(op_dtypes[0]);
+ needs_api = PyDataType_REFCHK(descrs[0]);
if (!needs_api) {
NPY_BEGIN_THREADS_THRESHOLDED(count);
}
- innerloop(dataptr_copy, &count,
- stride_copy, innerloopdata);
+ res = strided_loop(&context,
+ dataptr_copy, &count, fixed_strides, auxdata);
NPY_END_THREADS;
}
}
finish:
- Py_XDECREF(op_dtypes[0]);
- int res = 0;
+ NPY_AUXDATA_FREE(auxdata);
+ Py_DECREF(descrs[0]);
+ Py_DECREF(descrs[1]);
+ Py_DECREF(descrs[2]);
+
if (!NpyIter_Deallocate(iter)) {
res = -1;
}
+
+ if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* NOTE: We could check float errors even when `res < 0` */
+ res = _check_ufunc_fperr(errormask, NULL, "accumulate");
+ }
+
if (res < 0) {
Py_DECREF(out);
return NULL;
@@ -3384,7 +3327,11 @@ finish:
fail:
Py_XDECREF(out);
- Py_XDECREF(op_dtypes[0]);
+
+ NPY_AUXDATA_FREE(auxdata);
+ Py_XDECREF(descrs[0]);
+ Py_XDECREF(descrs[1]);
+ Py_XDECREF(descrs[2]);
NpyIter_Deallocate(iter);
@@ -3409,28 +3356,31 @@ fail:
* indices[1::2] = range(1,len(array))
*
* output shape is based on the size of indices
+ *
+ * TODO: Reduceat duplicates too much code from accumulate!
*/
static PyObject *
PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
- PyArrayObject *out, int axis, int otype)
+ PyArrayObject *out, int axis, PyArray_DTypeMeta *signature[3])
{
PyArrayObject *op[3];
- PyArray_Descr *op_dtypes[3] = {NULL, NULL, NULL};
int op_axes_arrays[3][NPY_MAXDIMS];
int *op_axes[3] = {op_axes_arrays[0], op_axes_arrays[1],
op_axes_arrays[2]};
npy_uint32 op_flags[3];
- int idim, ndim, otype_final;
- int need_outer_iterator = 0;
+ int idim, ndim;
+ int needs_api, need_outer_iterator = 0;
+
+ int res = 0;
NpyIter *iter = NULL;
+ PyArrayMethod_StridedLoop *strided_loop;
+ NpyAuxData *auxdata = NULL;
+
/* The reduceat indices - ind must be validated outside this call */
npy_intp *reduceat_ind;
npy_intp i, ind_size, red_axis_size;
- /* The selected inner loop */
- PyUFuncGenericFunction innerloop = NULL;
- void *innerloopdata = NULL;
const char *ufunc_name = ufunc_get_name_cstr(ufunc);
char *opname = "reduceat";
@@ -3470,42 +3420,32 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
/* Take a reference to out for later returning */
Py_XINCREF(out);
- otype_final = otype;
- if (get_binary_op_function(ufunc, &otype_final,
- &innerloop, &innerloopdata) < 0) {
- PyArray_Descr *dtype = PyArray_DescrFromType(otype);
- PyErr_Format(PyExc_ValueError,
- "could not find a matching type for %s.%s, "
- "requested type has type code '%c'",
- ufunc_name, opname, dtype ? dtype->type : '-');
- Py_XDECREF(dtype);
- goto fail;
+ PyArray_Descr *descrs[3];
+ PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
+ arr, out, signature, NPY_TRUE, descrs, "reduceat");
+ if (ufuncimpl == NULL) {
+ return NULL;
}
- ndim = PyArray_NDIM(arr);
+ /* The below code assumes that all descriptors are identical: */
+ assert(descrs[0] == descrs[1] && descrs[0] == descrs[2]);
- /*
- * Set up the output data type, using the input's exact
- * data type if the type number didn't change to preserve
- * metadata
- */
- if (PyArray_DESCR(arr)->type_num == otype_final) {
- if (PyArray_ISNBO(PyArray_DESCR(arr)->byteorder)) {
- op_dtypes[0] = PyArray_DESCR(arr);
- Py_INCREF(op_dtypes[0]);
- }
- else {
- op_dtypes[0] = PyArray_DescrNewByteorder(PyArray_DESCR(arr),
- NPY_NATIVE);
- }
- }
- else {
- op_dtypes[0] = PyArray_DescrFromType(otype_final);
- }
- if (op_dtypes[0] == NULL) {
+ if (PyDataType_REFCHK(descrs[2]) && descrs[2]->type_num != NPY_OBJECT) {
+ /* This can be removed, but the initial element copy needs fixing */
+ PyErr_SetString(PyExc_TypeError,
+ "reduceat currently only supports `object` dtype with "
+ "references");
goto fail;
}
+ PyArrayMethod_Context context = {
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = descrs,
+ };
+
+ ndim = PyArray_NDIM(arr);
+
#if NPY_UF_DBG_TRACING
printf("Found %s.%s inner loop with dtype : ", ufunc_name, opname);
PyObject_Print((PyObject *)op_dtypes[0], stdout, 0);
@@ -3532,11 +3472,13 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
op[2] = ind;
if (out != NULL || ndim > 1 || !PyArray_ISALIGNED(arr) ||
- !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(arr))) {
+ !PyArray_EquivTypes(descrs[0], PyArray_DESCR(arr))) {
need_outer_iterator = 1;
}
if (need_outer_iterator) {
+ PyArray_Descr *op_dtypes[3] = {descrs[0], descrs[1], NULL};
+
npy_uint32 flags = NPY_ITER_ZEROSIZE_OK|
NPY_ITER_REFS_OK|
NPY_ITER_MULTI_INDEX|
@@ -3565,8 +3507,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
NPY_UF_DBG_PRINT("Allocating outer iterator\n");
iter = NpyIter_AdvancedNew(3, op, flags,
NPY_KEEPORDER, NPY_UNSAFE_CASTING,
- op_flags,
- op_dtypes,
+ op_flags, op_dtypes,
ndim, op_axes, NULL, 0);
if (iter == NULL) {
goto fail;
@@ -3590,11 +3531,15 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
Py_INCREF(out);
}
}
- /* Allocate the output for when there's no outer iterator */
- else if (out == NULL) {
- Py_INCREF(op_dtypes[0]);
+ else {
+ /*
+ * Allocate the output for when there's no outer iterator, we always
+ * use the outer_iteration path when `out` is passed.
+ */
+ assert(out == NULL);
+ Py_INCREF(descrs[0]);
op[0] = out = (PyArrayObject *)PyArray_NewFromDescr(
- &PyArray_Type, op_dtypes[0],
+ &PyArray_Type, descrs[0],
1, &ind_size, NULL, NULL,
0, NULL);
if (out == NULL) {
@@ -3602,6 +3547,28 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
}
}
+ npy_intp fixed_strides[3];
+ if (need_outer_iterator) {
+ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
+ }
+ else {
+ fixed_strides[1] = PyArray_STRIDES(op[1])[axis];
+ }
+ /* The reduce axis does not advance here in the strided-loop */
+ fixed_strides[0] = 0;
+ fixed_strides[2] = 0;
+
+ NPY_ARRAYMETHOD_FLAGS flags = 0;
+ if (ufuncimpl->get_strided_loop(&context,
+ 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
+ goto fail;
+ }
+ needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* Start with the floating-point exception flags cleared */
+ npy_clear_floatstatus_barrier((char*)&iter);
+ }
+
/*
* If the output has zero elements, return now.
*/
@@ -3619,8 +3586,8 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
npy_intp stride0, stride1;
npy_intp stride0_ind = PyArray_STRIDE(op[0], axis);
- int itemsize = op_dtypes[0]->elsize;
- int needs_api = NpyIter_IterationNeedsAPI(iter);
+ int itemsize = descrs[0]->elsize;
+ needs_api |= NpyIter_IterationNeedsAPI(iter);
/* Get the variables needed for the loop */
iternext = NpyIter_GetIterNext(iter, NULL);
@@ -3640,10 +3607,11 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
stride_copy[1] = stride1;
stride_copy[2] = stride0;
- NPY_BEGIN_THREADS_NDITER(iter);
+ if (!needs_api) {
+ NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter));
+ }
do {
-
for (i = 0; i < ind_size; ++i) {
npy_intp start = reduceat_ind[i],
end = (i == ind_size-1) ? count_m1+1 :
@@ -3661,7 +3629,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
* to the same memory, e.g.
* np.add.reduceat(a, np.arange(len(a)), out=a).
*/
- if (otype == NPY_OBJECT) {
+ if (descrs[2]->type_num == NPY_OBJECT) {
/*
* Incref before decref to avoid the possibility of
* the reference count being zero temporarily.
@@ -3681,33 +3649,24 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
dataptr_copy[1] += stride1;
NPY_UF_DBG_PRINT1("iterator loop count %d\n",
(int)count);
- innerloop(dataptr_copy, &count,
- stride_copy, innerloopdata);
+ res = strided_loop(&context,
+ dataptr_copy, &count, stride_copy, auxdata);
}
}
- } while (!(needs_api && PyErr_Occurred()) && iternext(iter));
+ } while (res == 0 && iternext(iter));
NPY_END_THREADS;
}
else if (iter == NULL) {
char *dataptr_copy[3];
- npy_intp stride_copy[3];
- int itemsize = op_dtypes[0]->elsize;
+ int itemsize = descrs[0]->elsize;
npy_intp stride0_ind = PyArray_STRIDE(op[0], axis);
-
- /* Execute the loop with no iterators */
- npy_intp stride0 = 0, stride1 = PyArray_STRIDE(op[1], axis);
-
- int needs_api = PyDataType_REFCHK(op_dtypes[0]);
+ npy_intp stride1 = PyArray_STRIDE(op[1], axis);
NPY_UF_DBG_PRINT("UFunc: Reduce loop with no iterators\n");
- stride_copy[0] = stride0;
- stride_copy[1] = stride1;
- stride_copy[2] = stride0;
-
if (!needs_api) {
NPY_BEGIN_THREADS;
}
@@ -3729,7 +3688,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
* the same memory, e.g.
* np.add.reduceat(a, np.arange(len(a)), out=a).
*/
- if (otype == NPY_OBJECT) {
+ if (descrs[2]->type_num == NPY_OBJECT) {
/*
* Incref before decref to avoid the possibility of the
* reference count being zero temporarily.
@@ -3749,8 +3708,11 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
dataptr_copy[1] += stride1;
NPY_UF_DBG_PRINT1("iterator loop count %d\n",
(int)count);
- innerloop(dataptr_copy, &count,
- stride_copy, innerloopdata);
+ res = strided_loop(&context,
+ dataptr_copy, &count, fixed_strides, auxdata);
+ if (res != 0) {
+ break;
+ }
}
}
@@ -3758,8 +3720,21 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
}
finish:
- Py_XDECREF(op_dtypes[0]);
+ NPY_AUXDATA_FREE(auxdata);
+ Py_DECREF(descrs[0]);
+ Py_DECREF(descrs[1]);
+ Py_DECREF(descrs[2]);
+
if (!NpyIter_Deallocate(iter)) {
+ res = -1;
+ }
+
+ if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* NOTE: We could check float errors even when `res < 0` */
+ res = _check_ufunc_fperr(errormask, NULL, "reduceat");
+ }
+
+ if (res < 0) {
Py_DECREF(out);
return NULL;
}
@@ -3768,9 +3743,14 @@ finish:
fail:
Py_XDECREF(out);
- Py_XDECREF(op_dtypes[0]);
+
+ NPY_AUXDATA_FREE(auxdata);
+ Py_XDECREF(descrs[0]);
+ Py_XDECREF(descrs[1]);
+ Py_XDECREF(descrs[2]);
NpyIter_Deallocate(iter);
+
return NULL;
}
@@ -3868,7 +3848,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
PyArrayObject *mp = NULL, *wheremask = NULL, *ret = NULL;
PyObject *op = NULL;
PyArrayObject *indices = NULL;
- PyArray_Descr *otype = NULL;
+ PyArray_DTypeMeta *signature[3] = {NULL, NULL, NULL};
PyArrayObject *out = NULL;
int keepdims = 0;
PyObject *initial = NULL;
@@ -4012,13 +3992,10 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
}
if (otype_obj && otype_obj != Py_None) {
/* Use `_get_dtype` because `dtype` is a DType and not the instance */
- PyArray_DTypeMeta *dtype = _get_dtype(otype_obj);
- if (dtype == NULL) {
+ signature[0] = _get_dtype(otype_obj);
+ if (signature[0] == NULL) {
goto fail;
}
- otype = dtype->singleton;
- Py_INCREF(otype);
- Py_DECREF(dtype);
}
if (out_obj && !PyArray_OutputConverter(out_obj, &out)) {
goto fail;
@@ -4038,15 +4015,6 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
ndim = PyArray_NDIM(mp);
- /* Check to see that type (and otype) is not FLEXIBLE */
- if (PyArray_ISFLEXIBLE(mp) ||
- (otype && PyTypeNum_ISFLEXIBLE(otype->type_num))) {
- PyErr_Format(PyExc_TypeError,
- "cannot perform %s with flexible type",
- _reduce_type[operation]);
- goto fail;
- }
-
/* Convert the 'axis' parameter into a list of axes */
if (axes_obj == NULL) {
/* apply defaults */
@@ -4109,14 +4077,12 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
}
/*
- * If out is specified it determines otype
- * unless otype already specified.
+ * If no dtype is specified and out is not specified, we override the
+ * integer and bool dtype used for add and multiply.
+ *
+ * TODO: The following should be handled by a promoter!
*/
- if (otype == NULL && out != NULL) {
- otype = PyArray_DESCR(out);
- Py_INCREF(otype);
- }
- if (otype == NULL) {
+ if (signature[0] == NULL && out == NULL) {
/*
* For integer types --- make sure at least a long
* is used for add and multiply reduction to avoid overflow
@@ -4136,16 +4102,17 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
typenum = NPY_LONG;
}
}
+ signature[0] = PyArray_DTypeFromTypeNum(typenum);
}
- otype = PyArray_DescrFromType(typenum);
}
-
+ Py_XINCREF(signature[0]);
+ signature[2] = signature[0];
switch(operation) {
case UFUNC_REDUCE:
- ret = PyUFunc_Reduce(ufunc, mp, out, naxes, axes,
- otype, keepdims, initial, wheremask);
- Py_XDECREF(wheremask);
+ ret = PyUFunc_Reduce(ufunc,
+ mp, out, naxes, axes, signature, keepdims, initial, wheremask);
+ Py_XSETREF(wheremask, NULL);
break;
case UFUNC_ACCUMULATE:
if (ndim == 0) {
@@ -4157,8 +4124,8 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
"accumulate does not allow multiple axes");
goto fail;
}
- ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc, mp, out, axes[0],
- otype->type_num);
+ ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc,
+ mp, out, axes[0], signature);
break;
case UFUNC_REDUCEAT:
if (ndim == 0) {
@@ -4171,19 +4138,22 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
goto fail;
}
ret = (PyArrayObject *)PyUFunc_Reduceat(ufunc,
- mp, indices, out, axes[0], otype->type_num);
+ mp, indices, out, axes[0], signature);
Py_SETREF(indices, NULL);
break;
}
+ if (ret == NULL) {
+ goto fail;
+ }
+
+ Py_DECREF(signature[0]);
+ Py_DECREF(signature[1]);
+ Py_DECREF(signature[2]);
+
Py_DECREF(mp);
- Py_DECREF(otype);
Py_XDECREF(full_args.in);
Py_XDECREF(full_args.out);
- if (ret == NULL) {
- return NULL;
- }
-
/* Wrap and return the output */
{
/* Find __array_wrap__ - note that these rules are different to the
@@ -4211,7 +4181,10 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
}
fail:
- Py_XDECREF(otype);
+ Py_XDECREF(signature[0]);
+ Py_XDECREF(signature[1]);
+ Py_XDECREF(signature[2]);
+
Py_XDECREF(mp);
Py_XDECREF(wheremask);
Py_XDECREF(indices);
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 7e24bc493..aa8d7b982 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -247,6 +247,28 @@ PyUFunc_ValidateCasting(PyUFuncObject *ufunc,
}
+/*
+ * Same as `PyUFunc_ValidateCasting` but only checks output casting.
+ */
+NPY_NO_EXPORT int
+PyUFunc_ValidateOutCasting(PyUFuncObject *ufunc,
+ NPY_CASTING casting, PyArrayObject **operands, PyArray_Descr **dtypes)
+{
+ int i, nin = ufunc->nin, nop = nin + ufunc->nout;
+
+ for (i = nin; i < nop; ++i) {
+ if (operands[i] == NULL) {
+ continue;
+ }
+ if (!PyArray_CanCastTypeTo(dtypes[i],
+ PyArray_DESCR(operands[i]), casting)) {
+ return raise_output_casting_error(
+ ufunc, casting, dtypes[i], PyArray_DESCR(operands[i]), i);
+ }
+ }
+ return 0;
+}
+
/*UFUNC_API
*
* This function applies the default type resolution rules
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index dd88a081a..84a2593f4 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -99,6 +99,10 @@ PyUFunc_DivmodTypeResolver(PyUFuncObject *ufunc,
PyObject *type_tup,
PyArray_Descr **out_dtypes);
+NPY_NO_EXPORT int
+PyUFunc_ValidateOutCasting(PyUFuncObject *ufunc,
+ NPY_CASTING casting, PyArrayObject **operands, PyArray_Descr **dtypes);
+
/*
* Does a linear search for the best inner loop of the ufunc.
*
diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c
index a9954dfc1..272555704 100644
--- a/numpy/core/src/umath/umathmodule.c
+++ b/numpy/core/src/umath/umathmodule.c
@@ -22,6 +22,7 @@
#include "numpy/npy_math.h"
#include "number.h"
+#include "dispatching.h"
static PyUFuncGenericFunction pyfunc_functions[] = {PyUFunc_On_Om};
@@ -305,5 +306,33 @@ int initumath(PyObject *m)
return -1;
}
+ /*
+ * Set up promoters for logical functions
+ * TODO: This should probably be done at a better place, or even in the
+ * code generator directly.
+ */
+ s = _PyDict_GetItemStringWithError(d, "logical_and");
+ if (s == NULL) {
+ return -1;
+ }
+ if (install_logical_ufunc_promoter(s) < 0) {
+ return -1;
+ }
+
+ s = _PyDict_GetItemStringWithError(d, "logical_or");
+ if (s == NULL) {
+ return -1;
+ }
+ if (install_logical_ufunc_promoter(s) < 0) {
+ return -1;
+ }
+
+ s = _PyDict_GetItemStringWithError(d, "logical_xor");
+ if (s == NULL) {
+ return -1;
+ }
+ if (install_logical_ufunc_promoter(s) < 0) {
+ return -1;
+ }
return 0;
}
diff --git a/numpy/core/tests/test_custom_dtypes.py b/numpy/core/tests/test_custom_dtypes.py
index 5eb82bc93..e502a87ed 100644
--- a/numpy/core/tests/test_custom_dtypes.py
+++ b/numpy/core/tests/test_custom_dtypes.py
@@ -101,6 +101,22 @@ class TestSFloat:
expected_view = a.view(np.float64) * b.view(np.float64)
assert_array_equal(res.view(np.float64), expected_view)
+ def test_possible_and_impossible_reduce(self):
+ # For reductions to work, the first and last operand must have the
+ # same dtype. For this parametric DType that is not necessarily true.
+ a = self._get_array(2.)
+ # Addition reductin works (as of writing requires to pass initial
+ # because setting a scaled-float from the default `0` fails).
+ res = np.add.reduce(a, initial=0.)
+ assert res == a.astype(np.float64).sum()
+
+ # But each multiplication changes the factor, so a reduction is not
+ # possible (the relaxed version of the old refusal to handle any
+ # flexible dtype).
+ with pytest.raises(TypeError,
+ match="the resolved dtypes are not compatible"):
+ np.multiply.reduce(a)
+
def test_basic_multiply_promotion(self):
float_a = np.array([1., 2., 3.])
b = self._get_array(2.)
@@ -145,3 +161,23 @@ class TestSFloat:
# Check that casting the output fails also (done by the ufunc here)
with pytest.raises(TypeError):
np.add(a, a, out=c, casting="safe")
+
+ @pytest.mark.parametrize("ufunc",
+ [np.logical_and, np.logical_or, np.logical_xor])
+ def test_logical_ufuncs_casts_to_bool(self, ufunc):
+ a = self._get_array(2.)
+ a[0] = 0. # make sure first element is considered False.
+
+ float_equiv = a.astype(float)
+ expected = ufunc(float_equiv, float_equiv)
+ res = ufunc(a, a)
+ assert_array_equal(res, expected)
+
+ # also check that the same works for reductions:
+ expected = ufunc.reduce(float_equiv)
+ res = ufunc.reduce(a)
+ assert_array_equal(res, expected)
+
+ # The output casting does not match the bool, bool -> bool loop:
+ with pytest.raises(TypeError):
+ ufunc(a, a, out=np.empty(a.shape, dtype=int), casting="equiv")
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index 69eba7ba0..b95d669a8 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -2029,6 +2029,21 @@ class TestDateTime:
assert_equal(np.maximum.reduce(a),
np.timedelta64(7, 's'))
+ def test_datetime_no_subtract_reducelike(self):
+ # subtracting two datetime64 works, but we cannot reduce it, since
+ # the result of that subtraction will have a different dtype.
+ arr = np.array(["2021-12-02", "2019-05-12"], dtype="M8[ms]")
+ msg = r"the resolved dtypes are not compatible with subtract\."
+
+ with pytest.raises(TypeError, match=msg + "reduce"):
+ np.subtract.reduce(arr)
+
+ with pytest.raises(TypeError, match=msg + "accumulate"):
+ np.subtract.accumulate(arr)
+
+ with pytest.raises(TypeError, match=msg + "reduceat"):
+ np.subtract.reduceat(arr, [0])
+
def test_datetime_busday_offset(self):
# First Monday in June
assert_equal(
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 30929ce91..78833a33c 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -1362,6 +1362,14 @@ class TestUfunc:
np.array([[2]*i for i in [1, 3, 6, 10]], dtype=object),
)
+ def test_object_array_accumulate_failure(self):
+ # Typical accumulation on object works as expected:
+ res = np.add.accumulate(np.array([1, 0, 2], dtype=object))
+ assert_array_equal(res, np.array([1, 1, 3], dtype=object))
+ # But errors are propagated from the inner-loop if they occur:
+ with pytest.raises(TypeError):
+ np.add.accumulate([1, None, 2])
+
def test_object_array_reduceat_inplace(self):
# Checks that in-place reduceats work, see also gh-7465
arr = np.empty(4, dtype=object)
@@ -1381,6 +1389,15 @@ class TestUfunc:
np.add.reduceat(arr, np.arange(4), out=arr, axis=-1)
assert_array_equal(arr, out)
+ def test_object_array_reduceat_failure(self):
+ # Reduceat works as expected when no invalid operation occurs (None is
+ # not involved in an operation here)
+ res = np.add.reduceat(np.array([1, None, 2], dtype=object), [1, 2])
+ assert_array_equal(res, np.array([None, 2], dtype=object))
+ # But errors when None would be involved in an operation:
+ with pytest.raises(TypeError):
+ np.add.reduceat([1, None, 2], [0, 2])
+
def test_zerosize_reduction(self):
# Test with default dtype and object dtype
for a in [[], np.array([], dtype=object)]:
@@ -2098,6 +2115,25 @@ class TestUfunc:
with pytest.raises(TypeError):
ufunc(a, a, signature=signature)
+ @pytest.mark.parametrize("ufunc",
+ [np.logical_and, np.logical_or, np.logical_xor])
+ def test_logical_ufuncs_support_anything(self, ufunc):
+ # The logical ufuncs support even input that can't be promoted:
+ a = np.array('1')
+ c = np.array([1., 2.])
+ assert_array_equal(ufunc(a, c), ufunc([True, True], True))
+ assert ufunc.reduce(a) == True
+
+ @pytest.mark.parametrize("ufunc",
+ [np.logical_and, np.logical_or, np.logical_xor])
+ def test_logical_ufuncs_out_cast_check(self, ufunc):
+ a = np.array('1')
+ c = np.array([1., 2.])
+ out = a.copy()
+ with pytest.raises(TypeError):
+ # It would be safe, but not equiv casting:
+ ufunc(a, c, out=out, casting="equiv")
+
def test_reduce_noncontig_output(self):
# Check that reduction deals with non-contiguous output arrays
# appropriately.
@@ -2119,6 +2155,22 @@ class TestUfunc:
assert_equal(y_base[1,:], y_base_copy[1,:])
assert_equal(y_base[3,:], y_base_copy[3,:])
+ @pytest.mark.parametrize("with_cast", [True, False])
+ def test_reduceat_and_accumulate_out_shape_mismatch(self, with_cast):
+ # Should raise an error mentioning "shape" or "size"
+ arr = np.arange(5)
+ out = np.arange(3) # definitely wrong shape
+ if with_cast:
+ # If a cast is necessary on the output, we can be sure to use
+ # the generic NpyIter (non-fast) path.
+ out = out.astype(np.float64)
+
+ with pytest.raises(ValueError, match="(shape|size)"):
+ np.add.reduceat(arr, [0, 3], out=out)
+
+ with pytest.raises(ValueError, match="(shape|size)"):
+ np.add.accumulate(arr, out=out)
+
@pytest.mark.parametrize('out_shape',
[(), (1,), (3,), (1, 1), (1, 3), (4, 3)])
@pytest.mark.parametrize('keepdims', [True, False])
@@ -2331,7 +2383,7 @@ def test_reduce_casterrors(offset):
out = np.array(-1, dtype=np.intp)
count = sys.getrefcount(value)
- with pytest.raises(ValueError):
+ with pytest.raises(TypeError):
# This is an unsafe cast, but we currently always allow that:
np.add.reduce(arr, dtype=np.intp, out=out)
assert count == sys.getrefcount(value)
@@ -2340,3 +2392,18 @@ def test_reduce_casterrors(offset):
# if the error happened immediately.
# This does not define behaviour, the output is invalid and thus undefined
assert out[()] < value * offset
+
+
+@pytest.mark.parametrize("method",
+ [np.add.accumulate, np.add.reduce,
+ pytest.param(lambda x: np.add.reduceat(x, [0]), id="reduceat")])
+def test_reducelike_floaterrors(method):
+ # adding inf and -inf creates an invalid float and should give a warning
+ arr = np.array([np.inf, 0, -np.inf])
+ with np.errstate(all="warn"):
+ with pytest.warns(RuntimeWarning, match="invalid value"):
+ method(arr)
+
+ with np.errstate(all="raise"):
+ with pytest.raises(FloatingPointError):
+ method(arr)
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index 2ff1667ba..036d6312c 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -1065,7 +1065,7 @@ class _MaskedBinaryOperation(_MaskedUFunc):
tr = self.f.reduce(t, axis)
mr = nomask
else:
- tr = self.f.reduce(t, axis, dtype=dtype or t.dtype)
+ tr = self.f.reduce(t, axis, dtype=dtype)
mr = umath.logical_and.reduce(m, axis)
if not tr.shape: