diff options
author | Matti Picus <matti.picus@gmail.com> | 2021-10-19 17:48:35 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-19 17:48:35 +0300 |
commit | 405c6eec7937721bd2e284dceaf4f49d5e6e39e8 (patch) | |
tree | 219cfaf9b01b5a99c7aaf13b427618225c2e059f | |
parent | dd2eaaabdb0451631e95376ae2b4d319082b438e (diff) | |
parent | 27f3b03dcc4fa2570825f9233f2e593c3c320673 (diff) | |
download | numpy-405c6eec7937721bd2e284dceaf4f49d5e6e39e8.tar.gz |
Merge pull request #18905 from seberg/ufunc-refactor-2021
MAINT: Refactor reductions to use NEP 43 style dispatching/promotion
-rw-r--r-- | numpy/core/src/multiarray/array_method.c | 7 | ||||
-rw-r--r-- | numpy/core/src/multiarray/array_method.h | 11 | ||||
-rw-r--r-- | numpy/core/src/umath/_scaled_float_dtype.c | 52 | ||||
-rw-r--r-- | numpy/core/src/umath/dispatching.c | 177 | ||||
-rw-r--r-- | numpy/core/src/umath/dispatching.h | 4 | ||||
-rw-r--r-- | numpy/core/src/umath/legacy_array_method.c | 19 | ||||
-rw-r--r-- | numpy/core/src/umath/reduction.c | 57 | ||||
-rw-r--r-- | numpy/core/src/umath/reduction.h | 110 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 773 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_type_resolution.c | 22 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_type_resolution.h | 4 | ||||
-rw-r--r-- | numpy/core/src/umath/umathmodule.c | 29 | ||||
-rw-r--r-- | numpy/core/tests/test_custom_dtypes.py | 36 | ||||
-rw-r--r-- | numpy/core/tests/test_datetime.py | 15 | ||||
-rw-r--r-- | numpy/core/tests/test_ufunc.py | 69 | ||||
-rw-r--r-- | numpy/ma/core.py | 2 |
16 files changed, 866 insertions, 521 deletions
diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c index 406b0c6ff..d93dac506 100644 --- a/numpy/core/src/multiarray/array_method.c +++ b/numpy/core/src/multiarray/array_method.c @@ -780,6 +780,13 @@ _masked_stridedloop_data_free(NpyAuxData *auxdata) * This function wraps a regular unmasked strided-loop as a * masked strided-loop, only calling the function for elements * where the mask is True. + * + * TODO: Reductions also use this code to implement masked reductions. + * Before consolidating them, reductions had a special case for + * broadcasts: when the mask stride was 0 the code does not check all + * elements as `npy_memchr` currently does. + * It may be worthwhile to add such an optimization again if broadcasted + * masks are common enough. */ static int generic_masked_strided_loop(PyArrayMethod_Context *context, diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h index b29c7c077..7b7372bd0 100644 --- a/numpy/core/src/multiarray/array_method.h +++ b/numpy/core/src/multiarray/array_method.h @@ -21,6 +21,17 @@ typedef enum { NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2, /* Whether the method supports unaligned access (not runtime) */ NPY_METH_SUPPORTS_UNALIGNED = 1 << 3, + /* + * Private flag for now for *logic* functions. The logical functions + * `logical_or` and `logical_and` can always cast the inputs to booleans + * "safely" (because that is how the cast to bool is defined). + * @seberg: I am not sure this is the best way to handle this, so its + * private for now (also it is very limited anyway). + * There is one "exception". NA aware dtypes cannot cast to bool + * (hopefully), so the `??->?` loop should error even with this flag. + * But a second NA fallback loop will be necessary. + */ + _NPY_METH_FORCE_CAST_INPUTS = 1 << 17, /* All flags which can change at runtime */ NPY_METH_RUNTIME_FLAGS = ( diff --git a/numpy/core/src/umath/_scaled_float_dtype.c b/numpy/core/src/umath/_scaled_float_dtype.c index eeef33a3d..b6c19362a 100644 --- a/numpy/core/src/umath/_scaled_float_dtype.c +++ b/numpy/core/src/umath/_scaled_float_dtype.c @@ -398,6 +398,42 @@ float_to_from_sfloat_resolve_descriptors( } +/* + * Cast to boolean (for testing the logical functions a bit better). + */ +static int +cast_sfloat_to_bool(PyArrayMethod_Context *NPY_UNUSED(context), + char *const data[], npy_intp const dimensions[], + npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in = data[0]; + char *out = data[1]; + for (npy_intp i = 0; i < N; i++) { + *(npy_bool *)out = *(double *)in != 0; + in += strides[0]; + out += strides[1]; + } + return 0; +} + +static NPY_CASTING +sfloat_to_bool_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + if (loop_descrs[0] == NULL) { + return -1; + } + loop_descrs[1] = PyArray_DescrFromType(NPY_BOOL); /* cannot fail */ + return NPY_UNSAFE_CASTING; +} + + static int init_casts(void) { @@ -453,6 +489,22 @@ init_casts(void) return -1; } + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &sfloat_to_bool_resolve_descriptors; + slots[1].slot = NPY_METH_strided_loop; + slots[1].pfunc = &cast_sfloat_to_bool; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + spec.name = "sfloat_to_bool_cast"; + dtypes[0] = &PyArray_SFloatDType; + dtypes[1] = PyArray_DTypeFromTypeNum(NPY_BOOL); + Py_DECREF(dtypes[1]); /* immortal anyway */ + + if (PyArray_AddCastingImplementation_FromSpec(&spec, 0)) { + return -1; + } + return 0; } diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c index 40de28754..9c76b40e0 100644 --- a/numpy/core/src/umath/dispatching.c +++ b/numpy/core/src/umath/dispatching.c @@ -267,8 +267,39 @@ resolve_implementation_info(PyUFuncObject *ufunc, * the subclass should be considered a better match * (subclasses are always more specific). */ + /* Whether this (normally output) dtype was specified at all */ + if (op_dtypes[i] == NULL) { + /* + * When DType is completely unspecified, prefer abstract + * over concrete, assuming it will resolve. + * Furthermore, we cannot decide which abstract/None + * is "better", only concrete ones which are subclasses + * of Abstract ones are defined as worse. + */ + npy_bool prev_is_concrete = NPY_FALSE; + npy_bool new_is_concrete = NPY_FALSE; + if ((prev_dtype != Py_None) && + !NPY_DT_is_abstract((PyArray_DTypeMeta *)prev_dtype)) { + prev_is_concrete = NPY_TRUE; + } + if ((new_dtype != Py_None) && + !NPY_DT_is_abstract((PyArray_DTypeMeta *)new_dtype)) { + new_is_concrete = NPY_TRUE; + } + if (prev_is_concrete == new_is_concrete) { + best = -1; + } + else if (prev_is_concrete) { + unambiguously_equally_good = 0; + best = 1; + } + else { + unambiguously_equally_good = 0; + best = 0; + } + } /* If either is None, the other is strictly more specific */ - if (prev_dtype == Py_None) { + else if (prev_dtype == Py_None) { unambiguously_equally_good = 0; best = 1; } @@ -289,13 +320,29 @@ resolve_implementation_info(PyUFuncObject *ufunc, */ best = -1; } + else if (!NPY_DT_is_abstract((PyArray_DTypeMeta *)prev_dtype)) { + /* old is not abstract, so better (both not possible) */ + unambiguously_equally_good = 0; + best = 0; + } + else if (!NPY_DT_is_abstract((PyArray_DTypeMeta *)new_dtype)) { + /* new is not abstract, so better (both not possible) */ + unambiguously_equally_good = 0; + best = 1; + } /* - * TODO: Unreachable, but we will need logic for abstract - * DTypes to decide if one is a subclass of the other - * (And their subclass relation is well defined.) + * TODO: This will need logic for abstract DTypes to decide if + * one is a subclass of the other (And their subclass + * relation is well defined). For now, we bail out + * in cas someone manages to get here. */ else { - assert(0); + PyErr_SetString(PyExc_NotImplementedError, + "deciding which one of two abstract dtypes is " + "a better match is not yet implemented. This " + "will pick the better (or bail) in the future."); + *out_info = NULL; + return -1; } if ((current_best != -1) && (current_best != best)) { @@ -612,6 +659,35 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc, } return info; } + else if (info == NULL && op_dtypes[0] == NULL) { + /* + * If we have a reduction, fill in the unspecified input/array + * assuming it should have the same dtype as the operand input + * (or the output one if given). + * Then, try again. In some cases, this will choose different + * paths, such as `ll->?` instead of an `??->?` loop for `np.equal` + * when the input is `.l->.` (`.` meaning undefined). This will + * then cause an error. But cast to `?` would always lose + * information, and in many cases important information: + * + * ```python + * from operator import eq + * from functools import reduce + * + * reduce(eq, [1, 2, 3]) != reduce(eq, [True, True, True]) + * ``` + * + * The special cases being `logical_(and|or|xor)` which can always + * cast to boolean ahead of time and still give the right answer + * (unsafe cast to bool is fine here). We special case these at + * the time of this comment (NumPy 1.21). + */ + assert(ufunc->nin == 2 && ufunc->nout == 1); + op_dtypes[0] = op_dtypes[2] != NULL ? op_dtypes[2] : op_dtypes[1]; + Py_INCREF(op_dtypes[0]); + return promote_and_get_info_and_ufuncimpl(ufunc, + ops, signature, op_dtypes, allow_legacy_promotion, 1); + } } /* @@ -743,3 +819,94 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc, return method; } + + +/* + * Special promoter for the logical ufuncs. The logical ufuncs can always + * use the ??->? and still get the correct output (as long as the output + * is not supposed to be `object`). + */ +static int +logical_ufunc_promoter(PyUFuncObject *NPY_UNUSED(ufunc), + PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], + PyArray_DTypeMeta *new_op_dtypes[]) +{ + /* + * If we find any object DType at all, we currently force to object. + * However, if the output is specified and not object, there is no point, + * it should be just as well to cast the input rather than doing the + * unsafe out cast. + */ + int force_object = 0; + + for (int i = 0; i < 3; i++) { + PyArray_DTypeMeta *item; + if (signature[i] != NULL) { + item = signature[i]; + Py_INCREF(item); + if (item->type_num == NPY_OBJECT) { + force_object = 1; + } + } + else { + /* Always override to boolean */ + item = PyArray_DTypeFromTypeNum(NPY_BOOL); + if (op_dtypes[i] != NULL && op_dtypes[i]->type_num == NPY_OBJECT) { + force_object = 1; + } + } + new_op_dtypes[i] = item; + } + + if (!force_object || (op_dtypes[2] != NULL + && op_dtypes[2]->type_num != NPY_OBJECT)) { + return 0; + } + /* + * Actually, we have to use the OBJECT loop after all, set all we can + * to object (that might not work out, but try). + * + * NOTE: Change this to check for `op_dtypes[0] == NULL` to STOP + * returning `object` for `np.logical_and.reduce(obj_arr)` + * which will also affect `np.all` and `np.any`! + */ + for (int i = 0; i < 3; i++) { + if (signature[i] != NULL) { + continue; + } + Py_SETREF(new_op_dtypes[i], PyArray_DTypeFromTypeNum(NPY_OBJECT)); + } + return 0; +} + + +NPY_NO_EXPORT int +install_logical_ufunc_promoter(PyObject *ufunc) +{ + if (PyObject_Type(ufunc) != (PyObject *)&PyUFunc_Type) { + PyErr_SetString(PyExc_RuntimeError, + "internal numpy array, logical ufunc was not a ufunc?!"); + return -1; + } + PyObject *dtype_tuple = PyTuple_Pack(3, + &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type, NULL); + if (dtype_tuple == NULL) { + return -1; + } + PyObject *promoter = PyCapsule_New(&logical_ufunc_promoter, + "numpy._ufunc_promoter", NULL); + if (promoter == NULL) { + Py_DECREF(dtype_tuple); + return -1; + } + + PyObject *info = PyTuple_Pack(2, dtype_tuple, promoter); + Py_DECREF(dtype_tuple); + Py_DECREF(promoter); + if (info == NULL) { + return -1; + } + + return PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0); +} + diff --git a/numpy/core/src/umath/dispatching.h b/numpy/core/src/umath/dispatching.h index 8d116873c..2f314615d 100644 --- a/numpy/core/src/umath/dispatching.h +++ b/numpy/core/src/umath/dispatching.h @@ -26,4 +26,8 @@ NPY_NO_EXPORT PyObject * add_and_return_legacy_wrapping_ufunc_loop(PyUFuncObject *ufunc, PyArray_DTypeMeta *operation_dtypes[], int ignore_duplicate); +NPY_NO_EXPORT int +install_logical_ufunc_promoter(PyObject *ufunc); + + #endif /*_NPY_DISPATCHING_H */ diff --git a/numpy/core/src/umath/legacy_array_method.c b/numpy/core/src/umath/legacy_array_method.c index 77b1b9013..a423823d4 100644 --- a/numpy/core/src/umath/legacy_array_method.c +++ b/numpy/core/src/umath/legacy_array_method.c @@ -217,6 +217,25 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, */ int any_output_flexible = 0; NPY_ARRAYMETHOD_FLAGS flags = 0; + if (ufunc->nargs == 3 && + signature[0]->type_num == NPY_BOOL && + signature[1]->type_num == NPY_BOOL && + signature[2]->type_num == NPY_BOOL && ( + strcmp(ufunc->name, "logical_or") == 0 || + strcmp(ufunc->name, "logical_and") == 0 || + strcmp(ufunc->name, "logical_xor") == 0)) { + /* + * This is a logical ufunc, and the `??->?` loop`. It is always OK + * to cast any input to bool, because that cast is defined by + * truthiness. + * This allows to ensure two things: + * 1. `np.all`/`np.any` know that force casting the input is OK + * (they must do this since there are no `?l->?`, etc. loops) + * 2. The logical functions automatically work for any DType + * implementing a cast to boolean. + */ + flags = _NPY_METH_FORCE_CAST_INPUTS; + } for (int i = 0; i < ufunc->nin+ufunc->nout; i++) { if (signature[i]->singleton->flags & ( diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c index d5a251368..c28c8abd8 100644 --- a/numpy/core/src/umath/reduction.c +++ b/numpy/core/src/umath/reduction.c @@ -145,14 +145,12 @@ PyArray_CopyInitialReduceValues( * boilerplate code, just calling the appropriate inner loop function where * necessary. * + * context : The ArrayMethod context (with ufunc, method, and descriptors). * operand : The array to be reduced. * out : NULL, or the array into which to place the result. * wheremask : NOT YET SUPPORTED, but this parameter is placed here * so that support can be added in the future without breaking * API compatibility. Pass in NULL. - * operand_dtype : The dtype the inner loop expects for the operand. - * result_dtype : The dtype the inner loop expects for the result. - * casting : The casting rule to apply to the operands. * axis_flags : Flags indicating the reduction axes of 'operand'. * reorderable : If True, the reduction being done is reorderable, which * means specifying multiple axes of reduction at once is ok, @@ -182,10 +180,8 @@ PyArray_CopyInitialReduceValues( * generalized ufuncs!) */ NPY_NO_EXPORT PyArrayObject * -PyUFunc_ReduceWrapper( +PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask, - PyArray_Descr *operand_dtype, PyArray_Descr *result_dtype, - NPY_CASTING casting, npy_bool *axis_flags, int reorderable, int keepdims, PyObject *identity, PyArray_ReduceLoopFunc *loop, void *data, npy_intp buffersize, const char *funcname, int errormask) @@ -199,6 +195,8 @@ PyUFunc_ReduceWrapper( PyArrayObject *op[3]; PyArray_Descr *op_dtypes[3]; npy_uint32 it_flags, op_flags[3]; + /* Loop auxdata (must be freed on error) */ + NpyAuxData *auxdata = NULL; /* More than one axis means multiple orders are possible */ if (!reorderable && count_axes(PyArray_NDIM(operand), axis_flags) > 1) { @@ -221,8 +219,8 @@ PyUFunc_ReduceWrapper( /* Set up the iterator */ op[0] = out; op[1] = operand; - op_dtypes[0] = result_dtype; - op_dtypes[1] = operand_dtype; + op_dtypes[0] = context->descriptors[0]; + op_dtypes[1] = context->descriptors[1]; it_flags = NPY_ITER_BUFFERED | NPY_ITER_EXTERNAL_LOOP | @@ -291,7 +289,7 @@ PyUFunc_ReduceWrapper( } iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, it_flags, - NPY_KEEPORDER, casting, + NPY_KEEPORDER, NPY_UNSAFE_CASTING, op_flags, op_dtypes, PyArray_NDIM(operand), op_axes, NULL, buffersize); @@ -301,9 +299,29 @@ PyUFunc_ReduceWrapper( result = NpyIter_GetOperandArray(iter)[0]; - int needs_api = NpyIter_IterationNeedsAPI(iter); - /* Start with the floating-point exception flags cleared */ - npy_clear_floatstatus_barrier((char*)&iter); + PyArrayMethod_StridedLoop *strided_loop; + NPY_ARRAYMETHOD_FLAGS flags = 0; + npy_intp fixed_strides[3]; + NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + if (wheremask != NULL) { + if (PyArrayMethod_GetMaskedStridedLoop(context, + 1, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + } + else { + if (context->method->get_strided_loop(context, + 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + } + + int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; + needs_api |= NpyIter_IterationNeedsAPI(iter); + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* Start with the floating-point exception flags cleared */ + npy_clear_floatstatus_barrier((char*)&iter); + } /* * Initialize the result to the reduction unit if possible, @@ -345,16 +363,18 @@ PyUFunc_ReduceWrapper( strideptr = NpyIter_GetInnerStrideArray(iter); countptr = NpyIter_GetInnerLoopSizePtr(iter); - if (loop(iter, dataptr, strideptr, countptr, - iternext, needs_api, skip_first_count, data) < 0) { + if (loop(context, strided_loop, auxdata, + iter, dataptr, strideptr, countptr, iternext, + needs_api, skip_first_count) < 0) { goto fail; } } - /* Check whether any errors occurred during the loop */ - if (PyErr_Occurred() || - _check_ufunc_fperr(errormask, NULL, "reduce") < 0) { - goto fail; + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* NOTE: We could check float errors even on error */ + if (_check_ufunc_fperr(errormask, NULL, "reduce") < 0) { + goto fail; + } } if (out != NULL) { @@ -369,6 +389,7 @@ PyUFunc_ReduceWrapper( return result; fail: + NPY_AUXDATA_FREE(auxdata); if (iter != NULL) { NpyIter_Deallocate(iter); } diff --git a/numpy/core/src/umath/reduction.h b/numpy/core/src/umath/reduction.h index 372605dba..2170e27a7 100644 --- a/numpy/core/src/umath/reduction.h +++ b/numpy/core/src/umath/reduction.h @@ -19,93 +19,17 @@ typedef int (PyArray_AssignReduceIdentityFunc)(PyArrayObject *result, void *data); /* - * This is a function for the reduce loop. + * Inner definition of the reduce loop, only used for a static function. + * At some point around NumPy 1.6, there was probably an intention to make + * the reduce loop customizable at this level (per ufunc?). * - * The needs_api parameter indicates whether it's ok to release the GIL during - * the loop, such as when the iternext() function never calls - * a function which could raise a Python exception. - * - * The skip_first_count parameter indicates how many elements need to be - * skipped based on NpyIter_IsFirstVisit checks. This can only be positive - * when the 'assign_identity' parameter was NULL when calling - * PyArray_ReduceWrapper. - * - * The loop gets two data pointers and two strides, and should - * look roughly like this: - * { - * NPY_BEGIN_THREADS_DEF; - * if (!needs_api) { - * NPY_BEGIN_THREADS; - * } - * // This first-visit loop can be skipped if 'assign_identity' was non-NULL - * if (skip_first_count > 0) { - * do { - * char *data0 = dataptr[0], *data1 = dataptr[1]; - * npy_intp stride0 = strideptr[0], stride1 = strideptr[1]; - * npy_intp count = *countptr; - * - * // Skip any first-visit elements - * if (NpyIter_IsFirstVisit(iter, 0)) { - * if (stride0 == 0) { - * --count; - * --skip_first_count; - * data1 += stride1; - * } - * else { - * skip_first_count -= count; - * count = 0; - * } - * } - * - * while (count--) { - * *(result_t *)data0 = my_reduce_op(*(result_t *)data0, - * *(operand_t *)data1); - * data0 += stride0; - * data1 += stride1; - * } - * - * // Jump to the faster loop when skipping is done - * if (skip_first_count == 0) { - * if (iternext(iter)) { - * break; - * } - * else { - * goto finish_loop; - * } - * } - * } while (iternext(iter)); - * } - * do { - * char *data0 = dataptr[0], *data1 = dataptr[1]; - * npy_intp stride0 = strideptr[0], stride1 = strideptr[1]; - * npy_intp count = *countptr; - * - * while (count--) { - * *(result_t *)data0 = my_reduce_op(*(result_t *)data0, - * *(operand_t *)data1); - * data0 += stride0; - * data1 += stride1; - * } - * } while (iternext(iter)); - * finish_loop: - * if (!needs_api) { - * NPY_END_THREADS; - * } - * return (needs_api && PyErr_Occurred()) ? -1 : 0; - * } - * - * If needs_api is True, this function should call PyErr_Occurred() - * to check if an error occurred during processing, and return -1 for - * error, 0 for success. + * TODO: This should be refactored/removed. */ -typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter, - char **dataptr, - npy_intp const *strideptr, - npy_intp const *countptr, - NpyIter_IterNextFunc *iternext, - int needs_api, - npy_intp skip_first_count, - void *data); +typedef int (PyArray_ReduceLoopFunc)(PyArrayMethod_Context *context, + PyArrayMethod_StridedLoop *strided_loop, NpyAuxData *auxdata, + NpyIter *iter, char **dataptrs, npy_intp const *strides, + npy_intp const *countptr, NpyIter_IterNextFunc *iternext, + int needs_api, npy_intp skip_first_count); /* * This function executes all the standard NumPy reduction function @@ -138,16 +62,10 @@ typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter, * errormask : forwarded from _get_bufsize_errmask */ NPY_NO_EXPORT PyArrayObject * -PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, - PyArrayObject *wheremask, - PyArray_Descr *operand_dtype, - PyArray_Descr *result_dtype, - NPY_CASTING casting, - npy_bool *axis_flags, int reorderable, - int keepdims, - PyObject *identity, - PyArray_ReduceLoopFunc *loop, - void *data, npy_intp buffersize, const char *funcname, - int errormask); +PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, + PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask, + npy_bool *axis_flags, int reorderable, int keepdims, + PyObject *identity, PyArray_ReduceLoopFunc *loop, + void *data, npy_intp buffersize, const char *funcname, int errormask); #endif diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 42290e8c9..228e67576 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -990,6 +990,7 @@ convert_ufunc_arguments(PyUFuncObject *ufunc, } /* Convert and fill in output arguments */ + memset(out_op_DTypes + nin, 0, nout * sizeof(*out_op_DTypes)); if (full_args.out != NULL) { for (int i = 0; i < nout; i++) { obj = PyTuple_GET_ITEM(full_args.out, i); @@ -1047,6 +1048,7 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl, PyArrayObject **op, PyArray_Descr **dtypes, NPY_CASTING casting, npy_intp buffersize) { + int force_cast_input = ufuncimpl->flags & _NPY_METH_FORCE_CAST_INPUTS; int i, nin = ufuncimpl->nin, nop = nin + ufuncimpl->nout; for (i = 0; i < nop; ++i) { @@ -1070,7 +1072,13 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl, must_copy = 1; } - if (PyArray_MinCastSafety(safety, casting) != casting) { + if (force_cast_input && i < nin) { + /* + * ArrayMethod flagged to ignore casting (logical funcs + * can force cast to bool) + */ + } + else if (PyArray_MinCastSafety(safety, casting) != casting) { return 0; /* the cast is not safe enough */ } } @@ -1360,8 +1368,15 @@ validate_casting(PyArrayMethodObject *method, PyUFuncObject *ufunc, */ return 0; } - if (PyUFunc_ValidateCasting(ufunc, casting, ops, descriptors) < 0) { - return -1; + if (method->flags & _NPY_METH_FORCE_CAST_INPUTS) { + if (PyUFunc_ValidateOutCasting(ufunc, casting, ops, descriptors) < 0) { + return -1; + } + } + else { + if (PyUFunc_ValidateCasting(ufunc, casting, ops, descriptors) < 0) { + return -1; + } } return 0; } @@ -2470,9 +2485,9 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, /* Final preparation of the arraymethod call */ PyArrayMethod_Context context = { - .caller = (PyObject *)ufunc, - .method = ufuncimpl, - .descriptors = operation_descrs, + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = operation_descrs, }; PyArrayMethod_StridedLoop *strided_loop; NPY_ARRAYMETHOD_FLAGS flags = 0; @@ -2592,9 +2607,9 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc, /* Final preparation of the arraymethod call */ PyArrayMethod_Context context = { - .caller = (PyObject *)ufunc, - .method = ufuncimpl, - .descriptors = operation_descrs, + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = operation_descrs, }; /* Do the ufunc loop */ @@ -2661,195 +2676,129 @@ PyUFunc_GenericFunction(PyUFuncObject *NPY_UNUSED(ufunc), /* - * Given the output type, finds the specified binary op. The - * ufunc must have nin==2 and nout==1. The function may modify - * otype if the given type isn't found. + * Promote and resolve a reduction like operation. * - * Returns 0 on success, -1 on failure. + * @param ufunc + * @param arr The operation array + * @param out The output array or NULL if not provided. Note that NumPy always + * used out to mean the same as `dtype=out.dtype` and never passed + * the array itself to the type-resolution. + * @param signature The DType signature, which may already be set due to the + * dtype passed in by the user, or the special cases (add, multiply). + * (Contains strong references and may be modified.) + * @param enforce_uniform_args If `NPY_TRUE` fully uniform dtypes/descriptors + * are enforced as required for accumulate and (currently) reduceat. + * @param out_descrs New references to the resolved descriptors (on success). + * @param method The ufunc method, "reduce", "reduceat", or "accumulate". + + * @returns ufuncimpl The `ArrayMethod` implemention to use. Or NULL if an + * error occurred. */ -static int -get_binary_op_function(PyUFuncObject *ufunc, int *otype, - PyUFuncGenericFunction *out_innerloop, - void **out_innerloopdata) +static PyArrayMethodObject * +reducelike_promote_and_resolve(PyUFuncObject *ufunc, + PyArrayObject *arr, PyArrayObject *out, + PyArray_DTypeMeta *signature[3], + npy_bool enforce_uniform_args, PyArray_Descr *out_descrs[3], + char *method) { - int i; - - NPY_UF_DBG_PRINT1("Getting binary op function for type number %d\n", - *otype); - - /* If the type is custom and there are userloops, search for it here */ - if (ufunc->userloops != NULL && PyTypeNum_ISUSERDEF(*otype)) { - PyObject *key, *obj; - key = PyLong_FromLong(*otype); - if (key == NULL) { - return -1; - } - obj = PyDict_GetItemWithError(ufunc->userloops, key); - Py_DECREF(key); - if (obj == NULL && PyErr_Occurred()) { - return -1; - } - else if (obj != NULL) { - PyUFunc_Loop1d *funcdata = PyCapsule_GetPointer(obj, NULL); - if (funcdata == NULL) { - return -1; - } - while (funcdata != NULL) { - int *types = funcdata->arg_types; - - if (types[0] == *otype && types[1] == *otype && - types[2] == *otype) { - *out_innerloop = funcdata->func; - *out_innerloopdata = funcdata->data; - return 0; - } + /* + * Note that the `ops` is not realy correct. But legacy resolution + * cannot quite handle the correct ops (e.g. a NULL first item if `out` + * is NULL), and it should only matter in very strange cases. + */ + PyArrayObject *ops[3] = {arr, arr, NULL}; + /* + * TODO: If `out` is not provided, arguably `initial` could define + * the first DType (and maybe also the out one), that way + * `np.add.reduce([1, 2, 3], initial=3.4)` would return a float + * value. As of 1.20, it returned an integer, so that should + * probably go to an error/warning first. + */ + PyArray_DTypeMeta *operation_DTypes[3] = { + NULL, NPY_DTYPE(PyArray_DESCR(arr)), NULL}; + Py_INCREF(operation_DTypes[1]); - funcdata = funcdata->next; - } - } + if (out != NULL) { + operation_DTypes[0] = NPY_DTYPE(PyArray_DESCR(out)); + Py_INCREF(operation_DTypes[0]); + operation_DTypes[2] = operation_DTypes[0]; + Py_INCREF(operation_DTypes[2]); } - /* Search for a function with compatible inputs */ - for (i = 0; i < ufunc->ntypes; ++i) { - char *types = ufunc->types + i*ufunc->nargs; - - NPY_UF_DBG_PRINT3("Trying loop with signature %d %d -> %d\n", - types[0], types[1], types[2]); - - if (PyArray_CanCastSafely(*otype, types[0]) && - types[0] == types[1] && - (*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) { - /* If the signature is "xx->x", we found the loop */ - if (types[2] == types[0]) { - *out_innerloop = ufunc->functions[i]; - *out_innerloopdata = ufunc->data[i]; - *otype = types[0]; - return 0; - } - /* - * Otherwise, we found the natural type of the reduction, - * replace otype and search again - */ - else { - *otype = types[2]; - break; - } - } + PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc, + ops, signature, operation_DTypes, NPY_FALSE, NPY_TRUE); + Py_DECREF(operation_DTypes[1]); + if (out != NULL) { + Py_DECREF(operation_DTypes[0]); + Py_DECREF(operation_DTypes[2]); } - - /* Search for the exact function */ - for (i = 0; i < ufunc->ntypes; ++i) { - char *types = ufunc->types + i*ufunc->nargs; - - if (PyArray_CanCastSafely(*otype, types[0]) && - types[0] == types[1] && - types[1] == types[2] && - (*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) { - /* Since the signature is "xx->x", we found the loop */ - *out_innerloop = ufunc->functions[i]; - *out_innerloopdata = ufunc->data[i]; - *otype = types[0]; - return 0; - } + if (ufuncimpl == NULL) { + return NULL; } - return -1; -} - -static int -reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr, - PyArray_Descr *odtype, PyArray_Descr **out_dtype) -{ - int i, retcode; - PyArrayObject *op[3] = {arr, arr, NULL}; - PyArray_Descr *dtypes[3] = {NULL, NULL, NULL}; - const char *ufunc_name = ufunc_get_name_cstr(ufunc); - PyObject *type_tup = NULL; - - *out_dtype = NULL; - /* - * If odtype is specified, make a type tuple for the type - * resolution. + * Find the correct descriptors for the operation. We use unsafe casting + * for historic reasons: The logic ufuncs required it to cast everything to + * boolean. However, we now special case the logical ufuncs, so that the + * casting safety could in principle be set to the default same-kind. + * (although this should possibly happen through a deprecation) */ - if (odtype != NULL) { - type_tup = PyTuple_Pack(3, odtype, odtype, Py_None); - if (type_tup == NULL) { - return -1; - } - } - - /* Use the type resolution function to find our loop */ - retcode = ufunc->type_resolver( - ufunc, NPY_UNSAFE_CASTING, - op, type_tup, dtypes); - Py_DECREF(type_tup); - if (retcode == -1) { - return -1; - } - else if (retcode == -2) { - PyErr_Format(PyExc_RuntimeError, - "type resolution returned NotImplemented to " - "reduce ufunc %s", ufunc_name); - return -1; + if (resolve_descriptors(3, ufunc, ufuncimpl, + ops, out_descrs, signature, NPY_UNSAFE_CASTING) < 0) { + return NULL; } /* - * The first two type should be equivalent. Because of how - * reduce has historically behaved in NumPy, the return type - * could be different, and it is the return type on which the - * reduction occurs. + * The first operand and output should be the same array, so they should + * be identical. The second argument can be different for reductions, + * but is checked to be identical for accumulate and reduceat. */ - if (!PyArray_EquivTypes(dtypes[0], dtypes[1])) { - for (i = 0; i < 3; ++i) { - Py_DECREF(dtypes[i]); - } - PyErr_Format(PyExc_RuntimeError, - "could not find a type resolution appropriate for " - "reduce ufunc %s", ufunc_name); - return -1; + if (out_descrs[0] != out_descrs[2] || ( + enforce_uniform_args && out_descrs[0] != out_descrs[1])) { + PyErr_Format(PyExc_TypeError, + "the resolved dtypes are not compatible with %s.%s", + ufunc_get_name_cstr(ufunc), method); + goto fail; + } + /* TODO: This really should _not_ be unsafe casting (same above)! */ + if (validate_casting(ufuncimpl, + ufunc, ops, out_descrs, NPY_UNSAFE_CASTING) < 0) { + goto fail; } - Py_DECREF(dtypes[0]); - Py_DECREF(dtypes[1]); - *out_dtype = dtypes[2]; + return ufuncimpl; - return 0; + fail: + for (int i = 0; i < 3; ++i) { + Py_DECREF(out_descrs[i]); + } + return NULL; } + static int -reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides, - npy_intp const *countptr, NpyIter_IterNextFunc *iternext, - int needs_api, npy_intp skip_first_count, void *data) +reduce_loop(PyArrayMethod_Context *context, + PyArrayMethod_StridedLoop *strided_loop, NpyAuxData *auxdata, + NpyIter *iter, char **dataptrs, npy_intp const *strides, + npy_intp const *countptr, NpyIter_IterNextFunc *iternext, + int needs_api, npy_intp skip_first_count) { - PyArray_Descr *dtypes[3], **iter_dtypes; - PyUFuncObject *ufunc = (PyUFuncObject *)data; - char *dataptrs_copy[3]; - npy_intp strides_copy[3]; + int retval; + char *dataptrs_copy[4]; + npy_intp strides_copy[4]; npy_bool masked; - /* The normal selected inner loop */ - PyUFuncGenericFunction innerloop = NULL; - void *innerloopdata = NULL; - NPY_BEGIN_THREADS_DEF; /* Get the number of operands, to determine whether "where" is used */ masked = (NpyIter_GetNOp(iter) == 3); - /* Get the inner loop */ - iter_dtypes = NpyIter_GetDescrArray(iter); - dtypes[0] = iter_dtypes[0]; - dtypes[1] = iter_dtypes[1]; - dtypes[2] = iter_dtypes[0]; - if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, - &innerloop, &innerloopdata, &needs_api) < 0) { - return -1; + if (!needs_api) { + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); } - NPY_BEGIN_THREADS_NDITER(iter); - if (skip_first_count > 0) { - do { + assert(!masked); /* Path currently not available for masked */ + while (1) { npy_intp count = *countptr; /* Skip any first-visit elements */ @@ -2872,27 +2821,23 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides, strides_copy[0] = strides[0]; strides_copy[1] = strides[1]; strides_copy[2] = strides[0]; - innerloop(dataptrs_copy, &count, - strides_copy, innerloopdata); - if (needs_api && PyErr_Occurred()) { + retval = strided_loop(context, + dataptrs_copy, &count, strides_copy, auxdata); + if (retval < 0) { goto finish_loop; } - /* Jump to the faster loop when skipping is done */ - if (skip_first_count == 0) { - if (iternext(iter)) { - break; - } - else { - goto finish_loop; - } + /* Advance loop, and abort on error (or finish) */ + if (!iternext(iter)) { + goto finish_loop; } - } while (iternext(iter)); - } - if (needs_api && PyErr_Occurred()) { - goto finish_loop; + /* When skipping is done break and continue with faster loop */ + if (skip_first_count == 0) { + break; + } + } } do { @@ -2903,42 +2848,23 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides, strides_copy[0] = strides[0]; strides_copy[1] = strides[1]; strides_copy[2] = strides[0]; - - if (!masked) { - innerloop(dataptrs_copy, countptr, - strides_copy, innerloopdata); + if (masked) { + dataptrs_copy[3] = dataptrs[2]; + strides_copy[3] = strides[2]; } - else { - npy_intp count = *countptr; - char *maskptr = dataptrs[2]; - npy_intp mask_stride = strides[2]; - /* Optimization for when the mask is broadcast */ - npy_intp n = mask_stride == 0 ? count : 1; - while (count) { - char mask = *maskptr; - maskptr += mask_stride; - while (n < count && mask == *maskptr) { - n++; - maskptr += mask_stride; - } - /* If mask set, apply inner loop on this contiguous region */ - if (mask) { - innerloop(dataptrs_copy, &n, - strides_copy, innerloopdata); - } - dataptrs_copy[0] += n * strides[0]; - dataptrs_copy[1] += n * strides[1]; - dataptrs_copy[2] = dataptrs_copy[0]; - count -= n; - n = 1; - } + + retval = strided_loop(context, + dataptrs_copy, countptr, strides_copy, auxdata); + if (retval < 0) { + goto finish_loop; } - } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); + + } while (iternext(iter)); finish_loop: NPY_END_THREADS; - return (needs_api && PyErr_Occurred()) ? -1 : 0; + return retval; } /* @@ -2959,15 +2885,14 @@ finish_loop: * this function does not validate them. */ static PyArrayObject * -PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, - int naxes, int *axes, PyArray_Descr *odtype, int keepdims, +PyUFunc_Reduce(PyUFuncObject *ufunc, + PyArrayObject *arr, PyArrayObject *out, + int naxes, int *axes, PyArray_DTypeMeta *signature[3], int keepdims, PyObject *initial, PyArrayObject *wheremask) { int iaxes, ndim; npy_bool reorderable; npy_bool axis_flags[NPY_MAXDIMS]; - PyArray_Descr *dtype; - PyArrayObject *result; PyObject *identity; const char *ufunc_name = ufunc_get_name_cstr(ufunc); /* These parameters come from a TLS global */ @@ -2994,6 +2919,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, } /* Get the identity */ + /* TODO: Both of these should be provided by the ArrayMethod! */ identity = _get_identity(ufunc, &reorderable); if (identity == NULL) { return NULL; @@ -3017,21 +2943,27 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, Py_INCREF(initial); /* match the reference count in the if above */ } - /* Get the reduction dtype */ - if (reduce_type_resolver(ufunc, arr, odtype, &dtype) < 0) { + PyArray_Descr *descrs[3]; + PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, + arr, out, signature, NPY_FALSE, descrs, "reduce"); + if (ufuncimpl == NULL) { Py_DECREF(initial); return NULL; } - result = PyUFunc_ReduceWrapper(arr, out, wheremask, dtype, dtype, - NPY_UNSAFE_CASTING, - axis_flags, reorderable, - keepdims, - initial, - reduce_loop, - ufunc, buffersize, ufunc_name, errormask); + PyArrayMethod_Context context = { + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = descrs, + }; - Py_DECREF(dtype); + PyArrayObject *result = PyUFunc_ReduceWrapper(&context, + arr, out, wheremask, axis_flags, reorderable, keepdims, + initial, reduce_loop, ufunc, buffersize, ufunc_name, errormask); + + for (int i = 0; i < 3; i++) { + Py_DECREF(descrs[i]); + } Py_DECREF(initial); return result; } @@ -3039,23 +2971,21 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, static PyObject * PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, - int axis, int otype) + int axis, PyArray_DTypeMeta *signature[3]) { PyArrayObject *op[2]; - PyArray_Descr *op_dtypes[2] = {NULL, NULL}; int op_axes_arrays[2][NPY_MAXDIMS]; int *op_axes[2] = {op_axes_arrays[0], op_axes_arrays[1]}; npy_uint32 op_flags[2]; - int idim, ndim, otype_final; + int idim, ndim; int needs_api, need_outer_iterator; - NpyIter *iter = NULL; + int res = 0; - /* The selected inner loop */ - PyUFuncGenericFunction innerloop = NULL; - void *innerloopdata = NULL; + PyArrayMethod_StridedLoop *strided_loop; + NpyAuxData *auxdata = NULL; - const char *ufunc_name = ufunc_get_name_cstr(ufunc); + NpyIter *iter = NULL; /* These parameters come from extobj= or from a TLS global */ int buffersize = 0, errormask = 0; @@ -3077,42 +3007,32 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, /* Take a reference to out for later returning */ Py_XINCREF(out); - otype_final = otype; - if (get_binary_op_function(ufunc, &otype_final, - &innerloop, &innerloopdata) < 0) { - PyArray_Descr *dtype = PyArray_DescrFromType(otype); - PyErr_Format(PyExc_ValueError, - "could not find a matching type for %s.accumulate, " - "requested type has type code '%c'", - ufunc_name, dtype ? dtype->type : '-'); - Py_XDECREF(dtype); - goto fail; + PyArray_Descr *descrs[3]; + PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, + arr, out, signature, NPY_TRUE, descrs, "accumulate"); + if (ufuncimpl == NULL) { + return NULL; } - ndim = PyArray_NDIM(arr); + /* The below code assumes that all descriptors are identical: */ + assert(descrs[0] == descrs[1] && descrs[0] == descrs[2]); - /* - * Set up the output data type, using the input's exact - * data type if the type number didn't change to preserve - * metadata - */ - if (PyArray_DESCR(arr)->type_num == otype_final) { - if (PyArray_ISNBO(PyArray_DESCR(arr)->byteorder)) { - op_dtypes[0] = PyArray_DESCR(arr); - Py_INCREF(op_dtypes[0]); - } - else { - op_dtypes[0] = PyArray_DescrNewByteorder(PyArray_DESCR(arr), - NPY_NATIVE); - } - } - else { - op_dtypes[0] = PyArray_DescrFromType(otype_final); - } - if (op_dtypes[0] == NULL) { + if (PyDataType_REFCHK(descrs[2]) && descrs[2]->type_num != NPY_OBJECT) { + /* This can be removed, but the initial element copy needs fixing */ + PyErr_SetString(PyExc_TypeError, + "accumulation currently only supports `object` dtype with " + "references"); goto fail; } + PyArrayMethod_Context context = { + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = descrs, + }; + + ndim = PyArray_NDIM(arr); + #if NPY_UF_DBG_TRACING printf("Found %s.accumulate inner loop with dtype : ", ufunc_name); PyObject_Print((PyObject *)op_dtypes[0], stdout, 0); @@ -3138,9 +3058,9 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, need_outer_iterator = (ndim > 1); /* We can't buffer, so must do UPDATEIFCOPY */ if (!PyArray_ISALIGNED(arr) || (out && !PyArray_ISALIGNED(out)) || - !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(arr)) || + !PyArray_EquivTypes(descrs[1], PyArray_DESCR(arr)) || (out && - !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(out)))) { + !PyArray_EquivTypes(descrs[0], PyArray_DESCR(out)))) { need_outer_iterator = 1; } /* If input and output overlap in memory, use iterator to figure it out */ @@ -3153,7 +3073,6 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, npy_uint32 flags = NPY_ITER_ZEROSIZE_OK| NPY_ITER_REFS_OK| NPY_ITER_COPY_IF_OVERLAP; - PyArray_Descr **op_dtypes_param = NULL; /* * The way accumulate is set up, we can't do buffering, @@ -3170,13 +3089,11 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, */ op_flags[0] |= NPY_ITER_UPDATEIFCOPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; op_flags[1] |= NPY_ITER_COPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; - op_dtypes_param = op_dtypes; - op_dtypes[1] = op_dtypes[0]; + NPY_UF_DBG_PRINT("Allocating outer iterator\n"); iter = NpyIter_AdvancedNew(2, op, flags, NPY_KEEPORDER, NPY_UNSAFE_CASTING, - op_flags, - op_dtypes_param, + op_flags, descrs, ndim_iter, op_axes, NULL, 0); if (iter == NULL) { goto fail; @@ -3194,14 +3111,14 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, } } - /* Get the output */ + /* Get the output from the iterator if it was allocated */ if (out == NULL) { if (iter) { op[0] = out = NpyIter_GetOperandArray(iter)[0]; Py_INCREF(out); } else { - PyArray_Descr *dtype = op_dtypes[0]; + PyArray_Descr *dtype = descrs[0]; Py_INCREF(dtype); op[0] = out = (PyArrayObject *)PyArray_NewFromDescr( &PyArray_Type, dtype, @@ -3210,10 +3127,31 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, if (out == NULL) { goto fail; } - } } + npy_intp fixed_strides[3]; + if (need_outer_iterator) { + NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + } + else { + fixed_strides[0] = PyArray_STRIDES(op[0])[axis]; + fixed_strides[1] = PyArray_STRIDES(op[1])[axis]; + fixed_strides[2] = fixed_strides[0]; + } + + + NPY_ARRAYMETHOD_FLAGS flags = 0; + if (ufuncimpl->get_strided_loop(&context, + 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* Start with the floating-point exception flags cleared */ + npy_clear_floatstatus_barrier((char*)&iter); + } + /* * If the reduction axis has size zero, either return the reduction * unit for UFUNC_REDUCE, or return the zero-sized output array @@ -3234,7 +3172,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, NpyIter_IterNextFunc *iternext; char **dataptr; - int itemsize = op_dtypes[0]->elsize; + int itemsize = descrs[0]->elsize; /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); @@ -3242,8 +3180,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, goto fail; } dataptr = NpyIter_GetDataPtrArray(iter); - needs_api = NpyIter_IterationNeedsAPI(iter); - + needs_api |= NpyIter_IterationNeedsAPI(iter); /* Execute the loop with just the outer iterator */ count_m1 = PyArray_DIM(op[1], axis)-1; @@ -3257,7 +3194,9 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, stride_copy[1] = stride1; stride_copy[2] = stride0; - NPY_BEGIN_THREADS_NDITER(iter); + if (!needs_api) { + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); + } do { dataptr_copy[0] = dataptr[0]; @@ -3270,7 +3209,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, * Output (dataptr[0]) and input (dataptr[1]) may point to * the same memory, e.g. np.add.accumulate(a, out=a). */ - if (otype == NPY_OBJECT) { + if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of the * reference count being zero temporarily. @@ -3290,18 +3229,17 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, dataptr_copy[2] += stride0; NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count_m1); - innerloop(dataptr_copy, &count_m1, - stride_copy, innerloopdata); + res = strided_loop(&context, + dataptr_copy, &count_m1, stride_copy, auxdata); } - } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); + } while (res == 0 && iternext(iter)); NPY_END_THREADS; } else if (iter == NULL) { char *dataptr_copy[3]; - npy_intp stride_copy[3]; - int itemsize = op_dtypes[0]->elsize; + int itemsize = descrs[0]->elsize; /* Execute the loop with no iterators */ npy_intp count = PyArray_DIM(op[1], axis); @@ -3315,15 +3253,11 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, PyArray_NDIM(op[0]))) { PyErr_SetString(PyExc_ValueError, "provided out is the wrong size " - "for the reduction"); + "for the accumulation."); goto fail; } stride0 = PyArray_STRIDE(op[0], axis); - stride_copy[0] = stride0; - stride_copy[1] = stride1; - stride_copy[2] = stride0; - /* Turn the two items into three for the inner loop */ dataptr_copy[0] = PyArray_BYTES(op[0]); dataptr_copy[1] = PyArray_BYTES(op[1]); @@ -3335,7 +3269,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, * Output (dataptr[0]) and input (dataptr[1]) may point to the * same memory, e.g. np.add.accumulate(a, out=a). */ - if (otype == NPY_OBJECT) { + if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of the * reference count being zero temporarily. @@ -3356,25 +3290,34 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count); - needs_api = PyDataType_REFCHK(op_dtypes[0]); + needs_api = PyDataType_REFCHK(descrs[0]); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(count); } - innerloop(dataptr_copy, &count, - stride_copy, innerloopdata); + res = strided_loop(&context, + dataptr_copy, &count, fixed_strides, auxdata); NPY_END_THREADS; } } finish: - Py_XDECREF(op_dtypes[0]); - int res = 0; + NPY_AUXDATA_FREE(auxdata); + Py_DECREF(descrs[0]); + Py_DECREF(descrs[1]); + Py_DECREF(descrs[2]); + if (!NpyIter_Deallocate(iter)) { res = -1; } + + if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* NOTE: We could check float errors even when `res < 0` */ + res = _check_ufunc_fperr(errormask, NULL, "accumulate"); + } + if (res < 0) { Py_DECREF(out); return NULL; @@ -3384,7 +3327,11 @@ finish: fail: Py_XDECREF(out); - Py_XDECREF(op_dtypes[0]); + + NPY_AUXDATA_FREE(auxdata); + Py_XDECREF(descrs[0]); + Py_XDECREF(descrs[1]); + Py_XDECREF(descrs[2]); NpyIter_Deallocate(iter); @@ -3409,28 +3356,31 @@ fail: * indices[1::2] = range(1,len(array)) * * output shape is based on the size of indices + * + * TODO: Reduceat duplicates too much code from accumulate! */ static PyObject * PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, - PyArrayObject *out, int axis, int otype) + PyArrayObject *out, int axis, PyArray_DTypeMeta *signature[3]) { PyArrayObject *op[3]; - PyArray_Descr *op_dtypes[3] = {NULL, NULL, NULL}; int op_axes_arrays[3][NPY_MAXDIMS]; int *op_axes[3] = {op_axes_arrays[0], op_axes_arrays[1], op_axes_arrays[2]}; npy_uint32 op_flags[3]; - int idim, ndim, otype_final; - int need_outer_iterator = 0; + int idim, ndim; + int needs_api, need_outer_iterator = 0; + + int res = 0; NpyIter *iter = NULL; + PyArrayMethod_StridedLoop *strided_loop; + NpyAuxData *auxdata = NULL; + /* The reduceat indices - ind must be validated outside this call */ npy_intp *reduceat_ind; npy_intp i, ind_size, red_axis_size; - /* The selected inner loop */ - PyUFuncGenericFunction innerloop = NULL; - void *innerloopdata = NULL; const char *ufunc_name = ufunc_get_name_cstr(ufunc); char *opname = "reduceat"; @@ -3470,42 +3420,32 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, /* Take a reference to out for later returning */ Py_XINCREF(out); - otype_final = otype; - if (get_binary_op_function(ufunc, &otype_final, - &innerloop, &innerloopdata) < 0) { - PyArray_Descr *dtype = PyArray_DescrFromType(otype); - PyErr_Format(PyExc_ValueError, - "could not find a matching type for %s.%s, " - "requested type has type code '%c'", - ufunc_name, opname, dtype ? dtype->type : '-'); - Py_XDECREF(dtype); - goto fail; + PyArray_Descr *descrs[3]; + PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, + arr, out, signature, NPY_TRUE, descrs, "reduceat"); + if (ufuncimpl == NULL) { + return NULL; } - ndim = PyArray_NDIM(arr); + /* The below code assumes that all descriptors are identical: */ + assert(descrs[0] == descrs[1] && descrs[0] == descrs[2]); - /* - * Set up the output data type, using the input's exact - * data type if the type number didn't change to preserve - * metadata - */ - if (PyArray_DESCR(arr)->type_num == otype_final) { - if (PyArray_ISNBO(PyArray_DESCR(arr)->byteorder)) { - op_dtypes[0] = PyArray_DESCR(arr); - Py_INCREF(op_dtypes[0]); - } - else { - op_dtypes[0] = PyArray_DescrNewByteorder(PyArray_DESCR(arr), - NPY_NATIVE); - } - } - else { - op_dtypes[0] = PyArray_DescrFromType(otype_final); - } - if (op_dtypes[0] == NULL) { + if (PyDataType_REFCHK(descrs[2]) && descrs[2]->type_num != NPY_OBJECT) { + /* This can be removed, but the initial element copy needs fixing */ + PyErr_SetString(PyExc_TypeError, + "reduceat currently only supports `object` dtype with " + "references"); goto fail; } + PyArrayMethod_Context context = { + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = descrs, + }; + + ndim = PyArray_NDIM(arr); + #if NPY_UF_DBG_TRACING printf("Found %s.%s inner loop with dtype : ", ufunc_name, opname); PyObject_Print((PyObject *)op_dtypes[0], stdout, 0); @@ -3532,11 +3472,13 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, op[2] = ind; if (out != NULL || ndim > 1 || !PyArray_ISALIGNED(arr) || - !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(arr))) { + !PyArray_EquivTypes(descrs[0], PyArray_DESCR(arr))) { need_outer_iterator = 1; } if (need_outer_iterator) { + PyArray_Descr *op_dtypes[3] = {descrs[0], descrs[1], NULL}; + npy_uint32 flags = NPY_ITER_ZEROSIZE_OK| NPY_ITER_REFS_OK| NPY_ITER_MULTI_INDEX| @@ -3565,8 +3507,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, NPY_UF_DBG_PRINT("Allocating outer iterator\n"); iter = NpyIter_AdvancedNew(3, op, flags, NPY_KEEPORDER, NPY_UNSAFE_CASTING, - op_flags, - op_dtypes, + op_flags, op_dtypes, ndim, op_axes, NULL, 0); if (iter == NULL) { goto fail; @@ -3590,11 +3531,15 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, Py_INCREF(out); } } - /* Allocate the output for when there's no outer iterator */ - else if (out == NULL) { - Py_INCREF(op_dtypes[0]); + else { + /* + * Allocate the output for when there's no outer iterator, we always + * use the outer_iteration path when `out` is passed. + */ + assert(out == NULL); + Py_INCREF(descrs[0]); op[0] = out = (PyArrayObject *)PyArray_NewFromDescr( - &PyArray_Type, op_dtypes[0], + &PyArray_Type, descrs[0], 1, &ind_size, NULL, NULL, 0, NULL); if (out == NULL) { @@ -3602,6 +3547,28 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, } } + npy_intp fixed_strides[3]; + if (need_outer_iterator) { + NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + } + else { + fixed_strides[1] = PyArray_STRIDES(op[1])[axis]; + } + /* The reduce axis does not advance here in the strided-loop */ + fixed_strides[0] = 0; + fixed_strides[2] = 0; + + NPY_ARRAYMETHOD_FLAGS flags = 0; + if (ufuncimpl->get_strided_loop(&context, + 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* Start with the floating-point exception flags cleared */ + npy_clear_floatstatus_barrier((char*)&iter); + } + /* * If the output has zero elements, return now. */ @@ -3619,8 +3586,8 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, npy_intp stride0, stride1; npy_intp stride0_ind = PyArray_STRIDE(op[0], axis); - int itemsize = op_dtypes[0]->elsize; - int needs_api = NpyIter_IterationNeedsAPI(iter); + int itemsize = descrs[0]->elsize; + needs_api |= NpyIter_IterationNeedsAPI(iter); /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); @@ -3640,10 +3607,11 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, stride_copy[1] = stride1; stride_copy[2] = stride0; - NPY_BEGIN_THREADS_NDITER(iter); + if (!needs_api) { + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); + } do { - for (i = 0; i < ind_size; ++i) { npy_intp start = reduceat_ind[i], end = (i == ind_size-1) ? count_m1+1 : @@ -3661,7 +3629,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, * to the same memory, e.g. * np.add.reduceat(a, np.arange(len(a)), out=a). */ - if (otype == NPY_OBJECT) { + if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of * the reference count being zero temporarily. @@ -3681,33 +3649,24 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, dataptr_copy[1] += stride1; NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count); - innerloop(dataptr_copy, &count, - stride_copy, innerloopdata); + res = strided_loop(&context, + dataptr_copy, &count, stride_copy, auxdata); } } - } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); + } while (res == 0 && iternext(iter)); NPY_END_THREADS; } else if (iter == NULL) { char *dataptr_copy[3]; - npy_intp stride_copy[3]; - int itemsize = op_dtypes[0]->elsize; + int itemsize = descrs[0]->elsize; npy_intp stride0_ind = PyArray_STRIDE(op[0], axis); - - /* Execute the loop with no iterators */ - npy_intp stride0 = 0, stride1 = PyArray_STRIDE(op[1], axis); - - int needs_api = PyDataType_REFCHK(op_dtypes[0]); + npy_intp stride1 = PyArray_STRIDE(op[1], axis); NPY_UF_DBG_PRINT("UFunc: Reduce loop with no iterators\n"); - stride_copy[0] = stride0; - stride_copy[1] = stride1; - stride_copy[2] = stride0; - if (!needs_api) { NPY_BEGIN_THREADS; } @@ -3729,7 +3688,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, * the same memory, e.g. * np.add.reduceat(a, np.arange(len(a)), out=a). */ - if (otype == NPY_OBJECT) { + if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of the * reference count being zero temporarily. @@ -3749,8 +3708,11 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, dataptr_copy[1] += stride1; NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count); - innerloop(dataptr_copy, &count, - stride_copy, innerloopdata); + res = strided_loop(&context, + dataptr_copy, &count, fixed_strides, auxdata); + if (res != 0) { + break; + } } } @@ -3758,8 +3720,21 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, } finish: - Py_XDECREF(op_dtypes[0]); + NPY_AUXDATA_FREE(auxdata); + Py_DECREF(descrs[0]); + Py_DECREF(descrs[1]); + Py_DECREF(descrs[2]); + if (!NpyIter_Deallocate(iter)) { + res = -1; + } + + if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* NOTE: We could check float errors even when `res < 0` */ + res = _check_ufunc_fperr(errormask, NULL, "reduceat"); + } + + if (res < 0) { Py_DECREF(out); return NULL; } @@ -3768,9 +3743,14 @@ finish: fail: Py_XDECREF(out); - Py_XDECREF(op_dtypes[0]); + + NPY_AUXDATA_FREE(auxdata); + Py_XDECREF(descrs[0]); + Py_XDECREF(descrs[1]); + Py_XDECREF(descrs[2]); NpyIter_Deallocate(iter); + return NULL; } @@ -3868,7 +3848,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyArrayObject *mp = NULL, *wheremask = NULL, *ret = NULL; PyObject *op = NULL; PyArrayObject *indices = NULL; - PyArray_Descr *otype = NULL; + PyArray_DTypeMeta *signature[3] = {NULL, NULL, NULL}; PyArrayObject *out = NULL; int keepdims = 0; PyObject *initial = NULL; @@ -4012,13 +3992,10 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, } if (otype_obj && otype_obj != Py_None) { /* Use `_get_dtype` because `dtype` is a DType and not the instance */ - PyArray_DTypeMeta *dtype = _get_dtype(otype_obj); - if (dtype == NULL) { + signature[0] = _get_dtype(otype_obj); + if (signature[0] == NULL) { goto fail; } - otype = dtype->singleton; - Py_INCREF(otype); - Py_DECREF(dtype); } if (out_obj && !PyArray_OutputConverter(out_obj, &out)) { goto fail; @@ -4038,15 +4015,6 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, ndim = PyArray_NDIM(mp); - /* Check to see that type (and otype) is not FLEXIBLE */ - if (PyArray_ISFLEXIBLE(mp) || - (otype && PyTypeNum_ISFLEXIBLE(otype->type_num))) { - PyErr_Format(PyExc_TypeError, - "cannot perform %s with flexible type", - _reduce_type[operation]); - goto fail; - } - /* Convert the 'axis' parameter into a list of axes */ if (axes_obj == NULL) { /* apply defaults */ @@ -4109,14 +4077,12 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, } /* - * If out is specified it determines otype - * unless otype already specified. + * If no dtype is specified and out is not specified, we override the + * integer and bool dtype used for add and multiply. + * + * TODO: The following should be handled by a promoter! */ - if (otype == NULL && out != NULL) { - otype = PyArray_DESCR(out); - Py_INCREF(otype); - } - if (otype == NULL) { + if (signature[0] == NULL && out == NULL) { /* * For integer types --- make sure at least a long * is used for add and multiply reduction to avoid overflow @@ -4136,16 +4102,17 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, typenum = NPY_LONG; } } + signature[0] = PyArray_DTypeFromTypeNum(typenum); } - otype = PyArray_DescrFromType(typenum); } - + Py_XINCREF(signature[0]); + signature[2] = signature[0]; switch(operation) { case UFUNC_REDUCE: - ret = PyUFunc_Reduce(ufunc, mp, out, naxes, axes, - otype, keepdims, initial, wheremask); - Py_XDECREF(wheremask); + ret = PyUFunc_Reduce(ufunc, + mp, out, naxes, axes, signature, keepdims, initial, wheremask); + Py_XSETREF(wheremask, NULL); break; case UFUNC_ACCUMULATE: if (ndim == 0) { @@ -4157,8 +4124,8 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, "accumulate does not allow multiple axes"); goto fail; } - ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc, mp, out, axes[0], - otype->type_num); + ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc, + mp, out, axes[0], signature); break; case UFUNC_REDUCEAT: if (ndim == 0) { @@ -4171,19 +4138,22 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, goto fail; } ret = (PyArrayObject *)PyUFunc_Reduceat(ufunc, - mp, indices, out, axes[0], otype->type_num); + mp, indices, out, axes[0], signature); Py_SETREF(indices, NULL); break; } + if (ret == NULL) { + goto fail; + } + + Py_DECREF(signature[0]); + Py_DECREF(signature[1]); + Py_DECREF(signature[2]); + Py_DECREF(mp); - Py_DECREF(otype); Py_XDECREF(full_args.in); Py_XDECREF(full_args.out); - if (ret == NULL) { - return NULL; - } - /* Wrap and return the output */ { /* Find __array_wrap__ - note that these rules are different to the @@ -4211,7 +4181,10 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, } fail: - Py_XDECREF(otype); + Py_XDECREF(signature[0]); + Py_XDECREF(signature[1]); + Py_XDECREF(signature[2]); + Py_XDECREF(mp); Py_XDECREF(wheremask); Py_XDECREF(indices); diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c index 7e24bc493..aa8d7b982 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.c +++ b/numpy/core/src/umath/ufunc_type_resolution.c @@ -247,6 +247,28 @@ PyUFunc_ValidateCasting(PyUFuncObject *ufunc, } +/* + * Same as `PyUFunc_ValidateCasting` but only checks output casting. + */ +NPY_NO_EXPORT int +PyUFunc_ValidateOutCasting(PyUFuncObject *ufunc, + NPY_CASTING casting, PyArrayObject **operands, PyArray_Descr **dtypes) +{ + int i, nin = ufunc->nin, nop = nin + ufunc->nout; + + for (i = nin; i < nop; ++i) { + if (operands[i] == NULL) { + continue; + } + if (!PyArray_CanCastTypeTo(dtypes[i], + PyArray_DESCR(operands[i]), casting)) { + return raise_output_casting_error( + ufunc, casting, dtypes[i], PyArray_DESCR(operands[i]), i); + } + } + return 0; +} + /*UFUNC_API * * This function applies the default type resolution rules diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h index dd88a081a..84a2593f4 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.h +++ b/numpy/core/src/umath/ufunc_type_resolution.h @@ -99,6 +99,10 @@ PyUFunc_DivmodTypeResolver(PyUFuncObject *ufunc, PyObject *type_tup, PyArray_Descr **out_dtypes); +NPY_NO_EXPORT int +PyUFunc_ValidateOutCasting(PyUFuncObject *ufunc, + NPY_CASTING casting, PyArrayObject **operands, PyArray_Descr **dtypes); + /* * Does a linear search for the best inner loop of the ufunc. * diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c index a9954dfc1..272555704 100644 --- a/numpy/core/src/umath/umathmodule.c +++ b/numpy/core/src/umath/umathmodule.c @@ -22,6 +22,7 @@ #include "numpy/npy_math.h" #include "number.h" +#include "dispatching.h" static PyUFuncGenericFunction pyfunc_functions[] = {PyUFunc_On_Om}; @@ -305,5 +306,33 @@ int initumath(PyObject *m) return -1; } + /* + * Set up promoters for logical functions + * TODO: This should probably be done at a better place, or even in the + * code generator directly. + */ + s = _PyDict_GetItemStringWithError(d, "logical_and"); + if (s == NULL) { + return -1; + } + if (install_logical_ufunc_promoter(s) < 0) { + return -1; + } + + s = _PyDict_GetItemStringWithError(d, "logical_or"); + if (s == NULL) { + return -1; + } + if (install_logical_ufunc_promoter(s) < 0) { + return -1; + } + + s = _PyDict_GetItemStringWithError(d, "logical_xor"); + if (s == NULL) { + return -1; + } + if (install_logical_ufunc_promoter(s) < 0) { + return -1; + } return 0; } diff --git a/numpy/core/tests/test_custom_dtypes.py b/numpy/core/tests/test_custom_dtypes.py index 5eb82bc93..e502a87ed 100644 --- a/numpy/core/tests/test_custom_dtypes.py +++ b/numpy/core/tests/test_custom_dtypes.py @@ -101,6 +101,22 @@ class TestSFloat: expected_view = a.view(np.float64) * b.view(np.float64) assert_array_equal(res.view(np.float64), expected_view) + def test_possible_and_impossible_reduce(self): + # For reductions to work, the first and last operand must have the + # same dtype. For this parametric DType that is not necessarily true. + a = self._get_array(2.) + # Addition reductin works (as of writing requires to pass initial + # because setting a scaled-float from the default `0` fails). + res = np.add.reduce(a, initial=0.) + assert res == a.astype(np.float64).sum() + + # But each multiplication changes the factor, so a reduction is not + # possible (the relaxed version of the old refusal to handle any + # flexible dtype). + with pytest.raises(TypeError, + match="the resolved dtypes are not compatible"): + np.multiply.reduce(a) + def test_basic_multiply_promotion(self): float_a = np.array([1., 2., 3.]) b = self._get_array(2.) @@ -145,3 +161,23 @@ class TestSFloat: # Check that casting the output fails also (done by the ufunc here) with pytest.raises(TypeError): np.add(a, a, out=c, casting="safe") + + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or, np.logical_xor]) + def test_logical_ufuncs_casts_to_bool(self, ufunc): + a = self._get_array(2.) + a[0] = 0. # make sure first element is considered False. + + float_equiv = a.astype(float) + expected = ufunc(float_equiv, float_equiv) + res = ufunc(a, a) + assert_array_equal(res, expected) + + # also check that the same works for reductions: + expected = ufunc.reduce(float_equiv) + res = ufunc.reduce(a) + assert_array_equal(res, expected) + + # The output casting does not match the bool, bool -> bool loop: + with pytest.raises(TypeError): + ufunc(a, a, out=np.empty(a.shape, dtype=int), casting="equiv") diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index 69eba7ba0..b95d669a8 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -2029,6 +2029,21 @@ class TestDateTime: assert_equal(np.maximum.reduce(a), np.timedelta64(7, 's')) + def test_datetime_no_subtract_reducelike(self): + # subtracting two datetime64 works, but we cannot reduce it, since + # the result of that subtraction will have a different dtype. + arr = np.array(["2021-12-02", "2019-05-12"], dtype="M8[ms]") + msg = r"the resolved dtypes are not compatible with subtract\." + + with pytest.raises(TypeError, match=msg + "reduce"): + np.subtract.reduce(arr) + + with pytest.raises(TypeError, match=msg + "accumulate"): + np.subtract.accumulate(arr) + + with pytest.raises(TypeError, match=msg + "reduceat"): + np.subtract.reduceat(arr, [0]) + def test_datetime_busday_offset(self): # First Monday in June assert_equal( diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 30929ce91..78833a33c 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -1362,6 +1362,14 @@ class TestUfunc: np.array([[2]*i for i in [1, 3, 6, 10]], dtype=object), ) + def test_object_array_accumulate_failure(self): + # Typical accumulation on object works as expected: + res = np.add.accumulate(np.array([1, 0, 2], dtype=object)) + assert_array_equal(res, np.array([1, 1, 3], dtype=object)) + # But errors are propagated from the inner-loop if they occur: + with pytest.raises(TypeError): + np.add.accumulate([1, None, 2]) + def test_object_array_reduceat_inplace(self): # Checks that in-place reduceats work, see also gh-7465 arr = np.empty(4, dtype=object) @@ -1381,6 +1389,15 @@ class TestUfunc: np.add.reduceat(arr, np.arange(4), out=arr, axis=-1) assert_array_equal(arr, out) + def test_object_array_reduceat_failure(self): + # Reduceat works as expected when no invalid operation occurs (None is + # not involved in an operation here) + res = np.add.reduceat(np.array([1, None, 2], dtype=object), [1, 2]) + assert_array_equal(res, np.array([None, 2], dtype=object)) + # But errors when None would be involved in an operation: + with pytest.raises(TypeError): + np.add.reduceat([1, None, 2], [0, 2]) + def test_zerosize_reduction(self): # Test with default dtype and object dtype for a in [[], np.array([], dtype=object)]: @@ -2098,6 +2115,25 @@ class TestUfunc: with pytest.raises(TypeError): ufunc(a, a, signature=signature) + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or, np.logical_xor]) + def test_logical_ufuncs_support_anything(self, ufunc): + # The logical ufuncs support even input that can't be promoted: + a = np.array('1') + c = np.array([1., 2.]) + assert_array_equal(ufunc(a, c), ufunc([True, True], True)) + assert ufunc.reduce(a) == True + + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or, np.logical_xor]) + def test_logical_ufuncs_out_cast_check(self, ufunc): + a = np.array('1') + c = np.array([1., 2.]) + out = a.copy() + with pytest.raises(TypeError): + # It would be safe, but not equiv casting: + ufunc(a, c, out=out, casting="equiv") + def test_reduce_noncontig_output(self): # Check that reduction deals with non-contiguous output arrays # appropriately. @@ -2119,6 +2155,22 @@ class TestUfunc: assert_equal(y_base[1,:], y_base_copy[1,:]) assert_equal(y_base[3,:], y_base_copy[3,:]) + @pytest.mark.parametrize("with_cast", [True, False]) + def test_reduceat_and_accumulate_out_shape_mismatch(self, with_cast): + # Should raise an error mentioning "shape" or "size" + arr = np.arange(5) + out = np.arange(3) # definitely wrong shape + if with_cast: + # If a cast is necessary on the output, we can be sure to use + # the generic NpyIter (non-fast) path. + out = out.astype(np.float64) + + with pytest.raises(ValueError, match="(shape|size)"): + np.add.reduceat(arr, [0, 3], out=out) + + with pytest.raises(ValueError, match="(shape|size)"): + np.add.accumulate(arr, out=out) + @pytest.mark.parametrize('out_shape', [(), (1,), (3,), (1, 1), (1, 3), (4, 3)]) @pytest.mark.parametrize('keepdims', [True, False]) @@ -2331,7 +2383,7 @@ def test_reduce_casterrors(offset): out = np.array(-1, dtype=np.intp) count = sys.getrefcount(value) - with pytest.raises(ValueError): + with pytest.raises(TypeError): # This is an unsafe cast, but we currently always allow that: np.add.reduce(arr, dtype=np.intp, out=out) assert count == sys.getrefcount(value) @@ -2340,3 +2392,18 @@ def test_reduce_casterrors(offset): # if the error happened immediately. # This does not define behaviour, the output is invalid and thus undefined assert out[()] < value * offset + + +@pytest.mark.parametrize("method", + [np.add.accumulate, np.add.reduce, + pytest.param(lambda x: np.add.reduceat(x, [0]), id="reduceat")]) +def test_reducelike_floaterrors(method): + # adding inf and -inf creates an invalid float and should give a warning + arr = np.array([np.inf, 0, -np.inf]) + with np.errstate(all="warn"): + with pytest.warns(RuntimeWarning, match="invalid value"): + method(arr) + + with np.errstate(all="raise"): + with pytest.raises(FloatingPointError): + method(arr) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 2ff1667ba..036d6312c 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -1065,7 +1065,7 @@ class _MaskedBinaryOperation(_MaskedUFunc): tr = self.f.reduce(t, axis) mr = nomask else: - tr = self.f.reduce(t, axis, dtype=dtype or t.dtype) + tr = self.f.reduce(t, axis, dtype=dtype) mr = umath.logical_and.reduce(m, axis) if not tr.shape: |