diff options
Diffstat (limited to 'numpy')
| -rw-r--r-- | numpy/core/src/multiarray/array_method.c | 53 | ||||
| -rw-r--r-- | numpy/core/src/multiarray/array_method.h | 30 | ||||
| -rw-r--r-- | numpy/core/src/umath/reduction.c | 114 | ||||
| -rw-r--r-- | numpy/core/src/umath/reduction.h | 6 | ||||
| -rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 67 | ||||
| -rw-r--r-- | numpy/core/tests/test_ufunc.py | 17 |
6 files changed, 195 insertions, 92 deletions
diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c index bdbd60dbb..a289e62ab 100644 --- a/numpy/core/src/multiarray/array_method.c +++ b/numpy/core/src/multiarray/array_method.c @@ -27,15 +27,18 @@ * weak reference to the input DTypes. */ #define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _UMATHMODULE #define _MULTIARRAYMODULE #include <npy_pycompat.h> #include "arrayobject.h" +#include "array_coercion.h" #include "array_method.h" #include "dtypemeta.h" #include "common_dtype.h" #include "convert_datatype.h" #include "common.h" +#include "numpy/ufuncobject.h" /* @@ -163,6 +166,53 @@ npy_default_get_strided_loop( } +/* Declared in `ufunc_object.c` */ +NPY_NO_EXPORT PyObject * +PyUFunc_GetIdentity(PyUFuncObject *ufunc, npy_bool *reorderable); + +/* + * The default `get_identity` attempts to look up the identity from the + * calling ufunc. + */ +static int +default_get_identity_function(PyArrayMethod_Context *context, + char *item, NPY_ARRAYMETHOD_IDENTITY_FLAGS *flags) +{ + *flags = 0; + + PyUFuncObject *ufunc = (PyUFuncObject *)context->caller; + if (!PyObject_TypeCheck(ufunc, &PyUFunc_Type)) { + /* Should not happen, but if it does, just give up */ + return 0; + } + + npy_bool reorderable; + PyObject *tmp = PyUFunc_GetIdentity(ufunc, &reorderable); + if (tmp == NULL) { + return -1; + } + if (reorderable) { + *flags |= NPY_METH_IS_REORDERABLE; + } + if (item == NULL) { + /* Only reorderable flag was requested (user provided initial value) */ + return 0; + } + + if (tmp != Py_None) { + /* We do not consider the value an identity for object dtype */ + *flags |= NPY_METH_ITEM_IS_DEFAULT; + if (context->descriptors[0]->type_num != NPY_OBJECT) { + *flags |= NPY_METH_ITEM_IS_IDENTITY; + } + int res = PyArray_Pack(context->descriptors[0], item, tmp); + Py_DECREF(tmp); + return res; + } + return 0; +} + + /** * Validate that the input is usable to create a new ArrayMethod. * @@ -248,6 +298,7 @@ fill_arraymethod_from_slots( /* Set the defaults */ meth->get_strided_loop = &npy_default_get_strided_loop; meth->resolve_descriptors = &default_resolve_descriptors; + meth->get_identity = default_get_identity_function; /* Fill in the slots passed by the user */ /* @@ -284,6 +335,8 @@ fill_arraymethod_from_slots( case NPY_METH_unaligned_contiguous_loop: meth->unaligned_contiguous_loop = slot->pfunc; continue; + case NPY_METH_get_identity: + meth->get_identity = slot->pfunc; default: break; } diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h index c9ec8903d..ea022a7e6 100644 --- a/numpy/core/src/multiarray/array_method.h +++ b/numpy/core/src/multiarray/array_method.h @@ -104,6 +104,34 @@ typedef int (get_loop_function)( NPY_ARRAYMETHOD_FLAGS *flags); +typedef enum { + /* The value can be used as a default for empty reductions */ + NPY_METH_ITEM_IS_DEFAULT = 1 << 0, + /* The value represents the identity value */ + NPY_METH_ITEM_IS_IDENTITY = 1 << 1, + /* The operation is fully reorderable (iteration order may be optimized) */ + NPY_METH_IS_REORDERABLE = 1 << 2, +} NPY_ARRAYMETHOD_IDENTITY_FLAGS; + +/* + * Query an ArrayMethod for its identity (for use with reductions) and whether + * its operation is reorderable (commutative). These are not always the same: + * Matrix multiplication is non-commutative, but does have an identity. + * + * The function should fill `item` and flag whether this value may be used as + * a default and/or identity. + * (Normally, an identity is always a valid default. However, NumPy makes an + * exception for `object` dtypes to ensure type-safety of the result.) + * If neither `NPY_METH_ITEM_IS_DEFAULT` or `NPY_METH_ITEM_IS_IDENTITY` is + * given, the value should be left uninitialized (no cleanup will be done). + * + * The function must return 0 on success and -1 on error (and clean up `item`). + */ +typedef int (get_identity_function)( + PyArrayMethod_Context *context, char *item, + NPY_ARRAYMETHOD_IDENTITY_FLAGS *flags); + + /* * The following functions are only used be the wrapping array method defined * in umath/wrapping_array_method.c @@ -198,6 +226,7 @@ typedef struct PyArrayMethodObject_tag { PyArrayMethod_StridedLoop *contiguous_loop; PyArrayMethod_StridedLoop *unaligned_strided_loop; PyArrayMethod_StridedLoop *unaligned_contiguous_loop; + get_identity_function *get_identity; /* Chunk only used for wrapping array method defined in umath */ struct PyArrayMethodObject_tag *wrapped_meth; PyArray_DTypeMeta **wrapped_dtypes; @@ -237,6 +266,7 @@ extern NPY_NO_EXPORT PyTypeObject PyBoundArrayMethod_Type; #define NPY_METH_contiguous_loop 4 #define NPY_METH_unaligned_strided_loop 5 #define NPY_METH_unaligned_contiguous_loop 6 +#define NPY_METH_get_identity 7 /* diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c index 817f99a04..bdb4e6a44 100644 --- a/numpy/core/src/umath/reduction.c +++ b/numpy/core/src/umath/reduction.c @@ -17,6 +17,7 @@ #include "numpy/arrayobject.h" #include "npy_pycompat.h" +#include "array_assign.h" #include "ctors.h" #include "numpy/ufuncobject.h" @@ -152,18 +153,12 @@ PyArray_CopyInitialReduceValues( * so that support can be added in the future without breaking * API compatibility. Pass in NULL. * axis_flags : Flags indicating the reduction axes of 'operand'. - * reorderable : If True, the reduction being done is reorderable, which - * means specifying multiple axes of reduction at once is ok, - * and the reduction code may calculate the reduction in an - * arbitrary order. The calculation may be reordered because - * of cache behavior or multithreading requirements. * keepdims : If true, leaves the reduction dimensions in the result * with size one. * subok : If true, the result uses the subclass of operand, otherwise * it is always a base class ndarray. - * identity : If Py_None, PyArray_CopyInitialReduceValues is used, otherwise - * this value is used to initialize the result to - * the reduction's unit. + * initial : Initial value, if NULL the default is fetched from the + * ArrayMethod (typically as the default from the ufunc). * loop : `reduce_loop` from `ufunc_object.c`. TODO: Refactor * data : Data which is passed to the inner loop. * buffersize : Buffer size for the iterator. For the default, pass in 0. @@ -182,9 +177,9 @@ PyArray_CopyInitialReduceValues( NPY_NO_EXPORT PyArrayObject * PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask, - npy_bool *axis_flags, int reorderable, int keepdims, - PyObject *identity, PyArray_ReduceLoopFunc *loop, - void *data, npy_intp buffersize, const char *funcname, int errormask) + npy_bool *axis_flags, int keepdims, + PyObject *initial, PyArray_ReduceLoopFunc *loop, + npy_intp buffersize, const char *funcname, int errormask) { assert(loop != NULL); PyArrayObject *result = NULL; @@ -198,38 +193,54 @@ PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, /* Loop auxdata (must be freed on error) */ NpyAuxData *auxdata = NULL; - /* More than one axis means multiple orders are possible */ - if (!reorderable && count_axes(PyArray_NDIM(operand), axis_flags) > 1) { - PyErr_Format(PyExc_ValueError, - "reduction operation '%s' is not reorderable, " - "so at most one axis may be specified", - funcname); - return NULL; - } - /* Can only use where with an initial ( from identity or argument) */ - if (wheremask != NULL && identity == Py_None) { - PyErr_Format(PyExc_ValueError, - "reduction operation '%s' does not have an identity, " - "so to use a where mask one has to specify 'initial'", - funcname); - return NULL; - } - - /* Set up the iterator */ op[0] = out; op[1] = operand; op_dtypes[0] = context->descriptors[0]; op_dtypes[1] = context->descriptors[1]; + /* + * Fill in default or identity value and ask if this is reorderable. + * Note that if the initial value was provided, we pass `initial_buf=NULL` + * to the `get_identity` function to indicate that we only require the + * reorderable flag. + * If a `result` was passed in, it is possible that the result has a dtype + * differing to the operation one. + */ + NPY_ARRAYMETHOD_IDENTITY_FLAGS identity_flags = 0; + char *identity_buf = NULL; + if (initial == NULL) { + /* Always init buffer (only necessary if it holds references) */ + identity_buf = PyMem_Calloc(1, op_dtypes[0]->elsize); + if (identity_buf == NULL) { + PyErr_NoMemory(); + goto fail; + } + } + if (context->method->get_identity( + context, identity_buf, &identity_flags) < 0) { + goto fail; + } + /* More than one axis means multiple orders are possible */ + if (!(identity_flags & NPY_METH_IS_REORDERABLE) + && count_axes(PyArray_NDIM(operand), axis_flags) > 1) { + PyErr_Format(PyExc_ValueError, + "reduction operation '%s' is not reorderable, " + "so at most one axis may be specified", + funcname); + goto fail; + } + it_flags = NPY_ITER_BUFFERED | NPY_ITER_EXTERNAL_LOOP | NPY_ITER_GROWINNER | - NPY_ITER_DONT_NEGATE_STRIDES | NPY_ITER_ZEROSIZE_OK | NPY_ITER_REFS_OK | NPY_ITER_DELAY_BUFALLOC | NPY_ITER_COPY_IF_OVERLAP; + if (!(identity_flags & NPY_METH_IS_REORDERABLE)) { + it_flags |= NPY_ITER_DONT_NEGATE_STRIDES; + } op_flags[0] = NPY_ITER_READWRITE | NPY_ITER_ALIGNED | NPY_ITER_ALLOCATE | @@ -298,6 +309,7 @@ PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, } result = NpyIter_GetOperandArray(iter)[0]; + npy_bool empty_reduce = NpyIter_GetIterSize(iter) == 0; PyArrayMethod_StridedLoop *strided_loop; NPY_ARRAYMETHOD_FLAGS flags = 0; @@ -313,12 +325,40 @@ PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, * Initialize the result to the reduction unit if possible, * otherwise copy the initial values and get a view to the rest. */ - if (identity != Py_None) { - if (PyArray_FillWithScalar(result, identity) < 0) { + if (initial != NULL && initial != Py_None) { + /* + * User provided an `initial` value and it is not `None`. + * NOTE: It may make sense to accept array-valued `initial`, + * this would subtly (but rarely) change the coercion of + * `initial`. But it would be perfectly fine otherwise. + */ + if (PyArray_FillWithScalar(result, initial) < 0) { + goto fail; + } + } + else if (identity_buf != NULL && ( /* cannot fill for `initial=None` */ + identity_flags & NPY_METH_ITEM_IS_IDENTITY || + (empty_reduce && identity_flags & NPY_METH_ITEM_IS_DEFAULT))) { + /* Loop provided an identity or default value, assign to result. */ + int ret = raw_array_assign_scalar( + PyArray_NDIM(result), PyArray_DIMS(result), + PyArray_DESCR(result), + PyArray_BYTES(result), PyArray_STRIDES(result), + op_dtypes[0], identity_buf); + if (ret < 0) { goto fail; } } else { + /* Can only use where with an initial (from identity or argument) */ + if (wheremask != NULL) { + PyErr_Format(PyExc_ValueError, + "reduction operation '%s' does not have an identity, " + "so to use a where mask one has to specify 'initial'", + funcname); + return NULL; + } + /* * For 1-D skip_first_count could be optimized to 0, but no-identity * reductions are not super common. @@ -354,7 +394,7 @@ PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, } } - if (NpyIter_GetIterSize(iter) != 0) { + if (!empty_reduce) { NpyIter_IterNextFunc *iternext; char **dataptr; npy_intp *strideptr; @@ -387,6 +427,10 @@ PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, } Py_INCREF(result); + if (identity_buf != NULL && PyDataType_REFCHK(PyArray_DESCR(result))) { + PyArray_Item_XDECREF(identity_buf, PyArray_DESCR(result)); + } + PyMem_FREE(identity_buf); NPY_AUXDATA_FREE(auxdata); if (!NpyIter_Deallocate(iter)) { Py_DECREF(result); @@ -395,6 +439,10 @@ PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, return result; fail: + if (identity_buf != NULL && PyDataType_REFCHK(op_dtypes[0])) { + PyArray_Item_XDECREF(identity_buf, op_dtypes[0]); + } + PyMem_FREE(identity_buf); NPY_AUXDATA_FREE(auxdata); if (iter != NULL) { NpyIter_Deallocate(iter); diff --git a/numpy/core/src/umath/reduction.h b/numpy/core/src/umath/reduction.h index 2170e27a7..d2cbe4849 100644 --- a/numpy/core/src/umath/reduction.h +++ b/numpy/core/src/umath/reduction.h @@ -64,8 +64,8 @@ typedef int (PyArray_ReduceLoopFunc)(PyArrayMethod_Context *context, NPY_NO_EXPORT PyArrayObject * PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask, - npy_bool *axis_flags, int reorderable, int keepdims, - PyObject *identity, PyArray_ReduceLoopFunc *loop, - void *data, npy_intp buffersize, const char *funcname, int errormask); + npy_bool *axis_flags, int keepdims, + PyObject *initial, PyArray_ReduceLoopFunc *loop, + npy_intp buffersize, const char *funcname, int errormask); #endif diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 62e06f281..9d3a153c4 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -2085,12 +2085,16 @@ _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op, } /* - * Returns a new reference + * Returns a new reference to the ufunc identity. Note that this identity + * is only a default identity value stored on the ufunc, since the invidiual + * ufunc loop (ArrayMethod) is queried for the actual identity. + * * TODO: store a reference in the ufunc object itself, rather than * constructing one each time */ -static PyObject * -_get_identity(PyUFuncObject *ufunc, npy_bool *reorderable) { +NPY_NO_EXPORT PyObject * +PyUFunc_GetIdentity(PyUFuncObject *ufunc, npy_bool *reorderable) +{ switch(ufunc->identity) { case PyUFunc_One: *reorderable = 1; @@ -3006,10 +3010,8 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyObject *initial, PyArrayObject *wheremask) { int iaxes, ndim; - npy_bool reorderable; npy_bool axis_flags[NPY_MAXDIMS]; - PyArrayObject *result = NULL; - PyObject *identity; + const char *ufunc_name = ufunc_get_name_cstr(ufunc); /* These parameters come from a TLS global */ int buffersize = 0, errormask = 0; @@ -3034,9 +3036,6 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, return NULL; } - /* - * Promote and fetch ufuncimpl (currently needed to fix up the identity). - */ PyArray_Descr *descrs[3]; PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, arr, out, signature, NPY_FALSE, descrs, NPY_UNSAFE_CASTING, "reduce"); @@ -3044,62 +3043,20 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, return NULL; } - /* Get the identity */ - /* TODO: Both of these should be provided by the ArrayMethod! */ - identity = _get_identity(ufunc, &reorderable); - if (identity == NULL) { - goto finish; - } - - /* Get the initial value */ - if (initial == NULL) { - initial = identity; - - /* - * The identity for a dynamic dtype like - * object arrays can't be used in general - */ - if (initial != Py_None && PyArray_ISOBJECT(arr) && PyArray_SIZE(arr) != 0) { - Py_DECREF(initial); - initial = Py_None; - Py_INCREF(initial); - } - else if (PyTypeNum_ISUNSIGNED(descrs[2]->type_num) - && PyLong_CheckExact(initial)) { - /* - * This is a bit of a hack until we have truly loop specific - * identities. Python -1 cannot be cast to unsigned so convert - * it to a NumPy scalar, but we use -1 for bitwise functions to - * signal all 1s. - * (A builtin identity would not overflow here, although we may - * unnecessary convert 0 and 1.) - */ - Py_SETREF(initial, PyObject_CallFunctionObjArgs( - (PyObject *)&PyLongArrType_Type, initial, NULL)); - if (initial == NULL) { - goto finish; - } - } - } else { - Py_DECREF(identity); - Py_INCREF(initial); /* match the reference count in the if above */ - } - PyArrayMethod_Context context = { .caller = (PyObject *)ufunc, .method = ufuncimpl, .descriptors = descrs, }; - result = PyUFunc_ReduceWrapper(&context, - arr, out, wheremask, axis_flags, reorderable, keepdims, - initial, reduce_loop, ufunc, buffersize, ufunc_name, errormask); + PyArrayObject *result = PyUFunc_ReduceWrapper(&context, + arr, out, wheremask, axis_flags, keepdims, + initial, reduce_loop, buffersize, ufunc_name, errormask); finish: for (int i = 0; i < 3; i++) { Py_DECREF(descrs[i]); } - Py_XDECREF(initial); return result; } @@ -7018,7 +6975,7 @@ static PyObject * ufunc_get_identity(PyUFuncObject *ufunc, void *NPY_UNUSED(ignored)) { npy_bool reorderable; - return _get_identity(ufunc, &reorderable); + return PyUFunc_GetIdentity(ufunc, &reorderable); } static PyObject * diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 34cdd88c6..ea6ef2c12 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -1650,6 +1650,18 @@ class TestUfunc: a = a[1:, 1:, 1:] self.check_identityless_reduction(a) + def test_reduce_identity_depends_on_loop(self): + """ + The type of the result should always depend on the selected loop, not + necessarily the output (only relevant for object arrays). + """ + # For an object loop, the default value 0 with type int is used: + assert type(np.add.reduce([], dtype=object)) is int + out = np.array(None, dtype=object) + # When the loop is float but `out` is object this does not happen: + np.add.reduce([], out=out, dtype=np.float64) + assert type(out[()]) is not int + def test_initial_reduction(self): # np.minimum.reduce is an identityless reduction @@ -1670,6 +1682,8 @@ class TestUfunc: # Check initial=None raises ValueError for both types of ufunc reductions assert_raises(ValueError, np.minimum.reduce, [], initial=None) assert_raises(ValueError, np.add.reduce, [], initial=None) + # Also in the somewhat special object case: + assert_raises(ValueError, np.add.reduce, [], initial=None, dtype=object) # Check that np._NoValue gives default behavior. assert_equal(np.add.reduce([], initial=np._NoValue), 0) @@ -2617,7 +2631,8 @@ def test_reduce_casterrors(offset): with pytest.raises(ValueError, match="invalid literal"): # This is an unsafe cast, but we currently always allow that. # Note that the double loop is picked, but the cast fails. - np.add.reduce(arr, dtype=np.intp, out=out) + # `initial=None` disables the use of an identity here. + np.add.reduce(arr, dtype=np.intp, out=out, initial=None) assert count == sys.getrefcount(value) # If an error occurred during casting, the operation is done at most until # the error occurs (the result of which would be `value * offset`) and -1 |
