diff options
-rw-r--r-- | doc/source/reference/c-api/types-and-structures.rst | 10 | ||||
-rw-r--r-- | numpy/core/include/numpy/ufuncobject.h | 6 | ||||
-rw-r--r-- | numpy/core/setup.py | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/array_method.c | 76 | ||||
-rw-r--r-- | numpy/core/src/multiarray/array_method.h | 20 | ||||
-rw-r--r-- | numpy/core/src/multiarray/nditer_constr.c | 5 | ||||
-rw-r--r-- | numpy/core/src/umath/dispatching.c | 688 | ||||
-rw-r--r-- | numpy/core/src/umath/dispatching.h | 22 | ||||
-rw-r--r-- | numpy/core/src/umath/legacy_array_method.c | 257 | ||||
-rw-r--r-- | numpy/core/src/umath/legacy_array_method.h | 33 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 776 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_type_resolution.c | 36 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_type_resolution.h | 3 | ||||
-rw-r--r-- | numpy/core/tests/test_scalarmath.py | 4 | ||||
-rw-r--r-- | numpy/core/tests/test_ufunc.py | 20 |
15 files changed, 1604 insertions, 354 deletions
diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index 75a97c20e..54a1e09e1 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -818,6 +818,7 @@ PyUFunc_Type and PyUFuncObject npy_intp *core_dim_sizes; npy_uint32 *core_dim_flags; PyObject *identity_value; + /* Further private slots (size depends on the NumPy version) */ } PyUFuncObject; .. c:macro: PyObject_HEAD @@ -957,9 +958,12 @@ PyUFunc_Type and PyUFuncObject .. c:member:: PyUFunc_LegacyInnerLoopSelectionFunc *legacy_inner_loop_selector - A function which returns an inner loop. The ``legacy`` in the name arises - because for NumPy 1.6 a better variant had been planned. This variant - has not yet come about. + .. deprecated:: 1.22 + + Some fallback support for this slot exists, but will be removed + eventually. A univiersal function which relied on this will have + eventually have to be ported. + See ref:`NEP 41 <NEP41>` and ref:`NEP 43 <NEP43>` .. 
c:member:: void *reserved2 diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h index 0f3b8529a..fd7307703 100644 --- a/numpy/core/include/numpy/ufuncobject.h +++ b/numpy/core/include/numpy/ufuncobject.h @@ -211,6 +211,12 @@ typedef struct _tagPyUFuncObject { /* Identity for reduction, when identity == PyUFunc_IdentityValue */ PyObject *identity_value; + /* New in NPY_API_VERSION 0x0000000F and above */ + + /* New private fields related to dispatching */ + void *_dispatch_cache; + /* A PyListObject of `(tuple of DTypes, ArrayMethod/Promoter)` */ + PyObject *_loops; } PyUFuncObject; #include "arrayobject.h" diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 29d309f74..c20320910 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -928,6 +928,8 @@ def configuration(parent_package='',top_path=None): join('src', 'umath', 'matmul.c.src'), join('src', 'umath', 'clip.h.src'), join('src', 'umath', 'clip.c.src'), + join('src', 'umath', 'dispatching.c'), + join('src', 'umath', 'legacy_array_method.c'), join('src', 'umath', 'ufunc_object.c'), join('src', 'umath', 'extobj.c'), join('src', 'umath', 'scalarmath.c.src'), diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c index cc841ee64..c1b6d4e71 100644 --- a/numpy/core/src/multiarray/array_method.c +++ b/numpy/core/src/multiarray/array_method.c @@ -757,9 +757,6 @@ boundarraymethod__simple_strided_call( /* - * TODO: Currently still based on the old ufunc system and not ArrayMethod! - * This requires fixing the ufunc code first. - * * Support for masked inner-strided loops. Masked inner-strided loops are * only used in the ufunc machinery. So this special cases them. 
* In the future it probably makes sense to create an:: @@ -770,8 +767,8 @@ boundarraymethod__simple_strided_call( */ typedef struct { NpyAuxData base; - PyUFuncGenericFunction unmasked_stridedloop; - void *innerloopdata; + PyArrayMethod_StridedLoop *unmasked_stridedloop; + NpyAuxData *unmasked_auxdata; int nargs; char *dataptrs[]; } _masked_stridedloop_data; @@ -781,6 +778,7 @@ static void _masked_stridedloop_data_free(NpyAuxData *auxdata) { _masked_stridedloop_data *data = (_masked_stridedloop_data *)auxdata; + NPY_AUXDATA_FREE(data->unmasked_auxdata); PyMem_Free(data); } @@ -790,15 +788,15 @@ _masked_stridedloop_data_free(NpyAuxData *auxdata) * masked strided-loop, only calling the function for elements * where the mask is True. */ -static void -unmasked_ufunc_loop_as_masked( - char **data, const npy_intp *dimensions, - const npy_intp *strides, void *_auxdata) +static int +generic_masked_strided_loop(PyArrayMethod_Context *context, + char *const *data, const npy_intp *dimensions, + const npy_intp *strides, NpyAuxData *_auxdata) { _masked_stridedloop_data *auxdata = (_masked_stridedloop_data *)_auxdata; int nargs = auxdata->nargs; - PyUFuncGenericFunction strided_loop = auxdata->unmasked_stridedloop; - void *innerloopdata = auxdata->innerloopdata; + PyArrayMethod_StridedLoop *strided_loop = auxdata->unmasked_stridedloop; + NpyAuxData *strided_loop_auxdata = auxdata->unmasked_auxdata; char **dataptrs = auxdata->dataptrs; memcpy(dataptrs, data, nargs * sizeof(char *)); @@ -819,39 +817,37 @@ unmasked_ufunc_loop_as_masked( /* Process unmasked values */ mask = npy_memchr(mask, 0, mask_stride, N, &subloopsize, 0); - strided_loop(dataptrs, &subloopsize, strides, innerloopdata); + int res = strided_loop(context, + dataptrs, &subloopsize, strides, strided_loop_auxdata); + if (res != 0) { + return res; + } for (int i = 0; i < nargs; i++) { dataptrs[i] += subloopsize * strides[i]; } N -= subloopsize; } while (N > 0); + + return 0; } /* - * TODO: This function will be the 
masked equivalent to `get_loop`. - * This function wraps a legacy inner loop so it becomes masked. - * - * Returns 0 on success, -1 on error. + * Identical to the `get_loop` functions and wraps it. This adds support + * to a boolean mask being passed in as a last, additional, operand. + * The wrapped loop will only be called for unmasked elements. + * (Does not support `move_references` or inner dimensions!) */ NPY_NO_EXPORT int -PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc, - PyArray_Descr **dtypes, - PyUFuncGenericFunction *out_innerloop, - NpyAuxData **out_innerloopdata, - int *out_needs_api) +PyArrayMethod_GetMaskedStridedLoop( + PyArrayMethod_Context *context, + int aligned, npy_intp *fixed_strides, + PyArrayMethod_StridedLoop **out_loop, + NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) { - int retcode; _masked_stridedloop_data *data; - int nargs = ufunc->nin + ufunc->nout; - - if (ufunc->legacy_inner_loop_selector == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "the ufunc default masked inner loop selector doesn't " - "yet support wrapping the new inner loop selector, it " - "still only wraps the legacy inner loop selector"); - return -1; - } + int nargs = context->method->nin + context->method->nout; /* Add working memory for the data pointers, to modify them in-place */ data = PyMem_Malloc(sizeof(_masked_stridedloop_data) + @@ -865,18 +861,14 @@ PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc, data->unmasked_stridedloop = NULL; data->nargs = nargs; - /* Get the unmasked ufunc inner loop */ - retcode = ufunc->legacy_inner_loop_selector(ufunc, dtypes, - &data->unmasked_stridedloop, &data->innerloopdata, - out_needs_api); - if (retcode < 0) { - PyArray_free(data); - return retcode; + if (context->method->get_strided_loop(context, + aligned, 0, fixed_strides, + &data->unmasked_stridedloop, &data->unmasked_auxdata, flags) < 0) { + PyMem_Free(data); + return -1; } - - /* Return the loop function + aux data */ - 
*out_innerloop = &unmasked_ufunc_loop_as_masked; - *out_innerloopdata = (NpyAuxData *)data; + *out_transferdata = (NpyAuxData *)data; + *out_loop = generic_masked_strided_loop; return 0; } diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h index c2122a2da..fc2304889 100644 --- a/numpy/core/src/multiarray/array_method.h +++ b/numpy/core/src/multiarray/array_method.h @@ -17,6 +17,7 @@ typedef enum { * setup/check. No function should set error flags and ignore them * since it would interfere with chaining operations (e.g. casting). */ + /* TODO: Change this into a positive flag */ NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2, /* Whether the method supports unaligned access (not runtime) */ NPY_METH_SUPPORTS_UNALIGNED = 1 << 3, @@ -158,17 +159,16 @@ npy_default_get_strided_loop( PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags); -/* - * TODO: This function will not rely on the current ufunc code after the - * ufunc refactor. - */ -#include "numpy/ufuncobject.h" + NPY_NO_EXPORT int -PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc, - PyArray_Descr **dtypes, - PyUFuncGenericFunction *out_innerloop, - NpyAuxData **out_innerloopdata, - int *out_needs_api); +PyArrayMethod_GetMaskedStridedLoop( + PyArrayMethod_Context *context, + int aligned, + npy_intp *fixed_strides, + PyArrayMethod_StridedLoop **out_loop, + NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags); + /* * TODO: This function is the internal version, and its error paths may diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index a0154e474..98d4f5a75 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -449,6 +449,11 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags, /* * If REFS_OK was specified, check whether there are any * reference arrays and flag it if so. 
+ * + * NOTE: This really should be unnecessary, but chances are someone relies + * on it. The iterator itself does not require the API here + * as it only does so for casting/buffering. But in almost all + * use-cases the API will be required for whatever operation is done. */ if (flags & NPY_ITER_REFS_OK) { for (iop = 0; iop < nop; ++iop) { diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c new file mode 100644 index 000000000..e63780458 --- /dev/null +++ b/numpy/core/src/umath/dispatching.c @@ -0,0 +1,688 @@ +/* + * This file implements universal function dispatching and promotion (which + * is necessary to happen before dispatching). + * This is part of the UFunc object. Promotion and dispatching uses the + * following things: + * + * - operand_DTypes: The datatypes as passed in by the user. + * - signature: The DTypes fixed by the user with `dtype=` or `signature=`. + * - ufunc._loops: A list of all ArrayMethods and promoters, it contains + * tuples `(dtypes, ArrayMethod)` or `(dtypes, promoter)`. + * - ufunc._dispatch_cache: A cache to store previous promotion and/or + * dispatching results. + * - The actual arrays are used to support the old code paths where necessary. + * (this includes any value-based casting/promotion logic) + * + * In general, `operand_Dtypes` is always overridden by `signature`. If a + * DType is included in the `signature` it must match precisely. + * + * The process of dispatching and promotion can be summarized in the following + * steps: + * + * 1. Override any `operand_DTypes` from `signature`. + * 2. Check if the new `operand_Dtypes` is cached (if it is, got to 4.) + * 3. Find the best matching "loop". This is done using multiple dispatching + * on all `operand_DTypes` and loop `dtypes`. A matching loop must be + * one whose DTypes are superclasses of the `operand_DTypes` (that are + * defined). The best matching loop must be better than any other matching + * loop. This result is cached. + * 4. 
If the found loop is a promoter: We call the promoter. It can modify + * the `operand_DTypes` currently. Then go back to step 2. + * (The promoter can call arbitrary code, so it could even add the matching + * loop first.) + * 5. The final `ArrayMethod` is found, its registered `dtypes` is copied + * into the `signature` so that it is available to the ufunc loop. + * + */ +#include <Python.h> + +#define _UMATHMODULE +#define _MULTIARRAYMODULE +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "numpy/ndarraytypes.h" +#include "common.h" + +#include "dispatching.h" +#include "dtypemeta.h" +#include "npy_hashtable.h" +#include "legacy_array_method.h" +#include "ufunc_object.h" +#include "ufunc_type_resolution.h" + + +/* forward declaration */ +static NPY_INLINE PyObject * +promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc, + PyArrayObject *const ops[], + PyArray_DTypeMeta *signature[], + PyArray_DTypeMeta *op_dtypes[], + npy_bool allow_legacy_promotion, npy_bool cache); + + +/** + * Function to add a new loop to the ufunc. This mainly appends it to the + * list (as it currently is just a list). + * + * @param ufunc The universal function to add the loop to. + * @param info The tuple (dtype_tuple, ArrayMethod/promoter). + * @param ignore_duplicate If 1 and a loop with the same `dtype_tuple` is + * found, the function does nothing. + */ +static int +add_ufunc_loop(PyUFuncObject *ufunc, PyObject *info, int ignore_duplicate) +{ + /* + * Validate the info object, this should likely move to to a different + * entry-point in the future (and is mostly unnecessary currently). 
+ */ + if (!PyTuple_CheckExact(info) || PyTuple_GET_SIZE(info) != 2) { + PyErr_SetString(PyExc_TypeError, + "Info must be a tuple: " + "(tuple of DTypes or None, ArrayMethod or promoter)"); + return -1; + } + PyObject *DType_tuple = PyTuple_GetItem(info, 0); + if (PyTuple_GET_SIZE(DType_tuple) != ufunc->nargs) { + PyErr_SetString(PyExc_TypeError, + "DType tuple length does not match ufunc number of operands"); + return -1; + } + for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(DType_tuple); i++) { + PyObject *item = PyTuple_GET_ITEM(DType_tuple, i); + if (item != Py_None + && !PyObject_TypeCheck(item, &PyArrayDTypeMeta_Type)) { + PyErr_SetString(PyExc_TypeError, + "DType tuple may only contain None and DType classes"); + return -1; + } + } + if (!PyObject_TypeCheck(PyTuple_GET_ITEM(info, 1), &PyArrayMethod_Type)) { + /* Must also accept promoters in the future. */ + PyErr_SetString(PyExc_TypeError, + "Second argument to info must be an ArrayMethod or promoter"); + return -1; + } + + if (ufunc->_loops == NULL) { + ufunc->_loops = PyList_New(0); + if (ufunc->_loops == NULL) { + return -1; + } + } + + PyObject *loops = ufunc->_loops; + Py_ssize_t length = PyList_Size(loops); + for (Py_ssize_t i = 0; i < length; i++) { + PyObject *item = PyList_GetItem(loops, i); + PyObject *cur_DType_tuple = PyTuple_GetItem(item, 0); + int cmp = PyObject_RichCompareBool(cur_DType_tuple, DType_tuple, Py_EQ); + if (cmp < 0) { + return -1; + } + if (cmp == 0) { + continue; + } + if (ignore_duplicate) { + return 0; + } + PyErr_Format(PyExc_TypeError, + "A loop/promoter has already been registered with '%s' for %R", + ufunc_get_name_cstr(ufunc), DType_tuple); + return -1; + } + + if (PyList_Append(loops, info) < 0) { + return -1; + } + return 0; +} + + +/** + * Resolves the implementation to use, this uses typical multiple dispatching + * methods of finding the best matching implementation or resolver. 
+ * (Based on `isinstance()`, the knowledge that non-abstract DTypes cannot + * be subclassed is used, however.) + * + * @param ufunc + * @param op_dtypes The DTypes that are either passed in (defined by an + * operand) or defined by the `signature` as also passed in as + * `fixed_DTypes`. + * @param out_info Returns the tuple describing the best implementation + * (consisting of dtypes and ArrayMethod or promoter). + * WARNING: Returns a borrowed reference! + * @returns -1 on error 0 on success. Note that the output can be NULL on + * success if nothing is found. + */ +static int +resolve_implementation_info(PyUFuncObject *ufunc, + PyArray_DTypeMeta *op_dtypes[], PyObject **out_info) +{ + int nin = ufunc->nin, nargs = ufunc->nargs; + Py_ssize_t size = PySequence_Length(ufunc->_loops); + PyObject *best_dtypes = NULL; + PyObject *best_resolver_info = NULL; + + for (Py_ssize_t res_idx = 0; res_idx < size; res_idx++) { + /* Test all resolvers */ + PyObject *resolver_info = PySequence_Fast_GET_ITEM( + ufunc->_loops, res_idx); + PyObject *curr_dtypes = PyTuple_GET_ITEM(resolver_info, 0); + /* + * Test if the current resolver matches, it could make sense to + * reorder these checks to avoid the IsSubclass check as much as + * possible. + */ + + npy_bool matches = NPY_TRUE; + /* + * NOTE: We check also the output DType. In principle we do not + * have to strictly match it (unless it is provided by the + * `signature`). This assumes that a (fallback) promoter will + * unset the output DType if no exact match is found. 
+ */ + for (Py_ssize_t i = 0; i < nargs; i++) { + PyArray_DTypeMeta *given_dtype = op_dtypes[i]; + PyArray_DTypeMeta *resolver_dtype = ( + (PyArray_DTypeMeta *)PyTuple_GET_ITEM(curr_dtypes, i)); + assert((PyObject *)given_dtype != Py_None); + if (given_dtype == NULL && i >= nin) { + /* Unspecified out always matches (see below for inputs) */ + continue; + } + if (given_dtype == resolver_dtype) { + continue; + } + if (!resolver_dtype->abstract) { + matches = NPY_FALSE; + break; + } + if (given_dtype == NULL) { + /* + * If an input was not specified, this is a reduce-like + * operation: reductions use `(operand_DType, NULL, out_DType)` + * as they only have a single operand. This allows special + * reduce promotion rules useful for example for sum/product. + * E.g. `np.add.reduce([True, True])` promotes to integer. + * + * Continuing here allows a promoter to handle reduce-like + * promotions explicitly if necessary. + * TODO: The `!resolver_dtype->abstract` currently ensures that + * this is a promoter. If we allow ArrayMethods to use + * abstract DTypes, we may have to reject it here or the + * ArrayMethod has to implement the reduce promotion. + */ + continue; + } + int subclass = PyObject_IsSubclass( + (PyObject *)given_dtype, (PyObject *)resolver_dtype); + if (subclass < 0) { + return -1; + } + if (!subclass) { + matches = NPY_FALSE; + break; + } + /* + * TODO: Could consider allowing reverse subclass relation, i.e. + * the operation DType passed in to be abstract. That + * definitely is OK for outputs (and potentially useful, + * you could enforce e.g. an inexact result). + * It might also be useful for some stranger promoters. 
+ */ + } + if (!matches) { + continue; + } + + /* The resolver matches, but we have to check if it is better */ + if (best_dtypes != NULL) { + int current_best = -1; /* -1 neither, 0 current best, 1 new */ + /* + * If both have concrete and None in the same position and + * they are identical, we will continue searching using the + * first best for comparison, in an attempt to find a better + * one. + * In all cases, we give up resolution, since it would be + * necessary to compare to two "best" cases. + */ + int unambiguously_equally_good = 1; + for (Py_ssize_t i = 0; i < nargs; i++) { + int best; + + PyObject *prev_dtype = PyTuple_GET_ITEM(best_dtypes, i); + PyObject *new_dtype = PyTuple_GET_ITEM(curr_dtypes, i); + + if (prev_dtype == new_dtype) { + /* equivalent, so this entry does not matter */ + continue; + } + /* + * TODO: Even if the input is not specified, if we have + * abstract DTypes and one is a subclass of the other, + * the subclass should be considered a better match + * (subclasses are always more specific). + */ + /* If either is None, the other is strictly more specific */ + if (prev_dtype == Py_None) { + unambiguously_equally_good = 0; + best = 1; + } + else if (new_dtype == Py_None) { + unambiguously_equally_good = 0; + best = 0; + } + /* + * If both are concrete and not identical, this is + * ambiguous. + */ + else if (!((PyArray_DTypeMeta *)prev_dtype)->abstract && + !((PyArray_DTypeMeta *)new_dtype)->abstract) { + /* + * Ambiguous unless the are identical (checked above), + * but since they are concrete it does not matter which + * best to compare. + */ + best = -1; + } + /* + * TODO: Unreachable, but we will need logic for abstract + * DTypes to decide if one is a subclass of the other + * (And their subclass relation is well defined.) 
+ */ + else { + assert(0); + } + + if ((current_best != -1) && (current_best != best)) { + /* + * We need a clear best, this could be tricky, unless + * the signature is identical, we would have to compare + * against both of the found ones until we find a + * better one. + * Instead, only support the case where they are + * identical. + */ + /* TODO: Document the above comment, may need relaxing? */ + current_best = -1; + break; + } + current_best = best; + } + + if (current_best == -1) { + /* + * TODO: It would be nice to have a "diagnostic mode" that + * informs if this happens! (An immediate error currently + * blocks later legacy resolution, but may work in the + * future.) + */ + if (unambiguously_equally_good) { + /* unset the best resolver to indicate this */ + best_resolver_info = NULL; + continue; + } + *out_info = NULL; + return 0; + } + else if (current_best == 0) { + /* The new match is not better, continue looking. */ + continue; + } + } + /* The new match is better (or there was no previous match) */ + best_dtypes = curr_dtypes; + best_resolver_info = resolver_info; + } + if (best_dtypes == NULL) { + /* The non-legacy lookup failed */ + *out_info = NULL; + return 0; + } + + *out_info = best_resolver_info; + return 0; +} + + +/* + * A promoter can currently be either a C-Capsule containing a promoter + * function pointer, or a Python function. Both of these can at this time + * only return new operation DTypes (i.e. mutate the input while leaving + * those defined by the `signature` unmodified). 
+ */ +static PyObject * +call_promoter_and_recurse( + PyUFuncObject *NPY_UNUSED(ufunc), PyObject *NPY_UNUSED(promoter), + PyArray_DTypeMeta *NPY_UNUSED(op_dtypes[]), + PyArray_DTypeMeta *NPY_UNUSED(signature[]), + PyArrayObject *const NPY_UNUSED(operands[])) +{ + PyErr_SetString(PyExc_NotImplementedError, + "Internal NumPy error, promoters are not used/implemented yet."); + return NULL; +} + + +/* + * Convert the DType `signature` into the tuple of descriptors that is used + * by the old ufunc type resolvers in `ufunc_type_resolution.c`. + * + * Note that we do not need to pass the type tuple when we use the legacy path + * for type resolution rather than promotion, since the signature is always + * correct in that case. + */ +static int +_make_new_typetup( + int nop, PyArray_DTypeMeta *signature[], PyObject **out_typetup) { + *out_typetup = PyTuple_New(nop); + if (*out_typetup == NULL) { + return -1; + } + + int none_count = 0; + for (int i = 0; i < nop; i++) { + PyObject *item; + if (signature[i] == NULL) { + item = Py_None; + none_count++; + } + else { + if (!signature[i]->legacy || signature[i]->abstract) { + /* + * The legacy type resolution can't deal with these. + * This path will return `None` or so in the future to + * set an error later if the legacy type resolution is used. + */ + PyErr_SetString(PyExc_RuntimeError, + "Internal NumPy error: new DType in signature not yet " + "supported. (This should be unreachable code!)"); + Py_SETREF(*out_typetup, NULL); + return -1; + } + item = (PyObject *)signature[i]->singleton; + } + Py_INCREF(item); + PyTuple_SET_ITEM(*out_typetup, i, item); + } + if (none_count == nop) { + /* The whole signature was None, simply ignore type tuple */ + Py_DECREF(*out_typetup); + *out_typetup = NULL; + } + return 0; +} + + +/* + * Fills in the operation_DTypes with borrowed references. This may change + * the content, since it will use the legacy type resolution, which can special + * case 0-D arrays (using value-based logic). 
+ */ +static int +legacy_promote_using_legacy_type_resolver(PyUFuncObject *ufunc, + PyArrayObject *const *ops, PyArray_DTypeMeta *signature[], + PyArray_DTypeMeta *operation_DTypes[], int *out_cacheable) +{ + int nargs = ufunc->nargs; + PyArray_Descr *out_descrs[NPY_MAXARGS] = {NULL}; + + PyObject *type_tuple = NULL; + if (_make_new_typetup(nargs, signature, &type_tuple) < 0) { + return -1; + } + + /* + * We use unsafe casting. This is of course not accurate, but that is OK + * here, because for promotion/dispatching the casting safety makes no + * difference. Whether the actual operands can be casts must be checked + * during the type resolution step (which may _also_ calls this!). + */ + if (ufunc->type_resolver(ufunc, + NPY_UNSAFE_CASTING, (PyArrayObject **)ops, type_tuple, + out_descrs) < 0) { + Py_XDECREF(type_tuple); + return -1; + } + Py_XDECREF(type_tuple); + + for (int i = 0; i < nargs; i++) { + Py_XSETREF(operation_DTypes[i], NPY_DTYPE(out_descrs[i])); + Py_INCREF(operation_DTypes[i]); + Py_DECREF(out_descrs[i]); + } + if (ufunc->type_resolver == &PyUFunc_SimpleBinaryComparisonTypeResolver) { + /* + * In this one case, the deprecation means that we actually override + * the signature. + */ + for (int i = 0; i < nargs; i++) { + if (signature[i] != NULL && signature[i] != operation_DTypes[i]) { + Py_INCREF(operation_DTypes[i]); + Py_SETREF(signature[i], operation_DTypes[i]); + *out_cacheable = 0; + } + } + } + return 0; +} + + +/* + * Note, this function returns a BORROWED references to info since it adds + * it to the loops. 
+ */ +NPY_NO_EXPORT PyObject * +add_and_return_legacy_wrapping_ufunc_loop(PyUFuncObject *ufunc, + PyArray_DTypeMeta *operation_dtypes[], int ignore_duplicate) +{ + PyObject *DType_tuple = PyArray_TupleFromItems(ufunc->nargs, + (PyObject **)operation_dtypes, 0); + if (DType_tuple == NULL) { + return NULL; + } + + PyArrayMethodObject *method = PyArray_NewLegacyWrappingArrayMethod( + ufunc, operation_dtypes); + if (method == NULL) { + Py_DECREF(DType_tuple); + return NULL; + } + PyObject *info = PyTuple_Pack(2, DType_tuple, method); + Py_DECREF(DType_tuple); + Py_DECREF(method); + if (info == NULL) { + return NULL; + } + if (add_ufunc_loop(ufunc, info, ignore_duplicate) < 0) { + Py_DECREF(info); + return NULL; + } + + return info; +} + + +/* + * The main implementation to find the correct DType signature and ArrayMethod + * to use for a ufunc. This function may recurse with `do_legacy_fallback` + * set to False. + * + * If value-based promotion is necessary, this is handled ahead of time by + * `promote_and_get_ufuncimpl`. + */ +static NPY_INLINE PyObject * +promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc, + PyArrayObject *const ops[], + PyArray_DTypeMeta *signature[], + PyArray_DTypeMeta *op_dtypes[], + npy_bool allow_legacy_promotion, npy_bool cache) +{ + /* + * Fetch the dispatching info which consists of the implementation and + * the DType signature tuple. There are three steps: + * + * 1. Check the cache. + * 2. Check all registered loops/promoters to find the best match. + * 3. Fall back to the legacy implementation if no match was found. + */ + PyObject *info = PyArrayIdentityHash_GetItem(ufunc->_dispatch_cache, + (PyObject **)op_dtypes); + if (info != NULL && PyObject_TypeCheck( + PyTuple_GET_ITEM(info, 1), &PyArrayMethod_Type)) { + /* Found the ArrayMethod and NOT a promoter: return it */ + return info; + } + + /* + * If `info == NULL`, the caching failed, repeat using the full resolution + * in `resolve_implementation_info`. 
+ */ + if (info == NULL) { + if (resolve_implementation_info(ufunc, op_dtypes, &info) < 0) { + return NULL; + } + if (info != NULL && PyObject_TypeCheck( + PyTuple_GET_ITEM(info, 1), &PyArrayMethod_Type)) { + /* + * Found the ArrayMethod and NOT promoter. Before returning it + * add it to the cache for faster lookup in the future. + */ + if (cache && PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache, + (PyObject **)op_dtypes, info, 0) < 0) { + return NULL; + } + return info; + } + } + + /* + * At this point `info` is NULL if there is no matching loop, or it is + * a promoter that needs to be used/called: + */ + if (info != NULL) { + PyObject *promoter = PyTuple_GET_ITEM(info, 1); + + info = call_promoter_and_recurse(ufunc, + promoter, op_dtypes, signature, ops); + if (info == NULL && PyErr_Occurred()) { + return NULL; + } + else if (info != NULL) { + return info; + } + } + + /* + * Even using promotion no loop was found. + * Using promotion failed, this should normally be an error. + * However, we need to give the legacy implementation a chance here. + * (it will modify `op_dtypes`). + */ + if (!allow_legacy_promotion || ufunc->type_resolver == NULL || + (ufunc->ntypes == 0 && ufunc->userloops == NULL)) { + /* Already tried or not a "legacy" ufunc (no loop found, return) */ + return NULL; + } + + PyArray_DTypeMeta *new_op_dtypes[NPY_MAXARGS] = {NULL}; + int cacheable = 1; /* TODO: only the comparison deprecation needs this */ + if (legacy_promote_using_legacy_type_resolver(ufunc, + ops, signature, new_op_dtypes, &cacheable) < 0) { + return NULL; + } + info = promote_and_get_info_and_ufuncimpl(ufunc, + ops, signature, new_op_dtypes, NPY_FALSE, cacheable); + for (int i = 0; i < ufunc->nargs; i++) { + Py_XDECREF(new_op_dtypes); + } + return info; +} + + +/** + * The central entry-point for the promotion and dispatching machinery. + * + * It currently may work with the operands (although it would be possible to + * only work with DType (classes/types). 
This is because it has to ensure + * that legacy (value-based promotion) is used when necessary. + * + * @param ufunc The ufunc object, used mainly for the fallback. + * @param ops The array operands (used only for the fallback). + * @param signature As input, the DType signature fixed explicitly by the user. + * The signature is *filled* in with the operation signature we end up + * using. + * @param op_dtypes The operand DTypes (without casting) which are specified + * either by the `signature` or by an `operand`. + * (outputs and the second input can be NULL for reductions). + * NOTE: In some cases, the promotion machinery may currently modify + * these. + * @param force_legacy_promotion If set, we have to use the old type resolution + * to implement value-based promotion/casting. + */ +NPY_NO_EXPORT PyArrayMethodObject * +promote_and_get_ufuncimpl(PyUFuncObject *ufunc, + PyArrayObject *const ops[], + PyArray_DTypeMeta *signature[], + PyArray_DTypeMeta *op_dtypes[], + npy_bool force_legacy_promotion, + npy_bool allow_legacy_promotion) +{ + int nargs = ufunc->nargs; + + /* + * Get the actual DTypes we operate with by mixing the operand array + * ones with the passed signature. + */ + for (int i = 0; i < nargs; i++) { + if (signature[i] != NULL) { + /* + * ignore the operand input, we cannot overwrite signature yet + * since it is fixed (cannot be promoted!) + */ + Py_INCREF(signature[i]); + Py_XSETREF(op_dtypes[i], signature[i]); + assert(i >= ufunc->nin || !signature[i]->abstract); + } + } + + if (force_legacy_promotion) { + /* + * We must use legacy promotion for value-based logic. Call the old + * resolver once up-front to get the "actual" loop dtypes. + * After this (additional) promotion, we can even use normal caching. 
+ */ + int cacheable = 1; /* unused, as we modify the original `op_dtypes` */ + if (legacy_promote_using_legacy_type_resolver(ufunc, + ops, signature, op_dtypes, &cacheable) < 0) { + return NULL; + } + } + + PyObject *info = promote_and_get_info_and_ufuncimpl(ufunc, + ops, signature, op_dtypes, allow_legacy_promotion, NPY_TRUE); + + if (info == NULL) { + if (!PyErr_Occurred()) { + raise_no_loop_found_error(ufunc, (PyObject **)op_dtypes); + } + return NULL; + } + + PyArrayMethodObject *method = (PyArrayMethodObject *)PyTuple_GET_ITEM(info, 1); + + /* Fill `signature` with final DTypes used by the ArrayMethod/inner-loop */ + PyObject *all_dtypes = PyTuple_GET_ITEM(info, 0); + for (int i = 0; i < nargs; i++) { + if (signature[i] == NULL) { + signature[i] = (PyArray_DTypeMeta *)PyTuple_GET_ITEM(all_dtypes, i); + Py_INCREF(signature[i]); + } + else { + assert((PyObject *)signature[i] == PyTuple_GET_ITEM(all_dtypes, i)); + } + } + + return method; +} diff --git a/numpy/core/src/umath/dispatching.h b/numpy/core/src/umath/dispatching.h new file mode 100644 index 000000000..cefad691f --- /dev/null +++ b/numpy/core/src/umath/dispatching.h @@ -0,0 +1,22 @@ +#ifndef _NPY_DISPATCHING_H +#define _NPY_DISPATCHING_H + +#define _UMATHMODULE + +#include <numpy/ufuncobject.h> +#include "array_method.h" + + +NPY_NO_EXPORT PyArrayMethodObject * +promote_and_get_ufuncimpl(PyUFuncObject *ufunc, + PyArrayObject *const ops[], + PyArray_DTypeMeta *signature[], + PyArray_DTypeMeta *op_dtypes[], + npy_bool force_legacy_promotion, + npy_bool allow_legacy_promotion); + +NPY_NO_EXPORT PyObject * +add_and_return_legacy_wrapping_ufunc_loop(PyUFuncObject *ufunc, + PyArray_DTypeMeta *operation_dtypes[], int ignore_duplicate); + +#endif /*_NPY_DISPATCHING_H */ diff --git a/numpy/core/src/umath/legacy_array_method.c b/numpy/core/src/umath/legacy_array_method.c new file mode 100644 index 000000000..e5043aa71 --- /dev/null +++ b/numpy/core/src/umath/legacy_array_method.c @@ -0,0 +1,257 @@ +/* + * This 
file defines most of the machinery in order to wrap legacy style + * ufunc loops into new style arraymethods. + */ + +#include <Python.h> + +#define _UMATHMODULE +#define _MULTIARRAYMODULE +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#include "numpy/ndarraytypes.h" + +#include "convert_datatype.h" +#include "array_method.h" +#include "dtype_transfer.h" +#include "legacy_array_method.h" + + +typedef struct { + NpyAuxData base; + /* The legacy loop and additional user data: */ + PyUFuncGenericFunction loop; + void *user_data; + /* Whether to check for PyErr_Occurred(), must require GIL if used */ + int pyerr_check; +} legacy_array_method_auxdata; + + +/* Use a free list, since we should normally only need one at a time */ +#define NPY_LOOP_DATA_CACHE_SIZE 5 +static int loop_data_num_cached = 0; +static legacy_array_method_auxdata *loop_data_cache[NPY_LOOP_DATA_CACHE_SIZE]; + + +static void +legacy_array_method_auxdata_free(NpyAuxData *data) +{ + if (loop_data_num_cached < NPY_LOOP_DATA_CACHE_SIZE) { + loop_data_cache[loop_data_num_cached] = ( + (legacy_array_method_auxdata *)data); + loop_data_num_cached++; + } + else { + PyMem_Free(data); + } +} + +#undef NPY_LOOP_DATA_CACHE_SIZE + + +NpyAuxData * +get_new_loop_data( + PyUFuncGenericFunction loop, void *user_data, int pyerr_check) +{ + legacy_array_method_auxdata *data; + if (NPY_LIKELY(loop_data_num_cached > 0)) { + loop_data_num_cached--; + data = loop_data_cache[loop_data_num_cached]; + } + else { + data = PyMem_Malloc(sizeof(legacy_array_method_auxdata)); + if (data == NULL) { + return NULL; + } + data->base.free = legacy_array_method_auxdata_free; + data->base.clone = NULL; /* no need for cloning (at least for now) */ + } + data->loop = loop; + data->user_data = user_data; + data->pyerr_check = pyerr_check; + return (NpyAuxData *)data; +} + + +/* + * This is a thin wrapper around the legacy loop signature. 
+ */ +static int +generic_wrapped_legacy_loop(PyArrayMethod_Context *NPY_UNUSED(context), + char *const *data, const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) +{ + legacy_array_method_auxdata *ldata = (legacy_array_method_auxdata *)auxdata; + + ldata->loop((char **)data, dimensions, strides, ldata->user_data); + if (ldata->pyerr_check && PyErr_Occurred()) { + return -1; + } + return 0; +} + + +/* + * Signal that the old type-resolution function must be used to resolve + * the descriptors (mainly/only used for datetimes due to the unit). + * + * ArrayMethod's are expected to implement this, but it is too tricky + * to support properly. So we simply set an error that should never be seen. + */ +NPY_NO_EXPORT NPY_CASTING +wrapped_legacy_resolve_descriptors(PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[]), + PyArray_Descr *NPY_UNUSED(given_descrs[]), + PyArray_Descr *NPY_UNUSED(loop_descrs[])) +{ + PyErr_SetString(PyExc_RuntimeError, + "cannot use legacy wrapping ArrayMethod without calling the ufunc " + "itself. If this error is hit, the solution will be to port the " + "legacy ufunc loop implementation to the new API."); + return -1; +} + +/* + * Much the same as the default type resolver, but tries a bit harder to + * preserve metadata. 
+ */ +static NPY_CASTING +simple_legacy_resolve_descriptors( + PyArrayMethodObject *method, + PyArray_DTypeMeta **dtypes, + PyArray_Descr **given_descrs, + PyArray_Descr **output_descrs) +{ + int nin = method->nin; + int nout = method->nout; + + for (int i = 0; i < nin + nout; i++) { + if (given_descrs[i] != NULL) { + output_descrs[i] = ensure_dtype_nbo(given_descrs[i]); + } + else if (dtypes[i] == dtypes[0] && i > 0) { + /* Preserve metadata from the first operand if same dtype */ + Py_INCREF(output_descrs[0]); + output_descrs[i] = output_descrs[0]; + } + else { + output_descrs[i] = dtypes[i]->default_descr(dtypes[i]); + } + if (output_descrs[i] == NULL) { + goto fail; + } + } + + return NPY_SAFE_CASTING; + + fail: + for (int i = 0; i < nin + nout; i++) { + Py_CLEAR(output_descrs[i]); + } + return -1; +} + + +/* + * This function grabs the legacy inner-loop. If this turns out to be slow + * we could probably cache it (with some care). + */ +NPY_NO_EXPORT int +get_wrapped_legacy_ufunc_loop(PyArrayMethod_Context *context, + int aligned, int move_references, + npy_intp *NPY_UNUSED(strides), + PyArrayMethod_StridedLoop **out_loop, + NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) +{ + assert(aligned); + assert(!move_references); + + if (context->caller == NULL || + !PyObject_TypeCheck(context->caller, &PyUFunc_Type)) { + PyErr_Format(PyExc_RuntimeError, + "cannot call %s without its ufunc as caller context.", + context->method->name); + return -1; + } + + PyUFuncObject *ufunc = (PyUFuncObject *)context->caller; + void *user_data; + int needs_api = 0; + + PyUFuncGenericFunction loop = NULL; + /* Note that `needs_api` is not reliable (it was in fact unused normally) */ + if (ufunc->legacy_inner_loop_selector(ufunc, + context->descriptors, &loop, &user_data, &needs_api) < 0) { + return -1; + } + *flags = context->method->flags & NPY_METH_RUNTIME_FLAGS; + if (needs_api) { + *flags |= NPY_METH_REQUIRES_PYAPI; + } + + *out_loop = &generic_wrapped_legacy_loop; 
+ *out_transferdata = get_new_loop_data( + loop, user_data, (*flags & NPY_METH_REQUIRES_PYAPI) != 0); + return 0; +} + + +/* + * Get the unbound ArrayMethod which wraps the instances of the ufunc. + * Note that this function stores the result on the ufunc and then only + * returns the same one. + */ +NPY_NO_EXPORT PyArrayMethodObject * +PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, + PyArray_DTypeMeta *signature[]) +{ + char method_name[101]; + const char *name = ufunc->name ? ufunc->name : "<unknown>"; + snprintf(method_name, 100, "legacy_ufunc_wrapper_for_%s", name); + + /* + * Assume that we require the Python API when any of the (legacy) dtypes + * flags it. + */ + int any_output_flexible = 0; + NPY_ARRAYMETHOD_FLAGS flags = 0; + + for (int i = 0; i < ufunc->nin+ufunc->nout; i++) { + if (signature[i]->singleton->flags & ( + NPY_ITEM_REFCOUNT | NPY_ITEM_IS_POINTER | NPY_NEEDS_PYAPI)) { + flags |= NPY_METH_REQUIRES_PYAPI; + } + if (signature[i]->parametric) { + any_output_flexible = 1; + } + } + + PyType_Slot slots[3] = { + {NPY_METH_get_loop, &get_wrapped_legacy_ufunc_loop}, + {NPY_METH_resolve_descriptors, &simple_legacy_resolve_descriptors}, + {0, NULL}, + }; + if (any_output_flexible) { + /* We cannot use the default descriptor resolver. 
*/ + slots[1].pfunc = &wrapped_legacy_resolve_descriptors; + } + + PyArrayMethod_Spec spec = { + .name = method_name, + .nin = ufunc->nin, + .nout = ufunc->nout, + .dtypes = signature, + .flags = flags, + .slots = slots, + .casting = NPY_EQUIV_CASTING, + }; + + PyBoundArrayMethodObject *bound_res = PyArrayMethod_FromSpec_int(&spec, 1); + if (bound_res == NULL) { + return NULL; + } + PyArrayMethodObject *res = bound_res->method; + Py_INCREF(res); + Py_DECREF(bound_res); + return res; +} diff --git a/numpy/core/src/umath/legacy_array_method.h b/numpy/core/src/umath/legacy_array_method.h new file mode 100644 index 000000000..0dec1fb3a --- /dev/null +++ b/numpy/core/src/umath/legacy_array_method.h @@ -0,0 +1,33 @@ +#ifndef _NPY_LEGACY_ARRAY_METHOD_H +#define _NPY_LEGACY_ARRAY_METHOD_H + +#include "numpy/ndarraytypes.h" +#include "numpy/ufuncobject.h" +#include "array_method.h" + + +NPY_NO_EXPORT PyArrayMethodObject * +PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, + PyArray_DTypeMeta *signature[]); + + + +/* + * The following two symbols are in the header so that other places can use + * them to probe for special cases (or whether an ArrayMethod is a "legacy" + * one). 
+ */ +NPY_NO_EXPORT int +get_wrapped_legacy_ufunc_loop(PyArrayMethod_Context *context, + int aligned, int move_references, + npy_intp *NPY_UNUSED(strides), + PyArrayMethod_StridedLoop **out_loop, + NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags); + +NPY_NO_EXPORT NPY_CASTING +wrapped_legacy_resolve_descriptors(PyArrayMethodObject *, + PyArray_DTypeMeta **, PyArray_Descr **, PyArray_Descr **); + + +#endif /*_NPY_LEGACY_ARRAY_METHOD_H */ diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 60a315f6e..5a32ae603 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -41,6 +41,7 @@ #include "ufunc_type_resolution.h" #include "reduction.h" #include "mem_overlap.h" +#include "npy_hashtable.h" #include "ufunc_object.h" #include "override.h" @@ -49,7 +50,10 @@ #include "common.h" #include "dtypemeta.h" #include "numpyos.h" +#include "dispatching.h" #include "convert_datatype.h" +#include "legacy_array_method.h" +#include "abstractdtypes.h" /********** PRINTF DEBUG TRACING **************/ #define NPY_UF_DBG_TRACING 0 @@ -101,6 +105,12 @@ _get_wrap_prepare_args(ufunc_full_args full_args) { static PyObject * prepare_input_arguments_for_outer(PyObject *args, PyUFuncObject *ufunc); +static int +resolve_descriptors(int nop, + PyUFuncObject *ufunc, PyArrayMethodObject *ufuncimpl, + PyArrayObject *operands[], PyArray_Descr *dtypes[], + PyArray_DTypeMeta *signature[], NPY_CASTING casting); + /*UFUNC_API*/ NPY_NO_EXPORT int @@ -911,7 +921,9 @@ _wheremask_converter(PyObject *obj, PyArrayObject **wheremask) */ static int convert_ufunc_arguments(PyUFuncObject *ufunc, - ufunc_full_args full_args, PyArrayObject **out_op, + ufunc_full_args full_args, PyArrayObject *out_op[], + PyArray_DTypeMeta *out_op_DTypes[], + npy_bool *force_legacy_promotion, npy_bool *allow_legacy_promotion, PyObject *order_obj, NPY_ORDER *out_order, PyObject *casting_obj, NPY_CASTING *out_casting, PyObject *subok_obj, 
npy_bool *out_subok, @@ -924,21 +936,55 @@ convert_ufunc_arguments(PyUFuncObject *ufunc, PyObject *obj; /* Convert and fill in input arguments */ + npy_bool all_scalar = NPY_TRUE; + npy_bool any_scalar = NPY_FALSE; + *allow_legacy_promotion = NPY_TRUE; + *force_legacy_promotion = NPY_FALSE; for (int i = 0; i < nin; i++) { obj = PyTuple_GET_ITEM(full_args.in, i); if (PyArray_Check(obj)) { - PyArrayObject *obj_a = (PyArrayObject *)obj; - out_op[i] = (PyArrayObject *)PyArray_FromArray(obj_a, NULL, 0); + out_op[i] = (PyArrayObject *)obj; + Py_INCREF(out_op[i]); } else { - out_op[i] = (PyArrayObject *)PyArray_FromAny(obj, - NULL, 0, 0, 0, NULL); + /* Convert the input to an array and check for special cases */ + out_op[i] = (PyArrayObject *)PyArray_FromAny(obj, NULL, 0, 0, 0, NULL); + if (out_op[i] == NULL) { + goto fail; + } } + out_op_DTypes[i] = NPY_DTYPE(PyArray_DESCR(out_op[i])); + Py_INCREF(out_op_DTypes[i]); - if (out_op[i] == NULL) { - goto fail; + if (!out_op_DTypes[i]->legacy) { + *allow_legacy_promotion = NPY_FALSE; + } + if (PyArray_NDIM(out_op[i]) == 0) { + any_scalar = NPY_TRUE; + } + else { + all_scalar = NPY_FALSE; + continue; } + /* + * TODO: we need to special case scalars here, if the input is a + * Python int, float, or complex, we have to use the "weak" + * DTypes: `PyArray_PyIntAbstractDType`, etc. + * This is to allow e.g. `float32(1.) + 1` to return `float32`. + * The correct array dtype can only be found after promotion for + * such a "weak scalar". We could avoid conversion here, but + * must convert it for use in the legacy promotion. + * There is still a small chance that this logic can instead + * happen inside the Python operators. + */ + } + if (*allow_legacy_promotion && (!all_scalar && any_scalar)) { + *force_legacy_promotion = should_use_min_scalar(nin, out_op, 0, NULL); + /* + * TODO: if this is False, we end up in a "very slow" path that should + * be avoided. This makes `int_arr + 0.` ~40% slower. 
+ */ } /* Convert and fill in output arguments */ @@ -948,6 +994,10 @@ convert_ufunc_arguments(PyUFuncObject *ufunc, if (_set_out_array(obj, out_op + i + nin) < 0) { goto fail; } + if (out_op[i] != NULL) { + out_op_DTypes[i + nin] = NPY_DTYPE(PyArray_DESCR(out_op[i])); + Py_INCREF(out_op_DTypes[i + nin]); + } } } @@ -991,11 +1041,11 @@ fail: * -1 if there is an error. */ static int -check_for_trivial_loop(PyUFuncObject *ufunc, +check_for_trivial_loop(PyArrayMethodObject *ufuncimpl, PyArrayObject **op, PyArray_Descr **dtypes, - npy_intp buffersize) + NPY_CASTING casting, npy_intp buffersize) { - int i, nin = ufunc->nin, nop = nin + ufunc->nout; + int i, nin = ufuncimpl->nin, nop = nin + ufuncimpl->nout; for (i = 0; i < nop; ++i) { /* @@ -1017,6 +1067,10 @@ check_for_trivial_loop(PyUFuncObject *ufunc, if (!(safety & _NPY_CAST_IS_VIEW)) { must_copy = 1; } + + if (PyArray_MinCastSafety(safety, casting) != casting) { + return 0; /* the cast is not safe enough */ + } } if (must_copy) { /* @@ -1132,14 +1186,15 @@ prepare_ufunc_output(PyUFuncObject *ufunc, * * Returns -2 if a trivial loop is not possible, 0 on success and -1 on error. 
*/ -static NPY_INLINE int -try_trivial_single_output_loop(PyUFuncObject *ufunc, - PyArrayObject *op[], PyArray_Descr *dtypes[], - NPY_ORDER order, PyObject *arr_prep[], ufunc_full_args full_args) +static int +try_trivial_single_output_loop(PyArrayMethod_Context *context, + PyArrayObject *op[], NPY_ORDER order, + PyObject *arr_prep[], ufunc_full_args full_args, + int errormask, PyObject *extobj) { - int nin = ufunc->nin; + int nin = context->method->nin; int nop = nin + 1; - assert(ufunc->nout == 1); + assert(context->method->nout == 1); /* The order of all N-D contiguous operands, can be fixed by `order` */ int operation_order = 0; @@ -1204,14 +1259,14 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc, } if (op[nin] == NULL) { - Py_INCREF(dtypes[nin]); + Py_INCREF(context->descriptors[nin]); op[nin] = (PyArrayObject *) PyArray_NewFromDescr(&PyArray_Type, - dtypes[nin], operation_ndim, operation_shape, + context->descriptors[nin], operation_ndim, operation_shape, NULL, NULL, operation_order==NPY_ARRAY_F_CONTIGUOUS, NULL); if (op[nin] == NULL) { return -1; } - fixed_strides[nin] = dtypes[nin]->elsize; + fixed_strides[nin] = context->descriptors[nin]->elsize; } else { /* If any input overlaps with the output, we use the full path. 
*/ @@ -1232,7 +1287,7 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc, } /* Call the __prepare_array__ if necessary */ - if (prepare_ufunc_output(ufunc, &op[nin], + if (prepare_ufunc_output((PyUFuncObject *)context->caller, &op[nin], arr_prep[0], full_args, 0) < 0) { return -1; } @@ -1243,46 +1298,88 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc, */ char *data[NPY_MAXARGS]; npy_intp count = PyArray_MultiplyList(operation_shape, operation_ndim); - int needs_api = 0; NPY_BEGIN_THREADS_DEF; - PyUFuncGenericFunction innerloop; - void *innerloopdata = NULL; - if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, - &innerloop, &innerloopdata, &needs_api) < 0) { + PyArrayMethod_StridedLoop *strided_loop; + NpyAuxData *auxdata = NULL; + NPY_ARRAYMETHOD_FLAGS flags = 0; + if (context->method->get_strided_loop(context, + 1, 0, fixed_strides, + &strided_loop, &auxdata, &flags) < 0) { return -1; } - - for (int iop = 0; iop < nop; iop++) { + for (int iop=0; iop < nop; iop++) { data[iop] = PyArray_BYTES(op[iop]); - needs_api |= PyDataType_REFCHK(dtypes[iop]); } - if (!needs_api) { + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + npy_clear_floatstatus_barrier((char *)context); + } + if (!(flags & NPY_METH_REQUIRES_PYAPI)) { NPY_BEGIN_THREADS_THRESHOLDED(count); } - innerloop(data, &count, fixed_strides, innerloopdata); + int res = strided_loop(context, data, &count, fixed_strides, auxdata); NPY_END_THREADS; + NPY_AUXDATA_FREE(auxdata); + + if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* NOTE: We could check float errors even when `res < 0` */ + const char *name = ufunc_get_name_cstr((PyUFuncObject *)context->caller); + res = _check_ufunc_fperr(errormask, extobj, name); + } + return res; +} + + +/* + * Check casting: It would be nice to just move this into the iterator + * or pass in the full cast information. But this can special case + * the logical functions and prints a better error message. 
+ */ +static NPY_INLINE int +validate_casting(PyArrayMethodObject *method, PyUFuncObject *ufunc, + PyArrayObject *ops[], PyArray_Descr *descriptors[], + NPY_CASTING casting) +{ + if (method->resolve_descriptors == &wrapped_legacy_resolve_descriptors) { + /* + * In this case the legacy type resolution was definitely called + * and we do not need to check (astropy/pyerfa relied on this). + */ + return 0; + } + if (PyUFunc_ValidateCasting(ufunc, casting, ops, descriptors) < 0) { + return -1; + } return 0; } +/* + * The ufunc loop implementation for both normal ufunc calls and masked calls + * when the iterator has to be used. + * + * See `PyUFunc_GenericFunctionInternal` for more information (where this is + * called from). + */ static int -execute_ufunc_loop(PyUFuncObject *ufunc, - int masked, - PyArrayObject **op, - PyArray_Descr **dtypes, - NPY_ORDER order, - npy_intp buffersize, - PyObject **arr_prep, - ufunc_full_args full_args, - npy_uint32 *op_flags) +execute_ufunc_loop(PyArrayMethod_Context *context, int masked, + PyArrayObject **op, NPY_ORDER order, npy_intp buffersize, + NPY_CASTING casting, + PyObject **arr_prep, ufunc_full_args full_args, + npy_uint32 *op_flags, int errormask, PyObject *extobj) { - int nin = ufunc->nin, nout = ufunc->nout; + PyUFuncObject *ufunc = (PyUFuncObject *)context->caller; + int nin = context->method->nin, nout = context->method->nout; int nop = nin + nout; + if (validate_casting(context->method, + ufunc, op, context->descriptors, casting) < 0) { + return -1; + } + if (masked) { assert(PyArray_TYPE(op[nop]) == NPY_BOOL); if (ufunc->_always_null_previously_masked_innerloop_selector != NULL) { @@ -1345,7 +1442,7 @@ execute_ufunc_loop(PyUFuncObject *ufunc, NpyIter *iter = NpyIter_AdvancedNew(nop + masked, op, iter_flags, order, NPY_UNSAFE_CASTING, - op_flags, dtypes, + op_flags, context->descriptors, -1, NULL, NULL, buffersize); if (iter == NULL) { return -1; @@ -1410,22 +1507,25 @@ execute_ufunc_loop(PyUFuncObject *ufunc, } /* - * 
Get the inner loop. + * Get the inner loop, with the possibility of specialization + * based on the fixed strides. */ - int needs_api = 0; - PyUFuncGenericFunction innerloop; - void *innerloopdata = NULL; + PyArrayMethod_StridedLoop *strided_loop; + NpyAuxData *auxdata; + npy_intp fixed_strides[NPY_MAXARGS]; + + NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + NPY_ARRAYMETHOD_FLAGS flags = 0; if (masked) { - if (PyUFunc_DefaultMaskedInnerLoopSelector(ufunc, - dtypes, &innerloop, (NpyAuxData **)&innerloopdata, - &needs_api) < 0) { + if (PyArrayMethod_GetMaskedStridedLoop(context, + 1, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { NpyIter_Deallocate(iter); return -1; } } else { - if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, - &innerloop, &innerloopdata, &needs_api) < 0) { + if (context->method->get_strided_loop(context, + 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { NpyIter_Deallocate(iter); return -1; } @@ -1434,87 +1534,45 @@ execute_ufunc_loop(PyUFuncObject *ufunc, /* Get the variables needed for the loop */ NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { + NPY_AUXDATA_FREE(auxdata); NpyIter_Deallocate(iter); - if (masked) { - NPY_AUXDATA_FREE((NpyAuxData *)innerloopdata); - } return -1; } char **dataptr = NpyIter_GetDataPtrArray(iter); npy_intp *strides = NpyIter_GetInnerStrideArray(iter); npy_intp *countptr = NpyIter_GetInnerLoopSizePtr(iter); - needs_api |= NpyIter_IterationNeedsAPI(iter); + int needs_api = NpyIter_IterationNeedsAPI(iter); NPY_BEGIN_THREADS_DEF; - if (!needs_api) { + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + npy_clear_floatstatus_barrier((char *)context); + } + if (!needs_api && !(flags & NPY_METH_REQUIRES_PYAPI)) { NPY_BEGIN_THREADS_THRESHOLDED(full_size); } NPY_UF_DBG_PRINT("Actual inner loop:\n"); /* Execute the loop */ + int res; do { - NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*count_ptr); - innerloop(dataptr, countptr, strides, 
innerloopdata); - } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); + NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*countptr); + res = strided_loop(context, dataptr, countptr, strides, auxdata); + } while (res == 0 && iternext(iter)); NPY_END_THREADS; - if (masked) { - NPY_AUXDATA_FREE((NpyAuxData *)innerloopdata); - } + NPY_AUXDATA_FREE(auxdata); - /* - * Currently `innerloop` may leave an error set, in this case - * NpyIter_Deallocate will always return an error as well. - */ - if (NpyIter_Deallocate(iter) == NPY_FAIL) { - return -1; + if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* NOTE: We could check float errors even when `res < 0` */ + const char *name = ufunc_get_name_cstr((PyUFuncObject *)context->caller); + res = _check_ufunc_fperr(errormask, extobj, name); } - return 0; -} -/* - * ufunc - the ufunc to call - * trivial_loop_ok - 1 if no alignment, data conversion, etc required - * op - the operands (ufunc->nin + ufunc->nout of them) - * dtypes - the dtype of each operand - * order - the loop execution order/output memory order - * buffersize - how big of a buffer to use - * arr_prep - the __array_prepare__ functions for the outputs - * full_args - the original input, output PyObject * - * op_flags - per-operand flags, a combination of NPY_ITER_* constants - */ -static int -execute_legacy_ufunc_loop(PyUFuncObject *ufunc, - int trivial_loop_ok, - PyArrayObject **op, - PyArray_Descr **dtypes, - NPY_ORDER order, - npy_intp buffersize, - PyObject **arr_prep, - ufunc_full_args full_args, - npy_uint32 *op_flags) -{ - /* First check for the trivial cases that don't need an iterator */ - if (trivial_loop_ok && ufunc->nout == 1) { - int fast_path_result = try_trivial_single_output_loop(ufunc, - op, dtypes, order, arr_prep, full_args); - if (fast_path_result != -2) { - return fast_path_result; - } - } - - /* - * If no trivial loop matched, an iterator is required to - * resolve broadcasting, etc - */ - NPY_UF_DBG_PRINT("iterator 
loop\n"); - if (execute_ufunc_loop(ufunc, 0, op, dtypes, order, - buffersize, arr_prep, full_args, op_flags) < 0) { + if (!NpyIter_Deallocate(iter)) { return -1; } - - return 0; + return res; } @@ -2007,9 +2065,9 @@ _initialize_variable_parts(PyUFuncObject *ufunc, static int PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, - PyArray_Descr *operation_descrs[], + PyArrayMethodObject *ufuncimpl, PyArray_Descr *operation_descrs[], PyArrayObject *op[], PyObject *extobj, - NPY_ORDER order, + NPY_CASTING casting, NPY_ORDER order, PyObject *axis, PyObject *axes, int keepdims) { int nin, nout; @@ -2034,13 +2092,12 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, /* These parameters come from extobj= or from a TLS global */ int buffersize = 0, errormask = 0; - /* The selected inner loop */ - PyUFuncGenericFunction innerloop = NULL; - void *innerloopdata = NULL; /* The dimensions which get passed to the inner loop */ npy_intp inner_dimensions[NPY_MAXDIMS+1]; /* The strides which get passed to the inner loop */ npy_intp *inner_strides = NULL; + /* Auxiliary data allocated by the ufuncimpl (ArrayMethod) */ + NpyAuxData *auxdata = NULL; /* The sizes of the core dimensions (# entries is ufunc->core_num_dim_ix) */ npy_intp *core_dim_sizes = inner_dimensions + 1; @@ -2057,6 +2114,11 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s\n", ufunc_name); + if (validate_casting(ufuncimpl, + ufunc, op, operation_descrs, casting) < 0) { + return -1; + } + /* Initialize possibly variable parts to the values from the ufunc */ retval = _initialize_variable_parts(ufunc, op_core_num_dims, core_dim_sizes, core_dim_flags); @@ -2274,18 +2336,11 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, NPY_ITER_WRITEONLY | NPY_UFUNC_DEFAULT_OUTPUT_FLAGS, op_flags); - /* For the generalized ufunc, we get the loop right away too */ - retval = ufunc->legacy_inner_loop_selector(ufunc, - operation_descrs, &innerloop, 
&innerloopdata, &needs_api); - if (retval < 0) { - goto fail; - } /* * Set up the iterator per-op flags. For generalized ufuncs, we * can't do buffering, so must COPY or UPDATEIFCOPY. */ - iter_flags = ufunc->iter_flags | NPY_ITER_MULTI_INDEX | NPY_ITER_REFS_OK | @@ -2394,21 +2449,34 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, /* * The first nop strides are for the inner loop (but only can - * copy them after removing the core axes) + * copy them after removing the core axes). The strides will not change + * if the iterator is not buffered (they are effectively fixed). + * Supporting buffering would make sense, but probably would have to be + * done in the inner-loop itself (not the iterator). */ + assert(!NpyIter_IsBuffered(iter)); memcpy(inner_strides, NpyIter_GetInnerStrideArray(iter), NPY_SIZEOF_INTP * nop); -#if 0 - printf("strides: "); - for (i = 0; i < nop+core_dim_ixs_size; ++i) { - printf("%d ", (int)inner_strides[i]); + /* Final preparation of the arraymethod call */ + PyArrayMethod_Context context = { + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = operation_descrs, + }; + PyArrayMethod_StridedLoop *strided_loop; + NPY_ARRAYMETHOD_FLAGS flags = 0; + + if (ufuncimpl->get_strided_loop(&context, 1, 0, inner_strides, + &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; + needs_api |= NpyIter_IterationNeedsAPI(iter); + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* Start with the floating-point exception flags cleared */ + npy_clear_floatstatus_barrier((char*)&iter); } - printf("\n"); -#endif - - /* Start with the floating-point exception flags cleared */ - npy_clear_floatstatus_barrier((char*)&iter); NPY_UF_DBG_PRINT("Executing inner loop\n"); @@ -2427,29 +2495,28 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, } dataptr = NpyIter_GetDataPtrArray(iter); count_ptr = NpyIter_GetInnerLoopSizePtr(iter); - needs_api = 
NpyIter_IterationNeedsAPI(iter); - if (!needs_api && !NpyIter_IterationNeedsAPI(iter)) { + if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(total_problem_size); } do { inner_dimensions[0] = *count_ptr; - innerloop(dataptr, inner_dimensions, inner_strides, innerloopdata); - } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); + retval = strided_loop(&context, + dataptr, inner_dimensions, inner_strides, auxdata); + } while (retval == 0 && iternext(iter)); if (!needs_api && !NpyIter_IterationNeedsAPI(iter)) { NPY_END_THREADS; } } - /* Check whether any errors occurred during the loop */ - if (PyErr_Occurred() || - _check_ufunc_fperr(errormask, extobj, ufunc_name) < 0) { - retval = -1; - goto fail; + if (retval == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* NOTE: We could check float errors even when `res < 0` */ + retval = _check_ufunc_fperr(errormask, extobj, ufunc_name); } PyArray_free(inner_strides); + NPY_AUXDATA_FREE(auxdata); if (NpyIter_Deallocate(iter) < 0) { retval = -1; } @@ -2464,6 +2531,7 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, fail: NPY_UF_DBG_PRINT1("Returning failure code %d\n", retval); PyArray_free(inner_strides); + NPY_AUXDATA_FREE(auxdata); NpyIter_Deallocate(iter); PyArray_free(remap_axis_memory); PyArray_free(remap_axis); @@ -2473,17 +2541,18 @@ fail: static int PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc, - PyArray_Descr *operation_descrs[], - PyArrayObject *op[], PyObject *extobj, NPY_ORDER order, + PyArrayMethodObject *ufuncimpl, PyArray_Descr *operation_descrs[], + PyArrayObject *op[], PyObject *extobj, + NPY_CASTING casting, NPY_ORDER order, PyObject *output_array_prepare[], ufunc_full_args full_args, PyArrayObject *wheremask) { int nin = ufunc->nin, nout = ufunc->nout, nop = nin + nout; - const char *ufunc_name = ufunc_name = ufunc_get_name_cstr(ufunc);; - int retval = -1; - npy_uint32 op_flags[NPY_MAXARGS]; + const char *ufunc_name = ufunc_get_name_cstr(ufunc); + npy_intp 
default_op_out_flags; + npy_uint32 op_flags[NPY_MAXARGS]; /* These parameters come from extobj= or from a TLS global */ int buffersize = 0, errormask = 0; @@ -2495,8 +2564,6 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc, return -1; } - NPY_UF_DBG_PRINT("Finding inner loop\n"); - if (wheremask != NULL) { /* Set up the flags. */ default_op_out_flags = NPY_ITER_NO_SUBTYPE | @@ -2513,6 +2580,13 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc, default_op_out_flags, op_flags); } + /* Final preparation of the arraymethod call */ + PyArrayMethod_Context context = { + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = operation_descrs, + }; + /* Do the ufunc loop */ if (wheremask != NULL) { NPY_UF_DBG_PRINT("Executing masked inner loop\n"); @@ -2525,52 +2599,38 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc, op[nop] = wheremask; operation_descrs[nop] = NULL; - /* Set up the flags */ - - npy_clear_floatstatus_barrier((char*)&ufunc); - retval = execute_ufunc_loop(ufunc, 1, - op, operation_descrs, order, - buffersize, output_array_prepare, - full_args, op_flags); + return execute_ufunc_loop(&context, 1, + op, order, buffersize, casting, + output_array_prepare, full_args, op_flags, + errormask, extobj); } else { - NPY_UF_DBG_PRINT("Executing legacy inner loop\n"); + NPY_UF_DBG_PRINT("Executing normal inner loop\n"); /* * This checks whether a trivial loop is ok, making copies of - * scalar and one dimensional operands if that will help. - * Since it requires dtypes, it can only be called after - * ufunc->type_resolver + * scalar and one dimensional operands if that should help. 
*/ - int trivial_ok = check_for_trivial_loop(ufunc, - op, operation_descrs, buffersize); + int trivial_ok = check_for_trivial_loop(ufuncimpl, + op, operation_descrs, casting, buffersize); if (trivial_ok < 0) { return -1; } + if (trivial_ok && context.method->nout == 1) { + /* Try to handle everything without using the (heavy) iterator */ + int retval = try_trivial_single_output_loop(&context, + op, order, output_array_prepare, full_args, + errormask, extobj); + if (retval != -2) { + return retval; + } + } - /* check_for_trivial_loop on half-floats can overflow */ - npy_clear_floatstatus_barrier((char*)&ufunc); - - retval = execute_legacy_ufunc_loop(ufunc, trivial_ok, - op, operation_descrs, order, - buffersize, output_array_prepare, - full_args, op_flags); - } - if (retval < 0) { - return -1; - } - - /* - * Check whether any errors occurred during the loop. The loops should - * indicate this in retval, but since the inner-loop currently does not - * report errors, this does not happen in all branches (at this time). - */ - if (PyErr_Occurred() || - _check_ufunc_fperr(errormask, extobj, ufunc_name) < 0) { - return -1; + return execute_ufunc_loop(&context, 0, + op, order, buffersize, casting, + output_array_prepare, full_args, op_flags, + errormask, extobj); } - - return retval; } @@ -4248,83 +4308,30 @@ _get_dtype(PyObject *dtype_obj) { } -static int -_make_new_typetup( - int nop, PyArray_DTypeMeta *signature[], PyObject **out_typetup) { - *out_typetup = PyTuple_New(nop); - if (*out_typetup == NULL) { - return -1; - } - - int noncount = 0; - for (int i = 0; i < nop; i++) { - PyObject *item; - if (signature[i] == NULL) { - item = Py_None; - noncount++; - } - else { - if (!signature[i]->legacy || signature[i]->abstract) { - /* - * The legacy type resolution can't deal with these. - * This path will return `None` or so in the future to - * set an error later if the legacy type resolution is used. 
-                 */
-                PyErr_SetString(PyExc_RuntimeError,
-                        "Internal NumPy error: new DType in signature not yet "
-                        "supported. (This should be unreachable code!)");
-                Py_SETREF(*out_typetup, NULL);
-                return -1;
-            }
-            item = (PyObject *)signature[i]->singleton;
-        }
-        Py_INCREF(item);
-        PyTuple_SET_ITEM(*out_typetup, i, item);
-    }
-    if (noncount == nop) {
-        /* The whole signature was None, simply ignore type tuple */
-        Py_DECREF(*out_typetup);
-        *out_typetup = NULL;
-    }
-    return 0;
-}
-
-
 /*
- * Finish conversion parsing of the type tuple. NumPy always only honored
- * the type number for passed in descriptors/dtypes.
+ * Finish conversion parsing of the DType signature. NumPy always only
+ * honored the type number for passed in descriptors/dtypes.
  * The `dtype` argument is interpreted as the first output DType (not
  * descriptor).
  * Unlike the dtype of an `out` array, it influences loop selection!
  *
- * NOTE: This function replaces the type tuple if passed in (it steals
- *     the original reference and returns a new object and reference)!
- *     The caller must XDECREF the type tuple both on error or success.
- *
- * The function returns a new, normalized type-tuple.
+ * It is the caller's responsibility to clean `signature` and NULL it before
+ * calling.
  */
 static int
-_get_normalized_typetup(PyUFuncObject *ufunc,
-        PyObject *dtype_obj, PyObject *signature_obj, PyObject **out_typetup)
+_get_fixed_signature(PyUFuncObject *ufunc,
+        PyObject *dtype_obj, PyObject *signature_obj,
+        PyArray_DTypeMeta **signature)
 {
     if (dtype_obj == NULL && signature_obj == NULL) {
         return 0;
     }
-    int res = -1;
     int nin = ufunc->nin, nout = ufunc->nout, nop = nin + nout;
-    /*
-     * TODO: `signature` will be the main result in the future and
-     * not the typetup. (Type tuple construction can be deffered to when
-     * the legacy fallback is used.
- */ - PyArray_DTypeMeta *signature[NPY_MAXARGS]; - memset(signature, '\0', sizeof(*signature) * nop); if (dtype_obj != NULL) { if (dtype_obj == Py_None) { /* If `dtype=None` is passed, no need to do anything */ - assert(*out_typetup == NULL); return 0; } if (nout == 0) { @@ -4342,8 +4349,7 @@ _get_normalized_typetup(PyUFuncObject *ufunc, signature[i] = dtype; } Py_DECREF(dtype); - res = _make_new_typetup(nop, signature, out_typetup); - goto finish; + return 0; } assert(signature_obj != NULL); @@ -4359,32 +4365,46 @@ _get_normalized_typetup(PyUFuncObject *ufunc, if (PyTuple_GET_ITEM(signature_obj, 0) == Py_None) { PyErr_SetString(PyExc_TypeError, "a single item type tuple cannot contain None."); - goto finish; + return -1; } if (DEPRECATE("The use of a length 1 tuple for the ufunc " "`signature` is deprecated. Use `dtype` or fill the" "tuple with `None`s.") < 0) { - goto finish; + return -1; } /* Use the same logic as for `dtype=` */ - res = _get_normalized_typetup(ufunc, - PyTuple_GET_ITEM(signature_obj, 0), NULL, out_typetup); - goto finish; + return _get_fixed_signature(ufunc, + PyTuple_GET_ITEM(signature_obj, 0), NULL, signature); } if (n != nop) { PyErr_Format(PyExc_ValueError, "a type-tuple must be specified of length %d for ufunc '%s'", nop, ufunc_get_name_cstr(ufunc)); - goto finish; + return -1; } for (int i = 0; i < nop; ++i) { PyObject *item = PyTuple_GET_ITEM(signature_obj, i); if (item == Py_None) { continue; } - signature[i] = _get_dtype(item); - if (signature[i] == NULL) { - goto finish; + else { + signature[i] = _get_dtype(item); + if (signature[i] == NULL) { + return -1; + } + else if (i < nin && signature[i]->abstract) { + /* + * We reject abstract input signatures for now. These + * can probably be defined by finding the common DType with + * the actual input and using the result of this for the + * promotion. + */ + PyErr_SetString(PyExc_TypeError, + "Input DTypes to the signature must not be " + "abstract. 
The behaviour may be defined in the " + "future."); + return -1; + } } } } @@ -4394,7 +4414,7 @@ _get_normalized_typetup(PyUFuncObject *ufunc, if (PyBytes_Check(signature_obj)) { str_object = PyUnicode_FromEncodedObject(signature_obj, NULL, NULL); if (str_object == NULL) { - goto finish; + return -1; } } else { @@ -4406,7 +4426,7 @@ _get_normalized_typetup(PyUFuncObject *ufunc, const char *str = PyUnicode_AsUTF8AndSize(str_object, &length); if (str == NULL) { Py_DECREF(str_object); - goto finish; + return -1; } if (length != 1 && (length != nin+nout + 2 || @@ -4415,18 +4435,17 @@ _get_normalized_typetup(PyUFuncObject *ufunc, "a type-string for %s, %d typecode(s) before and %d after " "the -> sign", ufunc_get_name_cstr(ufunc), nin, nout); Py_DECREF(str_object); - goto finish; + return -1; } if (length == 1 && nin+nout != 1) { Py_DECREF(str_object); if (DEPRECATE("The use of a length 1 string for the ufunc " "`signature` is deprecated. Use `dtype` attribute or " "pass a tuple with `None`s.") < 0) { - goto finish; + return -1; } /* `signature="l"` is the same as `dtype="l"` */ - res = _get_normalized_typetup(ufunc, str_object, NULL, out_typetup); - goto finish; + return _get_fixed_signature(ufunc, str_object, NULL, signature); } else { for (int i = 0; i < nin+nout; ++i) { @@ -4434,7 +4453,7 @@ _get_normalized_typetup(PyUFuncObject *ufunc, PyArray_Descr *descr = PyArray_DescrFromType(str[istr]); if (descr == NULL) { Py_DECREF(str_object); - goto finish; + return -1; } signature[i] = NPY_DTYPE(descr); Py_INCREF(signature[i]); @@ -4446,15 +4465,79 @@ _get_normalized_typetup(PyUFuncObject *ufunc, else { PyErr_SetString(PyExc_TypeError, "the signature object to ufunc must be a string or a tuple."); - goto finish; + return -1; + } + return 0; +} + + +/* + * Fill in the actual descriptors used for the operation. This function + * supports falling back to the legacy `ufunc->type_resolver`. 
+ * + * We guarantee the array-method that all passed in descriptors are of the + * correct DType instance (i.e. a string can just fetch the length, it doesn't + * need to "cast" to string first). + */ +static int +resolve_descriptors(int nop, + PyUFuncObject *ufunc, PyArrayMethodObject *ufuncimpl, + PyArrayObject *operands[], PyArray_Descr *dtypes[], + PyArray_DTypeMeta *signature[], NPY_CASTING casting) +{ + int retval = -1; + PyArray_Descr *original_dtypes[NPY_MAXARGS]; + + for (int i = 0; i < nop; ++i) { + if (operands[i] == NULL) { + original_dtypes[i] = NULL; + } + else { + /* + * The dtype may mismatch the signature, in which case we need + * to make it fit before calling the resolution. + */ + PyArray_Descr *descr = PyArray_DTYPE(operands[i]); + original_dtypes[i] = PyArray_CastDescrToDType(descr, signature[i]); + if (original_dtypes[i] == NULL) { + nop = i; /* only this much is initialized */ + goto finish; + } + } + } + + NPY_UF_DBG_PRINT("Resolving the descriptors\n"); + + if (ufuncimpl->resolve_descriptors != &wrapped_legacy_resolve_descriptors) { + /* The default: use the `ufuncimpl` as nature intended it */ + NPY_CASTING safety = ufuncimpl->resolve_descriptors(ufuncimpl, + signature, original_dtypes, dtypes); + if (safety < 0) { + goto finish; + } + if (NPY_UNLIKELY(PyArray_MinCastSafety(safety, casting) != casting)) { + /* TODO: Currently impossible to reach (specialized unsafe loop) */ + PyErr_Format(PyExc_TypeError, + "The ufunc implementation for %s with the given dtype " + "signature is not possible under the casting rule %s", + ufunc_get_name_cstr(ufunc), npy_casting_to_string(casting)); + goto finish; + } + retval = 0; + } + else { + /* + * Fall-back to legacy resolver using `operands`, used exclusively + * for datetime64/timedelta64 and custom ufuncs (in pyerfa/astropy). 
+ */ + retval = ufunc->type_resolver(ufunc, casting, operands, NULL, dtypes); } - res = _make_new_typetup(nop, signature, out_typetup); finish: - for (int i =0; i < nop; i++) { - Py_XDECREF(signature[i]); + for (int i = 0; i < nop; i++) { + Py_XDECREF(original_dtypes[i]); } - return res; + return retval; } @@ -4553,13 +4636,16 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc, /* All following variables are cleared in the `fail` error path */ ufunc_full_args full_args; PyArrayObject *wheremask = NULL; - PyObject *typetup = NULL; + PyArray_DTypeMeta *signature[NPY_MAXARGS]; PyArrayObject *operands[NPY_MAXARGS]; + PyArray_DTypeMeta *operand_DTypes[NPY_MAXARGS]; PyArray_Descr *operation_descrs[NPY_MAXARGS]; PyObject *output_array_prepare[NPY_MAXARGS]; /* Initialize all arrays (we usually only need a small part) */ + memset(signature, 0, nop * sizeof(*signature)); memset(operands, 0, nop * sizeof(*operands)); + memset(operand_DTypes, 0, nop * sizeof(*operation_descrs)); memset(operation_descrs, 0, nop * sizeof(*operation_descrs)); memset(output_array_prepare, 0, nout * sizeof(*output_array_prepare)); @@ -4572,7 +4658,7 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc, */ /* Check number of arguments */ - if ((len_args < nin) || (len_args > nop)) { + if (NPY_UNLIKELY((len_args < nin) || (len_args > nop))) { PyErr_Format(PyExc_TypeError, "%s() takes from %d to %d positional arguments but " "%zd were given", @@ -4731,7 +4817,8 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc, * Parse the passed `dtype` or `signature` into an array containing * PyArray_DTypeMeta and/or None. 
*/ - if (_get_normalized_typetup(ufunc, dtype_obj, signature_obj, &typetup) < 0) { + if (_get_fixed_signature(ufunc, + dtype_obj, signature_obj, signature) < 0) { goto fail; } @@ -4739,7 +4826,13 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc, NPY_CASTING casting = NPY_DEFAULT_ASSIGN_CASTING; npy_bool subok = NPY_TRUE; int keepdims = -1; /* We need to know if it was passed */ - if (convert_ufunc_arguments(ufunc, full_args, operands, + npy_bool force_legacy_promotion; + npy_bool allow_legacy_promotion; + if (convert_ufunc_arguments(ufunc, + /* extract operand related information: */ + full_args, operands, + operand_DTypes, &force_legacy_promotion, &allow_legacy_promotion, + /* extract general information: */ order_obj, &order, casting_obj, &casting, subok_obj, &subok, @@ -4748,8 +4841,24 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc, goto fail; } - if (ufunc->type_resolver(ufunc, - casting, operands, typetup, operation_descrs) < 0) { + /* + * Note that part of the promotion is to the complete the signature + * (until here it only represents the fixed part and is usually NULLs). + * + * After promotion, we could push the following logic into the ArrayMethod + * in the future. For now, we do it here. The type resolution step can + * be shared between the ufunc and gufunc code. + */ + PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc, + operands, signature, + operand_DTypes, force_legacy_promotion, allow_legacy_promotion); + if (ufuncimpl == NULL) { + goto fail; + } + + /* Find the correct descriptors for the operation */ + if (resolve_descriptors(nop, ufunc, ufuncimpl, + operands, operation_descrs, signature, casting) < 0) { goto fail; } @@ -4761,20 +4870,17 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc, * Do the final preparations and call the inner-loop. 
*/ if (!ufunc->core_enabled) { - errval = PyUFunc_GenericFunctionInternal(ufunc, - operation_descrs, operands, - extobj, order, + errval = PyUFunc_GenericFunctionInternal(ufunc, ufuncimpl, + operation_descrs, operands, extobj, casting, order, output_array_prepare, full_args, /* for __array_prepare__ */ wheremask); } else { - errval = PyUFunc_GeneralizedFunctionInternal(ufunc, - operation_descrs, operands, - extobj, order, + errval = PyUFunc_GeneralizedFunctionInternal(ufunc, ufuncimpl, + operation_descrs, operands, extobj, casting, order, /* GUFuncs never (ever) called __array_prepare__! */ axis_obj, axes_obj, keepdims); } - if (errval < 0) { goto fail; } @@ -4785,6 +4891,7 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc, */ Py_XDECREF(wheremask); for (int i = 0; i < nop; i++) { + Py_XDECREF(operand_DTypes[i]); Py_DECREF(operation_descrs[i]); if (i < nin) { Py_DECREF(operands[i]); @@ -4793,22 +4900,21 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc, Py_XDECREF(output_array_prepare[i-nin]); } } - Py_XDECREF(typetup); - /* The following steals the references to the outputs: */ PyObject *result = replace_with_wrapped_result_and_return(ufunc, full_args, subok, operands+nin); Py_XDECREF(full_args.in); Py_XDECREF(full_args.out); + return result; fail: - Py_XDECREF(typetup); Py_XDECREF(full_args.in); Py_XDECREF(full_args.out); Py_XDECREF(wheremask); for (int i = 0; i < ufunc->nargs; i++) { Py_XDECREF(operands[i]); + Py_XDECREF(operand_DTypes[i]); Py_XDECREF(operation_descrs[i]); if (i < nout) { Py_XDECREF(output_array_prepare[i]); @@ -5084,6 +5190,28 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi ufunc->legacy_inner_loop_selector = &PyUFunc_DefaultLegacyInnerLoopSelector; ufunc->_always_null_previously_masked_innerloop_selector = NULL; + ufunc->op_flags = NULL; + ufunc->_loops = NULL; + if (nin + nout != 0) { + ufunc->_dispatch_cache = PyArrayIdentityHash_New(nin + nout); + if (ufunc->_dispatch_cache == NULL) { + Py_DECREF(ufunc); + 
return NULL; + } + } + else { + /* + * Work around a test that seems to do this right now, it should not + * be a valid ufunc at all though, so. TODO: Remove... + */ + ufunc->_dispatch_cache = NULL; + } + ufunc->_loops = PyList_New(0); + if (ufunc->_loops == NULL) { + Py_DECREF(ufunc); + return NULL; + } + if (name == NULL) { ufunc->name = "?"; } @@ -5105,6 +5233,29 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi return NULL; } } + + char *curr_types = ufunc->types; + for (int i = 0; i < ntypes * (nin + nout); i += nin + nout) { + /* + * Add all legacy wrapping loops here. This is normally not necessary, + * but makes sense. It could also help/be needed to avoid issues with + * ambiguous loops such as: `OO->?` and `OO->O` where in theory the + * wrong loop could be picked if only the second one is added. + */ + PyObject *info; + PyArray_DTypeMeta *op_dtypes[NPY_MAXARGS]; + for (int arg = 0; arg < nin + nout; arg++) { + op_dtypes[arg] = PyArray_DTypeFromTypeNum(curr_types[arg]); + /* These DTypes are immortal and adding INCREFs: so borrow it */ + Py_DECREF(op_dtypes[arg]); + } + curr_types += nin + nout; + + info = add_and_return_legacy_wrapping_ufunc_loop(ufunc, op_dtypes, 1); + if (info == NULL) { + return NULL; + } + } return (PyObject *)ufunc; } @@ -5320,6 +5471,8 @@ PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc, PyArray_Descr *descr; PyUFunc_Loop1d *funcdata; PyObject *key, *cobj; + PyArray_DTypeMeta *signature[NPY_MAXARGS]; + PyObject *signature_tuple = NULL; int i; int *newtypes=NULL; @@ -5348,13 +5501,67 @@ PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc, if (arg_types != NULL) { for (i = 0; i < ufunc->nargs; i++) { newtypes[i] = arg_types[i]; + signature[i] = PyArray_DTypeFromTypeNum(arg_types[i]); + Py_DECREF(signature[i]); /* DType can't be deleted... 
*/ } } else { for (i = 0; i < ufunc->nargs; i++) { newtypes[i] = usertype; + signature[i] = PyArray_DTypeFromTypeNum(usertype); + Py_DECREF(signature[i]); /* DType can't be deleted... */ + } + } + + signature_tuple = PyArray_TupleFromItems( + ufunc->nargs, (PyObject **)signature, 0); + if (signature_tuple == NULL) { + goto fail; + } + /* + * We add the loop to the list of all loops and promoters. If the + * equivalent loop was already added, skip this. + * Note that even then the ufunc is still modified: The legacy ArrayMethod + * already looks up the inner-loop from the ufunc (and this is replaced + * below!). + * If the existing one is not a legacy ArrayMethod, we raise currently: + * A new-style loop should not be replaced by an old-style one. + */ + int add_new_loop = 1; + for (Py_ssize_t j = 0; j < PyList_GET_SIZE(ufunc->_loops); j++) { + PyObject *item = PyList_GET_ITEM(ufunc->_loops, j); + PyObject *existing_tuple = PyTuple_GET_ITEM(item, 0); + + int cmp = PyObject_RichCompareBool(existing_tuple, signature_tuple, Py_EQ); + if (cmp < 0) { + goto fail; + } + if (!cmp) { + continue; + } + PyObject *registered = PyTuple_GET_ITEM(item, 1); + if (!PyObject_TypeCheck(registered, &PyArrayMethod_Type) || ( + (PyArrayMethodObject *)registered)->get_strided_loop != + &get_wrapped_legacy_ufunc_loop) { + PyErr_Format(PyExc_TypeError, + "A non-compatible loop was already registered for " + "ufunc %s and DTypes %S.", + ufunc_get_name_cstr(ufunc), signature_tuple); + goto fail; + } + /* The loop was already added */ + add_new_loop = 0; + break; + } + if (add_new_loop) { + PyObject *info = add_and_return_legacy_wrapping_ufunc_loop( + ufunc, signature, 0); + if (info == NULL) { + goto fail; } } + /* Clearing sets it to NULL for the error paths */ + Py_CLEAR(signature_tuple); funcdata->func = function; funcdata->arg_types = newtypes; @@ -5429,6 +5636,7 @@ PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc, fail: Py_DECREF(key); + Py_XDECREF(signature_tuple); 
PyArray_free(funcdata); PyArray_free(newtypes); if (!PyErr_Occurred()) PyErr_NoMemory(); @@ -5454,8 +5662,10 @@ ufunc_dealloc(PyUFuncObject *ufunc) if (ufunc->identity == PyUFunc_IdentityValue) { Py_DECREF(ufunc->identity_value); } - if (ufunc->obj != NULL) { - Py_DECREF(ufunc->obj); + Py_XDECREF(ufunc->obj); + Py_XDECREF(ufunc->_loops); + if (ufunc->_dispatch_cache != NULL) { + PyArrayIdentityHash_Dealloc(ufunc->_dispatch_cache); } PyObject_GC_Del(ufunc); } diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c index 468327b8c..a7d536656 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.c +++ b/numpy/core/src/umath/ufunc_type_resolution.c @@ -1,4 +1,16 @@ /* + * NOTE: The type resolution defined in this file is considered legacy. + * + * The new mechanism separates type resolution and promotion into two + * distinct steps, as per NEP 43. + * Further, the functions in this file rely on the operands rather than + * only the DTypes/descriptors. They are still called and at this point + * vital (NumPy ~1.21), but should hopefully become largely irrelevant very + * quickly. + * + * At that point, this file should be deletable in its entirety. + * + * * This file implements type resolution for NumPy element-wise ufuncs. * This mechanism is still backwards-compatible with the pre-existing * legacy mechanism, so performs much slower than is necessary. 
@@ -89,9 +101,9 @@ raise_binary_type_reso_error(PyUFuncObject *ufunc, PyArrayObject **operands) { /** Helper function to raise UFuncNoLoopError * Always returns -1 to indicate the exception was raised, for convenience */ -static int +NPY_NO_EXPORT int raise_no_loop_found_error( - PyUFuncObject *ufunc, PyArray_Descr **dtypes) + PyUFuncObject *ufunc, PyObject **dtypes) { static PyObject *exc_type = NULL; @@ -102,8 +114,7 @@ raise_no_loop_found_error( return -1; } - PyObject *dtypes_tup = PyArray_TupleFromItems( - ufunc->nargs, (PyObject **)dtypes, 1); + PyObject *dtypes_tup = PyArray_TupleFromItems(ufunc->nargs, dtypes, 1); if (dtypes_tup == NULL) { return -1; } @@ -119,6 +130,7 @@ raise_no_loop_found_error( return -1; } + static int raise_casting_error( PyObject *exc_type, @@ -333,22 +345,30 @@ PyUFunc_SimpleBinaryComparisonTypeResolver(PyUFuncObject *ufunc, if (out_dtypes[0] == NULL) { return -1; } + out_dtypes[1] = out_dtypes[0]; + Py_INCREF(out_dtypes[1]); } else { /* Not doing anything will lead to a loop no found error. */ out_dtypes[0] = PyArray_DESCR(operands[0]); Py_INCREF(out_dtypes[0]); + out_dtypes[1] = PyArray_DESCR(operands[1]); + Py_INCREF(out_dtypes[1]); } - out_dtypes[1] = out_dtypes[0]; - Py_INCREF(out_dtypes[1]); } else { PyArray_Descr *descr; /* + * DEPRECATED 2021-03, NumPy 1.20 + * * If the type tuple was originally a single element (probably), * issue a deprecation warning, but otherwise accept it. Since the * result dtype is always boolean, this is not actually valid unless it * is `object` (but if there is an object input we already deferred). + * + * TODO: Once this deprecation is gone, the special case for + * `PyUFunc_SimpleBinaryComparisonTypeResolver` in dispatching.c + * can be removed. 
*/ if (PyTuple_Check(type_tup) && PyTuple_GET_SIZE(type_tup) == 3 && PyTuple_GET_ITEM(type_tup, 0) == Py_None && @@ -527,7 +547,7 @@ PyUFunc_SimpleUniformOperationTypeResolver( out_dtypes[iop] = PyArray_DESCR(operands[iop]); Py_INCREF(out_dtypes[iop]); } - raise_no_loop_found_error(ufunc, out_dtypes); + raise_no_loop_found_error(ufunc, (PyObject **)out_dtypes); for (iop = 0; iop < ufunc->nin; iop++) { Py_DECREF(out_dtypes[iop]); out_dtypes[iop] = NULL; @@ -1492,7 +1512,7 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc, types += nargs; } - return raise_no_loop_found_error(ufunc, dtypes); + return raise_no_loop_found_error(ufunc, (PyObject **)dtypes); } diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h index cd0ff4a0d..dd88a081a 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.h +++ b/numpy/core/src/umath/ufunc_type_resolution.h @@ -135,4 +135,7 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc, void **out_innerloopdata, int *out_needs_api); +NPY_NO_EXPORT int +raise_no_loop_found_error(PyUFuncObject *ufunc, PyObject **dtypes); + #endif diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index 9d1d514fb..becd65b11 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -307,8 +307,8 @@ class TestModulus: # promotes to float which does not fit a = np.array([1, 2], np.int64) b = np.array([1, 2], np.uint64) - pattern = 'could not be coerced to provided output parameter' - with assert_raises_regex(TypeError, pattern): + with pytest.raises(TypeError, + match=r"Cannot cast ufunc 'floor_divide' output from"): a //= b diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 0251f21a9..dab11d948 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -164,8 +164,9 @@ class TestUfuncGenericLoops: except AttributeError: return lambda: getattr(np.core.umath, 
attr)(val) - num_arr = np.array([val], dtype=np.float64) - obj_arr = np.array([MyFloat(val)], dtype="O") + # Use 0-D arrays, to ensure the same element call + num_arr = np.array(val, dtype=np.float64) + obj_arr = np.array(MyFloat(val), dtype="O") with np.errstate(all="raise"): try: @@ -1711,9 +1712,17 @@ class TestUfunc: target = np.array([0, 2, 4], dtype=_rational_tests.rational) assert_equal(result, target) - # no output type should raise TypeError + # The new resolution means that we can (usually) find custom loops + # as long as they match exactly: + result = _rational_tests.test_add(a, b) + assert_equal(result, target) + + # But since we use the old type resolver, this may not work + # for dtype variations unless the output dtype is given: + result = _rational_tests.test_add(a, b.astype(np.uint16), out=c) + assert_equal(result, target) with assert_raises(TypeError): - _rational_tests.test_add(a, b) + _rational_tests.test_add(a, b.astype(np.uint16)) def test_operand_flags(self): a = np.arange(16, dtype='l').reshape(4, 4) @@ -2029,8 +2038,7 @@ class TestUfunc: np.true_divide, np.floor_divide, np.bitwise_and, np.bitwise_or, np.bitwise_xor, np.left_shift, np.right_shift, np.fmax, np.fmin, np.fmod, np.hypot, np.logaddexp, np.logaddexp2, - np.logical_and, np.logical_or, np.logical_xor, np.maximum, - np.minimum, np.mod, + np.maximum, np.minimum, np.mod, np.greater, np.greater_equal, np.less, np.less_equal, np.equal, np.not_equal] |