summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2021-07-28 22:24:36 +0300
committerGitHub <noreply@github.com>2021-07-28 22:24:36 +0300
commit2c1a34daa024398cdf9c6e1fbeff52b4eb280551 (patch)
tree67644370dec385ec09793332170276ce3af8147d
parente12112e7caf72f93e07128f6e7a8a3399f1c6bc9 (diff)
parentc6fdddeddbbe176ada161955094739998ff15272 (diff)
downloadnumpy-2c1a34daa024398cdf9c6e1fbeff52b4eb280551.tar.gz
Merge pull request #19571 from seberg/ufunc-refactor-2021-normal
MAINT: Refactor UFunc core to use NEP 43 style dispatching
-rw-r--r--doc/source/reference/c-api/types-and-structures.rst10
-rw-r--r--numpy/core/include/numpy/ufuncobject.h6
-rw-r--r--numpy/core/setup.py2
-rw-r--r--numpy/core/src/multiarray/array_method.c76
-rw-r--r--numpy/core/src/multiarray/array_method.h20
-rw-r--r--numpy/core/src/multiarray/nditer_constr.c5
-rw-r--r--numpy/core/src/umath/dispatching.c688
-rw-r--r--numpy/core/src/umath/dispatching.h22
-rw-r--r--numpy/core/src/umath/legacy_array_method.c257
-rw-r--r--numpy/core/src/umath/legacy_array_method.h33
-rw-r--r--numpy/core/src/umath/ufunc_object.c776
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.c36
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.h3
-rw-r--r--numpy/core/tests/test_scalarmath.py4
-rw-r--r--numpy/core/tests/test_ufunc.py20
15 files changed, 1604 insertions, 354 deletions
diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst
index 75a97c20e..54a1e09e1 100644
--- a/doc/source/reference/c-api/types-and-structures.rst
+++ b/doc/source/reference/c-api/types-and-structures.rst
@@ -818,6 +818,7 @@ PyUFunc_Type and PyUFuncObject
npy_intp *core_dim_sizes;
npy_uint32 *core_dim_flags;
PyObject *identity_value;
+ /* Further private slots (size depends on the NumPy version) */
} PyUFuncObject;
.. c:macro: PyObject_HEAD
@@ -957,9 +958,12 @@ PyUFunc_Type and PyUFuncObject
.. c:member:: PyUFunc_LegacyInnerLoopSelectionFunc *legacy_inner_loop_selector
- A function which returns an inner loop. The ``legacy`` in the name arises
- because for NumPy 1.6 a better variant had been planned. This variant
- has not yet come about.
+ .. deprecated:: 1.22
+
+ Some fallback support for this slot exists, but will be removed
+ eventually. A univiersal function which relied on this will have
+ eventually have to be ported.
+ See ref:`NEP 41 <NEP41>` and ref:`NEP 43 <NEP43>`
.. c:member:: void *reserved2
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index 0f3b8529a..fd7307703 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -211,6 +211,12 @@ typedef struct _tagPyUFuncObject {
/* Identity for reduction, when identity == PyUFunc_IdentityValue */
PyObject *identity_value;
+ /* New in NPY_API_VERSION 0x0000000F and above */
+
+ /* New private fields related to dispatching */
+ void *_dispatch_cache;
+ /* A PyListObject of `(tuple of DTypes, ArrayMethod/Promoter)` */
+ PyObject *_loops;
} PyUFuncObject;
#include "arrayobject.h"
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 29d309f74..c20320910 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -928,6 +928,8 @@ def configuration(parent_package='',top_path=None):
join('src', 'umath', 'matmul.c.src'),
join('src', 'umath', 'clip.h.src'),
join('src', 'umath', 'clip.c.src'),
+ join('src', 'umath', 'dispatching.c'),
+ join('src', 'umath', 'legacy_array_method.c'),
join('src', 'umath', 'ufunc_object.c'),
join('src', 'umath', 'extobj.c'),
join('src', 'umath', 'scalarmath.c.src'),
diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c
index cc841ee64..c1b6d4e71 100644
--- a/numpy/core/src/multiarray/array_method.c
+++ b/numpy/core/src/multiarray/array_method.c
@@ -757,9 +757,6 @@ boundarraymethod__simple_strided_call(
/*
- * TODO: Currently still based on the old ufunc system and not ArrayMethod!
- * This requires fixing the ufunc code first.
- *
* Support for masked inner-strided loops. Masked inner-strided loops are
* only used in the ufunc machinery. So this special cases them.
* In the future it probably makes sense to create an::
@@ -770,8 +767,8 @@ boundarraymethod__simple_strided_call(
*/
typedef struct {
NpyAuxData base;
- PyUFuncGenericFunction unmasked_stridedloop;
- void *innerloopdata;
+ PyArrayMethod_StridedLoop *unmasked_stridedloop;
+ NpyAuxData *unmasked_auxdata;
int nargs;
char *dataptrs[];
} _masked_stridedloop_data;
@@ -781,6 +778,7 @@ static void
_masked_stridedloop_data_free(NpyAuxData *auxdata)
{
_masked_stridedloop_data *data = (_masked_stridedloop_data *)auxdata;
+ NPY_AUXDATA_FREE(data->unmasked_auxdata);
PyMem_Free(data);
}
@@ -790,15 +788,15 @@ _masked_stridedloop_data_free(NpyAuxData *auxdata)
* masked strided-loop, only calling the function for elements
* where the mask is True.
*/
-static void
-unmasked_ufunc_loop_as_masked(
- char **data, const npy_intp *dimensions,
- const npy_intp *strides, void *_auxdata)
+static int
+generic_masked_strided_loop(PyArrayMethod_Context *context,
+ char *const *data, const npy_intp *dimensions,
+ const npy_intp *strides, NpyAuxData *_auxdata)
{
_masked_stridedloop_data *auxdata = (_masked_stridedloop_data *)_auxdata;
int nargs = auxdata->nargs;
- PyUFuncGenericFunction strided_loop = auxdata->unmasked_stridedloop;
- void *innerloopdata = auxdata->innerloopdata;
+ PyArrayMethod_StridedLoop *strided_loop = auxdata->unmasked_stridedloop;
+ NpyAuxData *strided_loop_auxdata = auxdata->unmasked_auxdata;
char **dataptrs = auxdata->dataptrs;
memcpy(dataptrs, data, nargs * sizeof(char *));
@@ -819,39 +817,37 @@ unmasked_ufunc_loop_as_masked(
/* Process unmasked values */
mask = npy_memchr(mask, 0, mask_stride, N, &subloopsize, 0);
- strided_loop(dataptrs, &subloopsize, strides, innerloopdata);
+ int res = strided_loop(context,
+ dataptrs, &subloopsize, strides, strided_loop_auxdata);
+ if (res != 0) {
+ return res;
+ }
for (int i = 0; i < nargs; i++) {
dataptrs[i] += subloopsize * strides[i];
}
N -= subloopsize;
} while (N > 0);
+
+ return 0;
}
/*
- * TODO: This function will be the masked equivalent to `get_loop`.
- * This function wraps a legacy inner loop so it becomes masked.
- *
- * Returns 0 on success, -1 on error.
+ * Identical to the `get_loop` functions and wraps it. This adds support
+ * to a boolean mask being passed in as a last, additional, operand.
+ * The wrapped loop will only be called for unmasked elements.
+ * (Does not support `move_references` or inner dimensions!)
*/
NPY_NO_EXPORT int
-PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
- PyArray_Descr **dtypes,
- PyUFuncGenericFunction *out_innerloop,
- NpyAuxData **out_innerloopdata,
- int *out_needs_api)
+PyArrayMethod_GetMaskedStridedLoop(
+ PyArrayMethod_Context *context,
+ int aligned, npy_intp *fixed_strides,
+ PyArrayMethod_StridedLoop **out_loop,
+ NpyAuxData **out_transferdata,
+ NPY_ARRAYMETHOD_FLAGS *flags)
{
- int retcode;
_masked_stridedloop_data *data;
- int nargs = ufunc->nin + ufunc->nout;
-
- if (ufunc->legacy_inner_loop_selector == NULL) {
- PyErr_SetString(PyExc_RuntimeError,
- "the ufunc default masked inner loop selector doesn't "
- "yet support wrapping the new inner loop selector, it "
- "still only wraps the legacy inner loop selector");
- return -1;
- }
+ int nargs = context->method->nin + context->method->nout;
/* Add working memory for the data pointers, to modify them in-place */
data = PyMem_Malloc(sizeof(_masked_stridedloop_data) +
@@ -865,18 +861,14 @@ PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
data->unmasked_stridedloop = NULL;
data->nargs = nargs;
- /* Get the unmasked ufunc inner loop */
- retcode = ufunc->legacy_inner_loop_selector(ufunc, dtypes,
- &data->unmasked_stridedloop, &data->innerloopdata,
- out_needs_api);
- if (retcode < 0) {
- PyArray_free(data);
- return retcode;
+ if (context->method->get_strided_loop(context,
+ aligned, 0, fixed_strides,
+ &data->unmasked_stridedloop, &data->unmasked_auxdata, flags) < 0) {
+ PyMem_Free(data);
+ return -1;
}
-
- /* Return the loop function + aux data */
- *out_innerloop = &unmasked_ufunc_loop_as_masked;
- *out_innerloopdata = (NpyAuxData *)data;
+ *out_transferdata = (NpyAuxData *)data;
+ *out_loop = generic_masked_strided_loop;
return 0;
}
diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h
index c2122a2da..fc2304889 100644
--- a/numpy/core/src/multiarray/array_method.h
+++ b/numpy/core/src/multiarray/array_method.h
@@ -17,6 +17,7 @@ typedef enum {
* setup/check. No function should set error flags and ignore them
* since it would interfere with chaining operations (e.g. casting).
*/
+ /* TODO: Change this into a positive flag */
NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2,
/* Whether the method supports unaligned access (not runtime) */
NPY_METH_SUPPORTS_UNALIGNED = 1 << 3,
@@ -158,17 +159,16 @@ npy_default_get_strided_loop(
PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata,
NPY_ARRAYMETHOD_FLAGS *flags);
-/*
- * TODO: This function will not rely on the current ufunc code after the
- * ufunc refactor.
- */
-#include "numpy/ufuncobject.h"
+
NPY_NO_EXPORT int
-PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
- PyArray_Descr **dtypes,
- PyUFuncGenericFunction *out_innerloop,
- NpyAuxData **out_innerloopdata,
- int *out_needs_api);
+PyArrayMethod_GetMaskedStridedLoop(
+ PyArrayMethod_Context *context,
+ int aligned,
+ npy_intp *fixed_strides,
+ PyArrayMethod_StridedLoop **out_loop,
+ NpyAuxData **out_transferdata,
+ NPY_ARRAYMETHOD_FLAGS *flags);
+
/*
* TODO: This function is the internal version, and its error paths may
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index a0154e474..98d4f5a75 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -449,6 +449,11 @@ NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
/*
* If REFS_OK was specified, check whether there are any
* reference arrays and flag it if so.
+ *
+ * NOTE: This really should be unnecessary, but chances are someone relies
+ * on it. The iterator itself does not require the API here
+ * as it only does so for casting/buffering. But in almost all
+ * use-cases the API will be required for whatever operation is done.
*/
if (flags & NPY_ITER_REFS_OK) {
for (iop = 0; iop < nop; ++iop) {
diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c
new file mode 100644
index 000000000..e63780458
--- /dev/null
+++ b/numpy/core/src/umath/dispatching.c
@@ -0,0 +1,688 @@
+/*
+ * This file implements universal function dispatching and promotion (which
+ * is necessary to happen before dispatching).
+ * This is part of the UFunc object. Promotion and dispatching uses the
+ * following things:
+ *
+ * - operand_DTypes: The datatypes as passed in by the user.
+ * - signature: The DTypes fixed by the user with `dtype=` or `signature=`.
+ * - ufunc._loops: A list of all ArrayMethods and promoters, it contains
+ * tuples `(dtypes, ArrayMethod)` or `(dtypes, promoter)`.
+ * - ufunc._dispatch_cache: A cache to store previous promotion and/or
+ * dispatching results.
+ * - The actual arrays are used to support the old code paths where necessary.
+ * (this includes any value-based casting/promotion logic)
+ *
+ * In general, `operand_Dtypes` is always overridden by `signature`. If a
+ * DType is included in the `signature` it must match precisely.
+ *
+ * The process of dispatching and promotion can be summarized in the following
+ * steps:
+ *
+ * 1. Override any `operand_DTypes` from `signature`.
+ * 2. Check if the new `operand_Dtypes` is cached (if it is, got to 4.)
+ * 3. Find the best matching "loop". This is done using multiple dispatching
+ * on all `operand_DTypes` and loop `dtypes`. A matching loop must be
+ * one whose DTypes are superclasses of the `operand_DTypes` (that are
+ * defined). The best matching loop must be better than any other matching
+ * loop. This result is cached.
+ * 4. If the found loop is a promoter: We call the promoter. It can modify
+ * the `operand_DTypes` currently. Then go back to step 2.
+ * (The promoter can call arbitrary code, so it could even add the matching
+ * loop first.)
+ * 5. The final `ArrayMethod` is found, its registered `dtypes` is copied
+ * into the `signature` so that it is available to the ufunc loop.
+ *
+ */
+#include <Python.h>
+
+#define _UMATHMODULE
+#define _MULTIARRAYMODULE
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+#include "numpy/ndarraytypes.h"
+#include "common.h"
+
+#include "dispatching.h"
+#include "dtypemeta.h"
+#include "npy_hashtable.h"
+#include "legacy_array_method.h"
+#include "ufunc_object.h"
+#include "ufunc_type_resolution.h"
+
+
+/* forward declaration */
+static NPY_INLINE PyObject *
+promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
+ PyArrayObject *const ops[],
+ PyArray_DTypeMeta *signature[],
+ PyArray_DTypeMeta *op_dtypes[],
+ npy_bool allow_legacy_promotion, npy_bool cache);
+
+
+/**
+ * Function to add a new loop to the ufunc. This mainly appends it to the
+ * list (as it currently is just a list).
+ *
+ * @param ufunc The universal function to add the loop to.
+ * @param info The tuple (dtype_tuple, ArrayMethod/promoter).
+ * @param ignore_duplicate If 1 and a loop with the same `dtype_tuple` is
+ * found, the function does nothing.
+ */
+static int
+add_ufunc_loop(PyUFuncObject *ufunc, PyObject *info, int ignore_duplicate)
+{
+ /*
+ * Validate the info object, this should likely move to to a different
+ * entry-point in the future (and is mostly unnecessary currently).
+ */
+ if (!PyTuple_CheckExact(info) || PyTuple_GET_SIZE(info) != 2) {
+ PyErr_SetString(PyExc_TypeError,
+ "Info must be a tuple: "
+ "(tuple of DTypes or None, ArrayMethod or promoter)");
+ return -1;
+ }
+ PyObject *DType_tuple = PyTuple_GetItem(info, 0);
+ if (PyTuple_GET_SIZE(DType_tuple) != ufunc->nargs) {
+ PyErr_SetString(PyExc_TypeError,
+ "DType tuple length does not match ufunc number of operands");
+ return -1;
+ }
+ for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(DType_tuple); i++) {
+ PyObject *item = PyTuple_GET_ITEM(DType_tuple, i);
+ if (item != Py_None
+ && !PyObject_TypeCheck(item, &PyArrayDTypeMeta_Type)) {
+ PyErr_SetString(PyExc_TypeError,
+ "DType tuple may only contain None and DType classes");
+ return -1;
+ }
+ }
+ if (!PyObject_TypeCheck(PyTuple_GET_ITEM(info, 1), &PyArrayMethod_Type)) {
+ /* Must also accept promoters in the future. */
+ PyErr_SetString(PyExc_TypeError,
+ "Second argument to info must be an ArrayMethod or promoter");
+ return -1;
+ }
+
+ if (ufunc->_loops == NULL) {
+ ufunc->_loops = PyList_New(0);
+ if (ufunc->_loops == NULL) {
+ return -1;
+ }
+ }
+
+ PyObject *loops = ufunc->_loops;
+ Py_ssize_t length = PyList_Size(loops);
+ for (Py_ssize_t i = 0; i < length; i++) {
+ PyObject *item = PyList_GetItem(loops, i);
+ PyObject *cur_DType_tuple = PyTuple_GetItem(item, 0);
+ int cmp = PyObject_RichCompareBool(cur_DType_tuple, DType_tuple, Py_EQ);
+ if (cmp < 0) {
+ return -1;
+ }
+ if (cmp == 0) {
+ continue;
+ }
+ if (ignore_duplicate) {
+ return 0;
+ }
+ PyErr_Format(PyExc_TypeError,
+ "A loop/promoter has already been registered with '%s' for %R",
+ ufunc_get_name_cstr(ufunc), DType_tuple);
+ return -1;
+ }
+
+ if (PyList_Append(loops, info) < 0) {
+ return -1;
+ }
+ return 0;
+}
+
+
+/**
+ * Resolves the implementation to use, this uses typical multiple dispatching
+ * methods of finding the best matching implementation or resolver.
+ * (Based on `isinstance()`, the knowledge that non-abstract DTypes cannot
+ * be subclassed is used, however.)
+ *
+ * @param ufunc
+ * @param op_dtypes The DTypes that are either passed in (defined by an
+ * operand) or defined by the `signature` as also passed in as
+ * `fixed_DTypes`.
+ * @param out_info Returns the tuple describing the best implementation
+ * (consisting of dtypes and ArrayMethod or promoter).
+ * WARNING: Returns a borrowed reference!
+ * @returns -1 on error 0 on success. Note that the output can be NULL on
+ * success if nothing is found.
+ */
+static int
+resolve_implementation_info(PyUFuncObject *ufunc,
+ PyArray_DTypeMeta *op_dtypes[], PyObject **out_info)
+{
+ int nin = ufunc->nin, nargs = ufunc->nargs;
+ Py_ssize_t size = PySequence_Length(ufunc->_loops);
+ PyObject *best_dtypes = NULL;
+ PyObject *best_resolver_info = NULL;
+
+ for (Py_ssize_t res_idx = 0; res_idx < size; res_idx++) {
+ /* Test all resolvers */
+ PyObject *resolver_info = PySequence_Fast_GET_ITEM(
+ ufunc->_loops, res_idx);
+ PyObject *curr_dtypes = PyTuple_GET_ITEM(resolver_info, 0);
+ /*
+ * Test if the current resolver matches, it could make sense to
+ * reorder these checks to avoid the IsSubclass check as much as
+ * possible.
+ */
+
+ npy_bool matches = NPY_TRUE;
+ /*
+ * NOTE: We check also the output DType. In principle we do not
+ * have to strictly match it (unless it is provided by the
+ * `signature`). This assumes that a (fallback) promoter will
+ * unset the output DType if no exact match is found.
+ */
+ for (Py_ssize_t i = 0; i < nargs; i++) {
+ PyArray_DTypeMeta *given_dtype = op_dtypes[i];
+ PyArray_DTypeMeta *resolver_dtype = (
+ (PyArray_DTypeMeta *)PyTuple_GET_ITEM(curr_dtypes, i));
+ assert((PyObject *)given_dtype != Py_None);
+ if (given_dtype == NULL && i >= nin) {
+ /* Unspecified out always matches (see below for inputs) */
+ continue;
+ }
+ if (given_dtype == resolver_dtype) {
+ continue;
+ }
+ if (!resolver_dtype->abstract) {
+ matches = NPY_FALSE;
+ break;
+ }
+ if (given_dtype == NULL) {
+ /*
+ * If an input was not specified, this is a reduce-like
+ * operation: reductions use `(operand_DType, NULL, out_DType)`
+ * as they only have a single operand. This allows special
+ * reduce promotion rules useful for example for sum/product.
+ * E.g. `np.add.reduce([True, True])` promotes to integer.
+ *
+ * Continuing here allows a promoter to handle reduce-like
+ * promotions explicitly if necessary.
+ * TODO: The `!resolver_dtype->abstract` currently ensures that
+ * this is a promoter. If we allow ArrayMethods to use
+ * abstract DTypes, we may have to reject it here or the
+ * ArrayMethod has to implement the reduce promotion.
+ */
+ continue;
+ }
+ int subclass = PyObject_IsSubclass(
+ (PyObject *)given_dtype, (PyObject *)resolver_dtype);
+ if (subclass < 0) {
+ return -1;
+ }
+ if (!subclass) {
+ matches = NPY_FALSE;
+ break;
+ }
+ /*
+ * TODO: Could consider allowing reverse subclass relation, i.e.
+ * the operation DType passed in to be abstract. That
+ * definitely is OK for outputs (and potentially useful,
+ * you could enforce e.g. an inexact result).
+ * It might also be useful for some stranger promoters.
+ */
+ }
+ if (!matches) {
+ continue;
+ }
+
+ /* The resolver matches, but we have to check if it is better */
+ if (best_dtypes != NULL) {
+ int current_best = -1; /* -1 neither, 0 current best, 1 new */
+ /*
+ * If both have concrete and None in the same position and
+ * they are identical, we will continue searching using the
+ * first best for comparison, in an attempt to find a better
+ * one.
+ * In all cases, we give up resolution, since it would be
+ * necessary to compare to two "best" cases.
+ */
+ int unambiguously_equally_good = 1;
+ for (Py_ssize_t i = 0; i < nargs; i++) {
+ int best;
+
+ PyObject *prev_dtype = PyTuple_GET_ITEM(best_dtypes, i);
+ PyObject *new_dtype = PyTuple_GET_ITEM(curr_dtypes, i);
+
+ if (prev_dtype == new_dtype) {
+ /* equivalent, so this entry does not matter */
+ continue;
+ }
+ /*
+ * TODO: Even if the input is not specified, if we have
+ * abstract DTypes and one is a subclass of the other,
+ * the subclass should be considered a better match
+ * (subclasses are always more specific).
+ */
+ /* If either is None, the other is strictly more specific */
+ if (prev_dtype == Py_None) {
+ unambiguously_equally_good = 0;
+ best = 1;
+ }
+ else if (new_dtype == Py_None) {
+ unambiguously_equally_good = 0;
+ best = 0;
+ }
+ /*
+ * If both are concrete and not identical, this is
+ * ambiguous.
+ */
+ else if (!((PyArray_DTypeMeta *)prev_dtype)->abstract &&
+ !((PyArray_DTypeMeta *)new_dtype)->abstract) {
+ /*
+ * Ambiguous unless the are identical (checked above),
+ * but since they are concrete it does not matter which
+ * best to compare.
+ */
+ best = -1;
+ }
+ /*
+ * TODO: Unreachable, but we will need logic for abstract
+ * DTypes to decide if one is a subclass of the other
+ * (And their subclass relation is well defined.)
+ */
+ else {
+ assert(0);
+ }
+
+ if ((current_best != -1) && (current_best != best)) {
+ /*
+ * We need a clear best, this could be tricky, unless
+ * the signature is identical, we would have to compare
+ * against both of the found ones until we find a
+ * better one.
+ * Instead, only support the case where they are
+ * identical.
+ */
+ /* TODO: Document the above comment, may need relaxing? */
+ current_best = -1;
+ break;
+ }
+ current_best = best;
+ }
+
+ if (current_best == -1) {
+ /*
+ * TODO: It would be nice to have a "diagnostic mode" that
+ * informs if this happens! (An immediate error currently
+ * blocks later legacy resolution, but may work in the
+ * future.)
+ */
+ if (unambiguously_equally_good) {
+ /* unset the best resolver to indicate this */
+ best_resolver_info = NULL;
+ continue;
+ }
+ *out_info = NULL;
+ return 0;
+ }
+ else if (current_best == 0) {
+ /* The new match is not better, continue looking. */
+ continue;
+ }
+ }
+ /* The new match is better (or there was no previous match) */
+ best_dtypes = curr_dtypes;
+ best_resolver_info = resolver_info;
+ }
+ if (best_dtypes == NULL) {
+ /* The non-legacy lookup failed */
+ *out_info = NULL;
+ return 0;
+ }
+
+ *out_info = best_resolver_info;
+ return 0;
+}
+
+
+/*
+ * A promoter can currently be either a C-Capsule containing a promoter
+ * function pointer, or a Python function. Both of these can at this time
+ * only return new operation DTypes (i.e. mutate the input while leaving
+ * those defined by the `signature` unmodified).
+ */
+static PyObject *
+call_promoter_and_recurse(
+ PyUFuncObject *NPY_UNUSED(ufunc), PyObject *NPY_UNUSED(promoter),
+ PyArray_DTypeMeta *NPY_UNUSED(op_dtypes[]),
+ PyArray_DTypeMeta *NPY_UNUSED(signature[]),
+ PyArrayObject *const NPY_UNUSED(operands[]))
+{
+ PyErr_SetString(PyExc_NotImplementedError,
+ "Internal NumPy error, promoters are not used/implemented yet.");
+ return NULL;
+}
+
+
+/*
+ * Convert the DType `signature` into the tuple of descriptors that is used
+ * by the old ufunc type resolvers in `ufunc_type_resolution.c`.
+ *
+ * Note that we do not need to pass the type tuple when we use the legacy path
+ * for type resolution rather than promotion, since the signature is always
+ * correct in that case.
+ */
+static int
+_make_new_typetup(
+ int nop, PyArray_DTypeMeta *signature[], PyObject **out_typetup) {
+ *out_typetup = PyTuple_New(nop);
+ if (*out_typetup == NULL) {
+ return -1;
+ }
+
+ int none_count = 0;
+ for (int i = 0; i < nop; i++) {
+ PyObject *item;
+ if (signature[i] == NULL) {
+ item = Py_None;
+ none_count++;
+ }
+ else {
+ if (!signature[i]->legacy || signature[i]->abstract) {
+ /*
+ * The legacy type resolution can't deal with these.
+ * This path will return `None` or so in the future to
+ * set an error later if the legacy type resolution is used.
+ */
+ PyErr_SetString(PyExc_RuntimeError,
+ "Internal NumPy error: new DType in signature not yet "
+ "supported. (This should be unreachable code!)");
+ Py_SETREF(*out_typetup, NULL);
+ return -1;
+ }
+ item = (PyObject *)signature[i]->singleton;
+ }
+ Py_INCREF(item);
+ PyTuple_SET_ITEM(*out_typetup, i, item);
+ }
+ if (none_count == nop) {
+ /* The whole signature was None, simply ignore type tuple */
+ Py_DECREF(*out_typetup);
+ *out_typetup = NULL;
+ }
+ return 0;
+}
+
+
+/*
+ * Fills in the operation_DTypes with borrowed references. This may change
+ * the content, since it will use the legacy type resolution, which can special
+ * case 0-D arrays (using value-based logic).
+ */
+static int
+legacy_promote_using_legacy_type_resolver(PyUFuncObject *ufunc,
+ PyArrayObject *const *ops, PyArray_DTypeMeta *signature[],
+ PyArray_DTypeMeta *operation_DTypes[], int *out_cacheable)
+{
+ int nargs = ufunc->nargs;
+ PyArray_Descr *out_descrs[NPY_MAXARGS] = {NULL};
+
+ PyObject *type_tuple = NULL;
+ if (_make_new_typetup(nargs, signature, &type_tuple) < 0) {
+ return -1;
+ }
+
+ /*
+ * We use unsafe casting. This is of course not accurate, but that is OK
+ * here, because for promotion/dispatching the casting safety makes no
+ * difference. Whether the actual operands can be casts must be checked
+ * during the type resolution step (which may _also_ calls this!).
+ */
+ if (ufunc->type_resolver(ufunc,
+ NPY_UNSAFE_CASTING, (PyArrayObject **)ops, type_tuple,
+ out_descrs) < 0) {
+ Py_XDECREF(type_tuple);
+ return -1;
+ }
+ Py_XDECREF(type_tuple);
+
+ for (int i = 0; i < nargs; i++) {
+ Py_XSETREF(operation_DTypes[i], NPY_DTYPE(out_descrs[i]));
+ Py_INCREF(operation_DTypes[i]);
+ Py_DECREF(out_descrs[i]);
+ }
+ if (ufunc->type_resolver == &PyUFunc_SimpleBinaryComparisonTypeResolver) {
+ /*
+ * In this one case, the deprecation means that we actually override
+ * the signature.
+ */
+ for (int i = 0; i < nargs; i++) {
+ if (signature[i] != NULL && signature[i] != operation_DTypes[i]) {
+ Py_INCREF(operation_DTypes[i]);
+ Py_SETREF(signature[i], operation_DTypes[i]);
+ *out_cacheable = 0;
+ }
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * Note, this function returns a BORROWED references to info since it adds
+ * it to the loops.
+ */
+NPY_NO_EXPORT PyObject *
+add_and_return_legacy_wrapping_ufunc_loop(PyUFuncObject *ufunc,
+ PyArray_DTypeMeta *operation_dtypes[], int ignore_duplicate)
+{
+ PyObject *DType_tuple = PyArray_TupleFromItems(ufunc->nargs,
+ (PyObject **)operation_dtypes, 0);
+ if (DType_tuple == NULL) {
+ return NULL;
+ }
+
+ PyArrayMethodObject *method = PyArray_NewLegacyWrappingArrayMethod(
+ ufunc, operation_dtypes);
+ if (method == NULL) {
+ Py_DECREF(DType_tuple);
+ return NULL;
+ }
+ PyObject *info = PyTuple_Pack(2, DType_tuple, method);
+ Py_DECREF(DType_tuple);
+ Py_DECREF(method);
+ if (info == NULL) {
+ return NULL;
+ }
+ if (add_ufunc_loop(ufunc, info, ignore_duplicate) < 0) {
+ Py_DECREF(info);
+ return NULL;
+ }
+
+ return info;
+}
+
+
+/*
+ * The main implementation to find the correct DType signature and ArrayMethod
+ * to use for a ufunc. This function may recurse with `do_legacy_fallback`
+ * set to False.
+ *
+ * If value-based promotion is necessary, this is handled ahead of time by
+ * `promote_and_get_ufuncimpl`.
+ */
+static NPY_INLINE PyObject *
+promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
+ PyArrayObject *const ops[],
+ PyArray_DTypeMeta *signature[],
+ PyArray_DTypeMeta *op_dtypes[],
+ npy_bool allow_legacy_promotion, npy_bool cache)
+{
+ /*
+ * Fetch the dispatching info which consists of the implementation and
+ * the DType signature tuple. There are three steps:
+ *
+ * 1. Check the cache.
+ * 2. Check all registered loops/promoters to find the best match.
+ * 3. Fall back to the legacy implementation if no match was found.
+ */
+ PyObject *info = PyArrayIdentityHash_GetItem(ufunc->_dispatch_cache,
+ (PyObject **)op_dtypes);
+ if (info != NULL && PyObject_TypeCheck(
+ PyTuple_GET_ITEM(info, 1), &PyArrayMethod_Type)) {
+ /* Found the ArrayMethod and NOT a promoter: return it */
+ return info;
+ }
+
+ /*
+ * If `info == NULL`, the caching failed, repeat using the full resolution
+ * in `resolve_implementation_info`.
+ */
+ if (info == NULL) {
+ if (resolve_implementation_info(ufunc, op_dtypes, &info) < 0) {
+ return NULL;
+ }
+ if (info != NULL && PyObject_TypeCheck(
+ PyTuple_GET_ITEM(info, 1), &PyArrayMethod_Type)) {
+ /*
+ * Found the ArrayMethod and NOT promoter. Before returning it
+ * add it to the cache for faster lookup in the future.
+ */
+ if (cache && PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
+ (PyObject **)op_dtypes, info, 0) < 0) {
+ return NULL;
+ }
+ return info;
+ }
+ }
+
+ /*
+ * At this point `info` is NULL if there is no matching loop, or it is
+ * a promoter that needs to be used/called:
+ */
+ if (info != NULL) {
+ PyObject *promoter = PyTuple_GET_ITEM(info, 1);
+
+ info = call_promoter_and_recurse(ufunc,
+ promoter, op_dtypes, signature, ops);
+ if (info == NULL && PyErr_Occurred()) {
+ return NULL;
+ }
+ else if (info != NULL) {
+ return info;
+ }
+ }
+
+ /*
+ * Even using promotion no loop was found.
+ * Using promotion failed, this should normally be an error.
+ * However, we need to give the legacy implementation a chance here.
+ * (it will modify `op_dtypes`).
+ */
+ if (!allow_legacy_promotion || ufunc->type_resolver == NULL ||
+ (ufunc->ntypes == 0 && ufunc->userloops == NULL)) {
+ /* Already tried or not a "legacy" ufunc (no loop found, return) */
+ return NULL;
+ }
+
+ PyArray_DTypeMeta *new_op_dtypes[NPY_MAXARGS] = {NULL};
+ int cacheable = 1; /* TODO: only the comparison deprecation needs this */
+ if (legacy_promote_using_legacy_type_resolver(ufunc,
+ ops, signature, new_op_dtypes, &cacheable) < 0) {
+ return NULL;
+ }
+ info = promote_and_get_info_and_ufuncimpl(ufunc,
+ ops, signature, new_op_dtypes, NPY_FALSE, cacheable);
+ for (int i = 0; i < ufunc->nargs; i++) {
+ Py_XDECREF(new_op_dtypes);
+ }
+ return info;
+}
+
+
+/**
+ * The central entry-point for the promotion and dispatching machinery.
+ *
+ * It currently may work with the operands (although it would be possible to
+ * only work with DType (classes/types). This is because it has to ensure
+ * that legacy (value-based promotion) is used when necessary.
+ *
+ * @param ufunc The ufunc object, used mainly for the fallback.
+ * @param ops The array operands (used only for the fallback).
+ * @param signature As input, the DType signature fixed explicitly by the user.
+ * The signature is *filled* in with the operation signature we end up
+ * using.
+ * @param op_dtypes The operand DTypes (without casting) which are specified
+ * either by the `signature` or by an `operand`.
+ * (outputs and the second input can be NULL for reductions).
+ * NOTE: In some cases, the promotion machinery may currently modify
+ * these.
+ * @param force_legacy_promotion If set, we have to use the old type resolution
+ * to implement value-based promotion/casting.
+ */
+NPY_NO_EXPORT PyArrayMethodObject *
+promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
+ PyArrayObject *const ops[],
+ PyArray_DTypeMeta *signature[],
+ PyArray_DTypeMeta *op_dtypes[],
+ npy_bool force_legacy_promotion,
+ npy_bool allow_legacy_promotion)
+{
+ int nargs = ufunc->nargs;
+
+ /*
+ * Get the actual DTypes we operate with by mixing the operand array
+ * ones with the passed signature.
+ */
+ for (int i = 0; i < nargs; i++) {
+ if (signature[i] != NULL) {
+ /*
+ * ignore the operand input, we cannot overwrite signature yet
+ * since it is fixed (cannot be promoted!)
+ */
+ Py_INCREF(signature[i]);
+ Py_XSETREF(op_dtypes[i], signature[i]);
+ assert(i >= ufunc->nin || !signature[i]->abstract);
+ }
+ }
+
+ if (force_legacy_promotion) {
+ /*
+ * We must use legacy promotion for value-based logic. Call the old
+ * resolver once up-front to get the "actual" loop dtypes.
+ * After this (additional) promotion, we can even use normal caching.
+ */
+ int cacheable = 1; /* unused, as we modify the original `op_dtypes` */
+ if (legacy_promote_using_legacy_type_resolver(ufunc,
+ ops, signature, op_dtypes, &cacheable) < 0) {
+ return NULL;
+ }
+ }
+
+ PyObject *info = promote_and_get_info_and_ufuncimpl(ufunc,
+ ops, signature, op_dtypes, allow_legacy_promotion, NPY_TRUE);
+
+ if (info == NULL) {
+ if (!PyErr_Occurred()) {
+ raise_no_loop_found_error(ufunc, (PyObject **)op_dtypes);
+ }
+ return NULL;
+ }
+
+ PyArrayMethodObject *method = (PyArrayMethodObject *)PyTuple_GET_ITEM(info, 1);
+
+ /* Fill `signature` with final DTypes used by the ArrayMethod/inner-loop */
+ PyObject *all_dtypes = PyTuple_GET_ITEM(info, 0);
+ for (int i = 0; i < nargs; i++) {
+ if (signature[i] == NULL) {
+ signature[i] = (PyArray_DTypeMeta *)PyTuple_GET_ITEM(all_dtypes, i);
+ Py_INCREF(signature[i]);
+ }
+ else {
+ assert((PyObject *)signature[i] == PyTuple_GET_ITEM(all_dtypes, i));
+ }
+ }
+
+ return method;
+}
diff --git a/numpy/core/src/umath/dispatching.h b/numpy/core/src/umath/dispatching.h
new file mode 100644
index 000000000..cefad691f
--- /dev/null
+++ b/numpy/core/src/umath/dispatching.h
@@ -0,0 +1,22 @@
+#ifndef _NPY_DISPATCHING_H
+#define _NPY_DISPATCHING_H
+
+#define _UMATHMODULE
+
+#include <numpy/ufuncobject.h>
+#include "array_method.h"
+
+
+NPY_NO_EXPORT PyArrayMethodObject *
+promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
+ PyArrayObject *const ops[],
+ PyArray_DTypeMeta *signature[],
+ PyArray_DTypeMeta *op_dtypes[],
+ npy_bool force_legacy_promotion,
+ npy_bool allow_legacy_promotion);
+
+NPY_NO_EXPORT PyObject *
+add_and_return_legacy_wrapping_ufunc_loop(PyUFuncObject *ufunc,
+ PyArray_DTypeMeta *operation_dtypes[], int ignore_duplicate);
+
+#endif /*_NPY_DISPATCHING_H */
diff --git a/numpy/core/src/umath/legacy_array_method.c b/numpy/core/src/umath/legacy_array_method.c
new file mode 100644
index 000000000..e5043aa71
--- /dev/null
+++ b/numpy/core/src/umath/legacy_array_method.c
@@ -0,0 +1,257 @@
+/*
+ * This file defines most of the machinery in order to wrap legacy style
+ * ufunc loops into new style arraymethods.
+ */
+
+#include <Python.h>
+
+#define _UMATHMODULE
+#define _MULTIARRAYMODULE
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#include "numpy/ndarraytypes.h"
+
+#include "convert_datatype.h"
+#include "array_method.h"
+#include "dtype_transfer.h"
+#include "legacy_array_method.h"
+
+
+typedef struct {
+ NpyAuxData base;
+ /* The legacy loop and additional user data: */
+ PyUFuncGenericFunction loop;
+ void *user_data;
+ /* Whether to check for PyErr_Occurred(), must require GIL if used */
+ int pyerr_check;
+} legacy_array_method_auxdata;
+
+
+/* Use a free list, since we should normally only need one at a time */
+#define NPY_LOOP_DATA_CACHE_SIZE 5
+static int loop_data_num_cached = 0;
+static legacy_array_method_auxdata *loop_data_cache[NPY_LOOP_DATA_CACHE_SIZE];
+
+
+static void
+legacy_array_method_auxdata_free(NpyAuxData *data)
+{
+ if (loop_data_num_cached < NPY_LOOP_DATA_CACHE_SIZE) {
+ loop_data_cache[loop_data_num_cached] = (
+ (legacy_array_method_auxdata *)data);
+ loop_data_num_cached++;
+ }
+ else {
+ PyMem_Free(data);
+ }
+}
+
+#undef NPY_LOOP_DATA_CACHE_SIZE
+
+
+NpyAuxData *
+get_new_loop_data(
+ PyUFuncGenericFunction loop, void *user_data, int pyerr_check)
+{
+ legacy_array_method_auxdata *data;
+ if (NPY_LIKELY(loop_data_num_cached > 0)) {
+ loop_data_num_cached--;
+ data = loop_data_cache[loop_data_num_cached];
+ }
+ else {
+ data = PyMem_Malloc(sizeof(legacy_array_method_auxdata));
+ if (data == NULL) {
+ return NULL;
+ }
+ data->base.free = legacy_array_method_auxdata_free;
+ data->base.clone = NULL; /* no need for cloning (at least for now) */
+ }
+ data->loop = loop;
+ data->user_data = user_data;
+ data->pyerr_check = pyerr_check;
+ return (NpyAuxData *)data;
+}
+
+
+/*
+ * This is a thin wrapper around the legacy loop signature.
+ */
+static int
+generic_wrapped_legacy_loop(PyArrayMethod_Context *NPY_UNUSED(context),
+ char *const *data, const npy_intp *dimensions, const npy_intp *strides,
+ NpyAuxData *auxdata)
+{
+ legacy_array_method_auxdata *ldata = (legacy_array_method_auxdata *)auxdata;
+
+ ldata->loop((char **)data, dimensions, strides, ldata->user_data);
+ if (ldata->pyerr_check && PyErr_Occurred()) {
+ return -1;
+ }
+ return 0;
+}
+
+
+/*
+ * Signal that the old type-resolution function must be used to resolve
+ * the descriptors (mainly/only used for datetimes due to the unit).
+ *
+ * ArrayMethods are expected to implement this, but it is too tricky
+ * to support properly. So we simply set an error that should never be seen.
+ */
+NPY_NO_EXPORT NPY_CASTING
+wrapped_legacy_resolve_descriptors(PyArrayMethodObject *NPY_UNUSED(self),
+ PyArray_DTypeMeta *NPY_UNUSED(dtypes[]),
+ PyArray_Descr *NPY_UNUSED(given_descrs[]),
+ PyArray_Descr *NPY_UNUSED(loop_descrs[]))
+{
+ PyErr_SetString(PyExc_RuntimeError,
+ "cannot use legacy wrapping ArrayMethod without calling the ufunc "
+ "itself. If this error is hit, the solution will be to port the "
+ "legacy ufunc loop implementation to the new API.");
+ return -1;
+}
+
+/*
+ * Much the same as the default type resolver, but tries a bit harder to
+ * preserve metadata.
+ */
+static NPY_CASTING
+simple_legacy_resolve_descriptors(
+ PyArrayMethodObject *method,
+ PyArray_DTypeMeta **dtypes,
+ PyArray_Descr **given_descrs,
+ PyArray_Descr **output_descrs)
+{
+ int nin = method->nin;
+ int nout = method->nout;
+
+ for (int i = 0; i < nin + nout; i++) {
+ if (given_descrs[i] != NULL) {
+ output_descrs[i] = ensure_dtype_nbo(given_descrs[i]);
+ }
+ else if (dtypes[i] == dtypes[0] && i > 0) {
+ /* Preserve metadata from the first operand if same dtype */
+ Py_INCREF(output_descrs[0]);
+ output_descrs[i] = output_descrs[0];
+ }
+ else {
+ output_descrs[i] = dtypes[i]->default_descr(dtypes[i]);
+ }
+ if (output_descrs[i] == NULL) {
+ goto fail;
+ }
+ }
+
+ return NPY_SAFE_CASTING;
+
+ fail:
+ for (int i = 0; i < nin + nout; i++) {
+ Py_CLEAR(output_descrs[i]);
+ }
+ return -1;
+}
+
+
+/*
+ * This function grabs the legacy inner-loop. If this turns out to be slow
+ * we could probably cache it (with some care).
+ */
+NPY_NO_EXPORT int
+get_wrapped_legacy_ufunc_loop(PyArrayMethod_Context *context,
+ int aligned, int move_references,
+ npy_intp *NPY_UNUSED(strides),
+ PyArrayMethod_StridedLoop **out_loop,
+ NpyAuxData **out_transferdata,
+ NPY_ARRAYMETHOD_FLAGS *flags)
+{
+ assert(aligned);
+ assert(!move_references);
+
+ if (context->caller == NULL ||
+ !PyObject_TypeCheck(context->caller, &PyUFunc_Type)) {
+ PyErr_Format(PyExc_RuntimeError,
+ "cannot call %s without its ufunc as caller context.",
+ context->method->name);
+ return -1;
+ }
+
+ PyUFuncObject *ufunc = (PyUFuncObject *)context->caller;
+ void *user_data;
+ int needs_api = 0;
+
+ PyUFuncGenericFunction loop = NULL;
+ /* Note that `needs_api` is not reliable (it was in fact unused normally) */
+ if (ufunc->legacy_inner_loop_selector(ufunc,
+ context->descriptors, &loop, &user_data, &needs_api) < 0) {
+ return -1;
+ }
+ *flags = context->method->flags & NPY_METH_RUNTIME_FLAGS;
+ if (needs_api) {
+ *flags |= NPY_METH_REQUIRES_PYAPI;
+ }
+
+ *out_loop = &generic_wrapped_legacy_loop;
+ *out_transferdata = get_new_loop_data(
+ loop, user_data, (*flags & NPY_METH_REQUIRES_PYAPI) != 0);
+ return 0;
+}
+
+
+/*
+ * Get the unbound ArrayMethod which wraps the instances of the ufunc.
+ * Note that this function stores the result on the ufunc and then only
+ * returns the same one.
+ */
+NPY_NO_EXPORT PyArrayMethodObject *
+PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc,
+ PyArray_DTypeMeta *signature[])
+{
+ char method_name[101];
+ const char *name = ufunc->name ? ufunc->name : "<unknown>";
+ snprintf(method_name, 100, "legacy_ufunc_wrapper_for_%s", name);
+
+ /*
+ * Assume that we require the Python API when any of the (legacy) dtypes
+ * flags it.
+ */
+ int any_output_flexible = 0;
+ NPY_ARRAYMETHOD_FLAGS flags = 0;
+
+ for (int i = 0; i < ufunc->nin+ufunc->nout; i++) {
+ if (signature[i]->singleton->flags & (
+ NPY_ITEM_REFCOUNT | NPY_ITEM_IS_POINTER | NPY_NEEDS_PYAPI)) {
+ flags |= NPY_METH_REQUIRES_PYAPI;
+ }
+ if (signature[i]->parametric) {
+ any_output_flexible = 1;
+ }
+ }
+
+ PyType_Slot slots[3] = {
+ {NPY_METH_get_loop, &get_wrapped_legacy_ufunc_loop},
+ {NPY_METH_resolve_descriptors, &simple_legacy_resolve_descriptors},
+ {0, NULL},
+ };
+ if (any_output_flexible) {
+ /* We cannot use the default descriptor resolver. */
+ slots[1].pfunc = &wrapped_legacy_resolve_descriptors;
+ }
+
+ PyArrayMethod_Spec spec = {
+ .name = method_name,
+ .nin = ufunc->nin,
+ .nout = ufunc->nout,
+ .dtypes = signature,
+ .flags = flags,
+ .slots = slots,
+ .casting = NPY_EQUIV_CASTING,
+ };
+
+ PyBoundArrayMethodObject *bound_res = PyArrayMethod_FromSpec_int(&spec, 1);
+ if (bound_res == NULL) {
+ return NULL;
+ }
+ PyArrayMethodObject *res = bound_res->method;
+ Py_INCREF(res);
+ Py_DECREF(bound_res);
+ return res;
+}
diff --git a/numpy/core/src/umath/legacy_array_method.h b/numpy/core/src/umath/legacy_array_method.h
new file mode 100644
index 000000000..0dec1fb3a
--- /dev/null
+++ b/numpy/core/src/umath/legacy_array_method.h
@@ -0,0 +1,33 @@
+#ifndef _NPY_LEGACY_ARRAY_METHOD_H
+#define _NPY_LEGACY_ARRAY_METHOD_H
+
+#include "numpy/ndarraytypes.h"
+#include "numpy/ufuncobject.h"
+#include "array_method.h"
+
+
+NPY_NO_EXPORT PyArrayMethodObject *
+PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc,
+ PyArray_DTypeMeta *signature[]);
+
+
+
+/*
+ * The following two symbols are in the header so that other places can use
+ * them to probe for special cases (or whether an ArrayMethod is a "legacy"
+ * one).
+ */
+NPY_NO_EXPORT int
+get_wrapped_legacy_ufunc_loop(PyArrayMethod_Context *context,
+ int aligned, int move_references,
+ npy_intp *NPY_UNUSED(strides),
+ PyArrayMethod_StridedLoop **out_loop,
+ NpyAuxData **out_transferdata,
+ NPY_ARRAYMETHOD_FLAGS *flags);
+
+NPY_NO_EXPORT NPY_CASTING
+wrapped_legacy_resolve_descriptors(PyArrayMethodObject *,
+ PyArray_DTypeMeta **, PyArray_Descr **, PyArray_Descr **);
+
+
+#endif /*_NPY_LEGACY_ARRAY_METHOD_H */
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 60a315f6e..5a32ae603 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -41,6 +41,7 @@
#include "ufunc_type_resolution.h"
#include "reduction.h"
#include "mem_overlap.h"
+#include "npy_hashtable.h"
#include "ufunc_object.h"
#include "override.h"
@@ -49,7 +50,10 @@
#include "common.h"
#include "dtypemeta.h"
#include "numpyos.h"
+#include "dispatching.h"
#include "convert_datatype.h"
+#include "legacy_array_method.h"
+#include "abstractdtypes.h"
/********** PRINTF DEBUG TRACING **************/
#define NPY_UF_DBG_TRACING 0
@@ -101,6 +105,12 @@ _get_wrap_prepare_args(ufunc_full_args full_args) {
static PyObject *
prepare_input_arguments_for_outer(PyObject *args, PyUFuncObject *ufunc);
+static int
+resolve_descriptors(int nop,
+ PyUFuncObject *ufunc, PyArrayMethodObject *ufuncimpl,
+ PyArrayObject *operands[], PyArray_Descr *dtypes[],
+ PyArray_DTypeMeta *signature[], NPY_CASTING casting);
+
/*UFUNC_API*/
NPY_NO_EXPORT int
@@ -911,7 +921,9 @@ _wheremask_converter(PyObject *obj, PyArrayObject **wheremask)
*/
static int
convert_ufunc_arguments(PyUFuncObject *ufunc,
- ufunc_full_args full_args, PyArrayObject **out_op,
+ ufunc_full_args full_args, PyArrayObject *out_op[],
+ PyArray_DTypeMeta *out_op_DTypes[],
+ npy_bool *force_legacy_promotion, npy_bool *allow_legacy_promotion,
PyObject *order_obj, NPY_ORDER *out_order,
PyObject *casting_obj, NPY_CASTING *out_casting,
PyObject *subok_obj, npy_bool *out_subok,
@@ -924,21 +936,55 @@ convert_ufunc_arguments(PyUFuncObject *ufunc,
PyObject *obj;
/* Convert and fill in input arguments */
+ npy_bool all_scalar = NPY_TRUE;
+ npy_bool any_scalar = NPY_FALSE;
+ *allow_legacy_promotion = NPY_TRUE;
+ *force_legacy_promotion = NPY_FALSE;
for (int i = 0; i < nin; i++) {
obj = PyTuple_GET_ITEM(full_args.in, i);
if (PyArray_Check(obj)) {
- PyArrayObject *obj_a = (PyArrayObject *)obj;
- out_op[i] = (PyArrayObject *)PyArray_FromArray(obj_a, NULL, 0);
+ out_op[i] = (PyArrayObject *)obj;
+ Py_INCREF(out_op[i]);
}
else {
- out_op[i] = (PyArrayObject *)PyArray_FromAny(obj,
- NULL, 0, 0, 0, NULL);
+ /* Convert the input to an array and check for special cases */
+ out_op[i] = (PyArrayObject *)PyArray_FromAny(obj, NULL, 0, 0, 0, NULL);
+ if (out_op[i] == NULL) {
+ goto fail;
+ }
}
+ out_op_DTypes[i] = NPY_DTYPE(PyArray_DESCR(out_op[i]));
+ Py_INCREF(out_op_DTypes[i]);
- if (out_op[i] == NULL) {
- goto fail;
+ if (!out_op_DTypes[i]->legacy) {
+ *allow_legacy_promotion = NPY_FALSE;
+ }
+ if (PyArray_NDIM(out_op[i]) == 0) {
+ any_scalar = NPY_TRUE;
+ }
+ else {
+ all_scalar = NPY_FALSE;
+ continue;
}
+ /*
+ * TODO: we need to special case scalars here, if the input is a
+ * Python int, float, or complex, we have to use the "weak"
+ * DTypes: `PyArray_PyIntAbstractDType`, etc.
+ * This is to allow e.g. `float32(1.) + 1` to return `float32`.
+ * The correct array dtype can only be found after promotion for
+ * such a "weak scalar". We could avoid conversion here, but
+ * must convert it for use in the legacy promotion.
+ * There is still a small chance that this logic can instead
+ * happen inside the Python operators.
+ */
+ }
+ if (*allow_legacy_promotion && (!all_scalar && any_scalar)) {
+ *force_legacy_promotion = should_use_min_scalar(nin, out_op, 0, NULL);
+ /*
+ * TODO: if this is False, we end up in a "very slow" path that should
+ * be avoided. This makes `int_arr + 0.` ~40% slower.
+ */
}
/* Convert and fill in output arguments */
@@ -948,6 +994,10 @@ convert_ufunc_arguments(PyUFuncObject *ufunc,
if (_set_out_array(obj, out_op + i + nin) < 0) {
goto fail;
}
+ if (out_op[i] != NULL) {
+ out_op_DTypes[i + nin] = NPY_DTYPE(PyArray_DESCR(out_op[i]));
+ Py_INCREF(out_op_DTypes[i + nin]);
+ }
}
}
@@ -991,11 +1041,11 @@ fail:
* -1 if there is an error.
*/
static int
-check_for_trivial_loop(PyUFuncObject *ufunc,
+check_for_trivial_loop(PyArrayMethodObject *ufuncimpl,
PyArrayObject **op, PyArray_Descr **dtypes,
- npy_intp buffersize)
+ NPY_CASTING casting, npy_intp buffersize)
{
- int i, nin = ufunc->nin, nop = nin + ufunc->nout;
+ int i, nin = ufuncimpl->nin, nop = nin + ufuncimpl->nout;
for (i = 0; i < nop; ++i) {
/*
@@ -1017,6 +1067,10 @@ check_for_trivial_loop(PyUFuncObject *ufunc,
if (!(safety & _NPY_CAST_IS_VIEW)) {
must_copy = 1;
}
+
+ if (PyArray_MinCastSafety(safety, casting) != casting) {
+ return 0; /* the cast is not safe enough */
+ }
}
if (must_copy) {
/*
@@ -1132,14 +1186,15 @@ prepare_ufunc_output(PyUFuncObject *ufunc,
*
* Returns -2 if a trivial loop is not possible, 0 on success and -1 on error.
*/
-static NPY_INLINE int
-try_trivial_single_output_loop(PyUFuncObject *ufunc,
- PyArrayObject *op[], PyArray_Descr *dtypes[],
- NPY_ORDER order, PyObject *arr_prep[], ufunc_full_args full_args)
+static int
+try_trivial_single_output_loop(PyArrayMethod_Context *context,
+ PyArrayObject *op[], NPY_ORDER order,
+ PyObject *arr_prep[], ufunc_full_args full_args,
+ int errormask, PyObject *extobj)
{
- int nin = ufunc->nin;
+ int nin = context->method->nin;
int nop = nin + 1;
- assert(ufunc->nout == 1);
+ assert(context->method->nout == 1);
/* The order of all N-D contiguous operands, can be fixed by `order` */
int operation_order = 0;
@@ -1204,14 +1259,14 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc,
}
if (op[nin] == NULL) {
- Py_INCREF(dtypes[nin]);
+ Py_INCREF(context->descriptors[nin]);
op[nin] = (PyArrayObject *) PyArray_NewFromDescr(&PyArray_Type,
- dtypes[nin], operation_ndim, operation_shape,
+ context->descriptors[nin], operation_ndim, operation_shape,
NULL, NULL, operation_order==NPY_ARRAY_F_CONTIGUOUS, NULL);
if (op[nin] == NULL) {
return -1;
}
- fixed_strides[nin] = dtypes[nin]->elsize;
+ fixed_strides[nin] = context->descriptors[nin]->elsize;
}
else {
/* If any input overlaps with the output, we use the full path. */
@@ -1232,7 +1287,7 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc,
}
/* Call the __prepare_array__ if necessary */
- if (prepare_ufunc_output(ufunc, &op[nin],
+ if (prepare_ufunc_output((PyUFuncObject *)context->caller, &op[nin],
arr_prep[0], full_args, 0) < 0) {
return -1;
}
@@ -1243,46 +1298,88 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc,
*/
char *data[NPY_MAXARGS];
npy_intp count = PyArray_MultiplyList(operation_shape, operation_ndim);
- int needs_api = 0;
NPY_BEGIN_THREADS_DEF;
- PyUFuncGenericFunction innerloop;
- void *innerloopdata = NULL;
- if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
- &innerloop, &innerloopdata, &needs_api) < 0) {
+ PyArrayMethod_StridedLoop *strided_loop;
+ NpyAuxData *auxdata = NULL;
+ NPY_ARRAYMETHOD_FLAGS flags = 0;
+ if (context->method->get_strided_loop(context,
+ 1, 0, fixed_strides,
+ &strided_loop, &auxdata, &flags) < 0) {
return -1;
}
-
- for (int iop = 0; iop < nop; iop++) {
+ for (int iop=0; iop < nop; iop++) {
data[iop] = PyArray_BYTES(op[iop]);
- needs_api |= PyDataType_REFCHK(dtypes[iop]);
}
- if (!needs_api) {
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ npy_clear_floatstatus_barrier((char *)context);
+ }
+ if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
NPY_BEGIN_THREADS_THRESHOLDED(count);
}
- innerloop(data, &count, fixed_strides, innerloopdata);
+ int res = strided_loop(context, data, &count, fixed_strides, auxdata);
NPY_END_THREADS;
+ NPY_AUXDATA_FREE(auxdata);
+
+ if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* NOTE: We could check float errors even when `res < 0` */
+ const char *name = ufunc_get_name_cstr((PyUFuncObject *)context->caller);
+ res = _check_ufunc_fperr(errormask, extobj, name);
+ }
+ return res;
+}
+
+
+/*
+ * Check casting: It would be nice to just move this into the iterator
+ * or pass in the full cast information. But this can special case
+ * the logical functions and prints a better error message.
+ */
+static NPY_INLINE int
+validate_casting(PyArrayMethodObject *method, PyUFuncObject *ufunc,
+ PyArrayObject *ops[], PyArray_Descr *descriptors[],
+ NPY_CASTING casting)
+{
+ if (method->resolve_descriptors == &wrapped_legacy_resolve_descriptors) {
+ /*
+ * In this case the legacy type resolution was definitely called
+ * and we do not need to check (astropy/pyerfa relied on this).
+ */
+ return 0;
+ }
+ if (PyUFunc_ValidateCasting(ufunc, casting, ops, descriptors) < 0) {
+ return -1;
+ }
return 0;
}
+/*
+ * The ufunc loop implementation for both normal ufunc calls and masked calls
+ * when the iterator has to be used.
+ *
+ * See `PyUFunc_GenericFunctionInternal` for more information (where this is
+ * called from).
+ */
static int
-execute_ufunc_loop(PyUFuncObject *ufunc,
- int masked,
- PyArrayObject **op,
- PyArray_Descr **dtypes,
- NPY_ORDER order,
- npy_intp buffersize,
- PyObject **arr_prep,
- ufunc_full_args full_args,
- npy_uint32 *op_flags)
+execute_ufunc_loop(PyArrayMethod_Context *context, int masked,
+ PyArrayObject **op, NPY_ORDER order, npy_intp buffersize,
+ NPY_CASTING casting,
+ PyObject **arr_prep, ufunc_full_args full_args,
+ npy_uint32 *op_flags, int errormask, PyObject *extobj)
{
- int nin = ufunc->nin, nout = ufunc->nout;
+ PyUFuncObject *ufunc = (PyUFuncObject *)context->caller;
+ int nin = context->method->nin, nout = context->method->nout;
int nop = nin + nout;
+ if (validate_casting(context->method,
+ ufunc, op, context->descriptors, casting) < 0) {
+ return -1;
+ }
+
if (masked) {
assert(PyArray_TYPE(op[nop]) == NPY_BOOL);
if (ufunc->_always_null_previously_masked_innerloop_selector != NULL) {
@@ -1345,7 +1442,7 @@ execute_ufunc_loop(PyUFuncObject *ufunc,
NpyIter *iter = NpyIter_AdvancedNew(nop + masked, op,
iter_flags,
order, NPY_UNSAFE_CASTING,
- op_flags, dtypes,
+ op_flags, context->descriptors,
-1, NULL, NULL, buffersize);
if (iter == NULL) {
return -1;
@@ -1410,22 +1507,25 @@ execute_ufunc_loop(PyUFuncObject *ufunc,
}
/*
- * Get the inner loop.
+ * Get the inner loop, with the possibility of specialization
+ * based on the fixed strides.
*/
- int needs_api = 0;
- PyUFuncGenericFunction innerloop;
- void *innerloopdata = NULL;
+ PyArrayMethod_StridedLoop *strided_loop;
+ NpyAuxData *auxdata;
+ npy_intp fixed_strides[NPY_MAXARGS];
+
+ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
+ NPY_ARRAYMETHOD_FLAGS flags = 0;
if (masked) {
- if (PyUFunc_DefaultMaskedInnerLoopSelector(ufunc,
- dtypes, &innerloop, (NpyAuxData **)&innerloopdata,
- &needs_api) < 0) {
+ if (PyArrayMethod_GetMaskedStridedLoop(context,
+ 1, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
NpyIter_Deallocate(iter);
return -1;
}
}
else {
- if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
- &innerloop, &innerloopdata, &needs_api) < 0) {
+ if (context->method->get_strided_loop(context,
+ 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
NpyIter_Deallocate(iter);
return -1;
}
@@ -1434,87 +1534,45 @@ execute_ufunc_loop(PyUFuncObject *ufunc,
/* Get the variables needed for the loop */
NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
+ NPY_AUXDATA_FREE(auxdata);
NpyIter_Deallocate(iter);
- if (masked) {
- NPY_AUXDATA_FREE((NpyAuxData *)innerloopdata);
- }
return -1;
}
char **dataptr = NpyIter_GetDataPtrArray(iter);
npy_intp *strides = NpyIter_GetInnerStrideArray(iter);
npy_intp *countptr = NpyIter_GetInnerLoopSizePtr(iter);
- needs_api |= NpyIter_IterationNeedsAPI(iter);
+ int needs_api = NpyIter_IterationNeedsAPI(iter);
NPY_BEGIN_THREADS_DEF;
- if (!needs_api) {
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ npy_clear_floatstatus_barrier((char *)context);
+ }
+ if (!needs_api && !(flags & NPY_METH_REQUIRES_PYAPI)) {
NPY_BEGIN_THREADS_THRESHOLDED(full_size);
}
NPY_UF_DBG_PRINT("Actual inner loop:\n");
/* Execute the loop */
+ int res;
do {
- NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*count_ptr);
- innerloop(dataptr, countptr, strides, innerloopdata);
- } while (!(needs_api && PyErr_Occurred()) && iternext(iter));
+ NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*countptr);
+ res = strided_loop(context, dataptr, countptr, strides, auxdata);
+ } while (res == 0 && iternext(iter));
NPY_END_THREADS;
- if (masked) {
- NPY_AUXDATA_FREE((NpyAuxData *)innerloopdata);
- }
+ NPY_AUXDATA_FREE(auxdata);
- /*
- * Currently `innerloop` may leave an error set, in this case
- * NpyIter_Deallocate will always return an error as well.
- */
- if (NpyIter_Deallocate(iter) == NPY_FAIL) {
- return -1;
+ if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* NOTE: We could check float errors even when `res < 0` */
+ const char *name = ufunc_get_name_cstr((PyUFuncObject *)context->caller);
+ res = _check_ufunc_fperr(errormask, extobj, name);
}
- return 0;
-}
-/*
- * ufunc - the ufunc to call
- * trivial_loop_ok - 1 if no alignment, data conversion, etc required
- * op - the operands (ufunc->nin + ufunc->nout of them)
- * dtypes - the dtype of each operand
- * order - the loop execution order/output memory order
- * buffersize - how big of a buffer to use
- * arr_prep - the __array_prepare__ functions for the outputs
- * full_args - the original input, output PyObject *
- * op_flags - per-operand flags, a combination of NPY_ITER_* constants
- */
-static int
-execute_legacy_ufunc_loop(PyUFuncObject *ufunc,
- int trivial_loop_ok,
- PyArrayObject **op,
- PyArray_Descr **dtypes,
- NPY_ORDER order,
- npy_intp buffersize,
- PyObject **arr_prep,
- ufunc_full_args full_args,
- npy_uint32 *op_flags)
-{
- /* First check for the trivial cases that don't need an iterator */
- if (trivial_loop_ok && ufunc->nout == 1) {
- int fast_path_result = try_trivial_single_output_loop(ufunc,
- op, dtypes, order, arr_prep, full_args);
- if (fast_path_result != -2) {
- return fast_path_result;
- }
- }
-
- /*
- * If no trivial loop matched, an iterator is required to
- * resolve broadcasting, etc
- */
- NPY_UF_DBG_PRINT("iterator loop\n");
- if (execute_ufunc_loop(ufunc, 0, op, dtypes, order,
- buffersize, arr_prep, full_args, op_flags) < 0) {
+ if (!NpyIter_Deallocate(iter)) {
return -1;
}
-
- return 0;
+ return res;
}
@@ -2007,9 +2065,9 @@ _initialize_variable_parts(PyUFuncObject *ufunc,
static int
PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
- PyArray_Descr *operation_descrs[],
+ PyArrayMethodObject *ufuncimpl, PyArray_Descr *operation_descrs[],
PyArrayObject *op[], PyObject *extobj,
- NPY_ORDER order,
+ NPY_CASTING casting, NPY_ORDER order,
PyObject *axis, PyObject *axes, int keepdims)
{
int nin, nout;
@@ -2034,13 +2092,12 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
/* These parameters come from extobj= or from a TLS global */
int buffersize = 0, errormask = 0;
- /* The selected inner loop */
- PyUFuncGenericFunction innerloop = NULL;
- void *innerloopdata = NULL;
/* The dimensions which get passed to the inner loop */
npy_intp inner_dimensions[NPY_MAXDIMS+1];
/* The strides which get passed to the inner loop */
npy_intp *inner_strides = NULL;
+ /* Auxiliary data allocated by the ufuncimpl (ArrayMethod) */
+ NpyAuxData *auxdata = NULL;
/* The sizes of the core dimensions (# entries is ufunc->core_num_dim_ix) */
npy_intp *core_dim_sizes = inner_dimensions + 1;
@@ -2057,6 +2114,11 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s\n", ufunc_name);
+ if (validate_casting(ufuncimpl,
+ ufunc, op, operation_descrs, casting) < 0) {
+ return -1;
+ }
+
/* Initialize possibly variable parts to the values from the ufunc */
retval = _initialize_variable_parts(ufunc, op_core_num_dims,
core_dim_sizes, core_dim_flags);
@@ -2274,18 +2336,11 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
NPY_ITER_WRITEONLY |
NPY_UFUNC_DEFAULT_OUTPUT_FLAGS,
op_flags);
- /* For the generalized ufunc, we get the loop right away too */
- retval = ufunc->legacy_inner_loop_selector(ufunc,
- operation_descrs, &innerloop, &innerloopdata, &needs_api);
- if (retval < 0) {
- goto fail;
- }
/*
* Set up the iterator per-op flags. For generalized ufuncs, we
* can't do buffering, so must COPY or UPDATEIFCOPY.
*/
-
iter_flags = ufunc->iter_flags |
NPY_ITER_MULTI_INDEX |
NPY_ITER_REFS_OK |
@@ -2394,21 +2449,34 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
/*
* The first nop strides are for the inner loop (but only can
- * copy them after removing the core axes)
+ * copy them after removing the core axes). The strides will not change
+ * if the iterator is not buffered (they are effectively fixed).
+ * Supporting buffering would make sense, but probably would have to be
+ * done in the inner-loop itself (not the iterator).
*/
+ assert(!NpyIter_IsBuffered(iter));
memcpy(inner_strides, NpyIter_GetInnerStrideArray(iter),
NPY_SIZEOF_INTP * nop);
-#if 0
- printf("strides: ");
- for (i = 0; i < nop+core_dim_ixs_size; ++i) {
- printf("%d ", (int)inner_strides[i]);
+ /* Final preparation of the arraymethod call */
+ PyArrayMethod_Context context = {
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = operation_descrs,
+ };
+ PyArrayMethod_StridedLoop *strided_loop;
+ NPY_ARRAYMETHOD_FLAGS flags = 0;
+
+ if (ufuncimpl->get_strided_loop(&context, 1, 0, inner_strides,
+ &strided_loop, &auxdata, &flags) < 0) {
+ goto fail;
+ }
+ needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ needs_api |= NpyIter_IterationNeedsAPI(iter);
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* Start with the floating-point exception flags cleared */
+ npy_clear_floatstatus_barrier((char*)&iter);
}
- printf("\n");
-#endif
-
- /* Start with the floating-point exception flags cleared */
- npy_clear_floatstatus_barrier((char*)&iter);
NPY_UF_DBG_PRINT("Executing inner loop\n");
@@ -2427,29 +2495,28 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
}
dataptr = NpyIter_GetDataPtrArray(iter);
count_ptr = NpyIter_GetInnerLoopSizePtr(iter);
- needs_api = NpyIter_IterationNeedsAPI(iter);
- if (!needs_api && !NpyIter_IterationNeedsAPI(iter)) {
+ if (!needs_api) {
NPY_BEGIN_THREADS_THRESHOLDED(total_problem_size);
}
do {
inner_dimensions[0] = *count_ptr;
- innerloop(dataptr, inner_dimensions, inner_strides, innerloopdata);
- } while (!(needs_api && PyErr_Occurred()) && iternext(iter));
+ retval = strided_loop(&context,
+ dataptr, inner_dimensions, inner_strides, auxdata);
+ } while (retval == 0 && iternext(iter));
if (!needs_api && !NpyIter_IterationNeedsAPI(iter)) {
NPY_END_THREADS;
}
}
- /* Check whether any errors occurred during the loop */
- if (PyErr_Occurred() ||
- _check_ufunc_fperr(errormask, extobj, ufunc_name) < 0) {
- retval = -1;
- goto fail;
+ if (retval == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* NOTE: We could check float errors even when `res < 0` */
+ retval = _check_ufunc_fperr(errormask, extobj, ufunc_name);
}
PyArray_free(inner_strides);
+ NPY_AUXDATA_FREE(auxdata);
if (NpyIter_Deallocate(iter) < 0) {
retval = -1;
}
@@ -2464,6 +2531,7 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
fail:
NPY_UF_DBG_PRINT1("Returning failure code %d\n", retval);
PyArray_free(inner_strides);
+ NPY_AUXDATA_FREE(auxdata);
NpyIter_Deallocate(iter);
PyArray_free(remap_axis_memory);
PyArray_free(remap_axis);
@@ -2473,17 +2541,18 @@ fail:
static int
PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc,
- PyArray_Descr *operation_descrs[],
- PyArrayObject *op[], PyObject *extobj, NPY_ORDER order,
+ PyArrayMethodObject *ufuncimpl, PyArray_Descr *operation_descrs[],
+ PyArrayObject *op[], PyObject *extobj,
+ NPY_CASTING casting, NPY_ORDER order,
PyObject *output_array_prepare[], ufunc_full_args full_args,
PyArrayObject *wheremask)
{
int nin = ufunc->nin, nout = ufunc->nout, nop = nin + nout;
- const char *ufunc_name = ufunc_name = ufunc_get_name_cstr(ufunc);;
- int retval = -1;
- npy_uint32 op_flags[NPY_MAXARGS];
+ const char *ufunc_name = ufunc_get_name_cstr(ufunc);
+
npy_intp default_op_out_flags;
+ npy_uint32 op_flags[NPY_MAXARGS];
/* These parameters come from extobj= or from a TLS global */
int buffersize = 0, errormask = 0;
@@ -2495,8 +2564,6 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc,
return -1;
}
- NPY_UF_DBG_PRINT("Finding inner loop\n");
-
if (wheremask != NULL) {
/* Set up the flags. */
default_op_out_flags = NPY_ITER_NO_SUBTYPE |
@@ -2513,6 +2580,13 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc,
default_op_out_flags, op_flags);
}
+ /* Final preparation of the arraymethod call */
+ PyArrayMethod_Context context = {
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = operation_descrs,
+ };
+
/* Do the ufunc loop */
if (wheremask != NULL) {
NPY_UF_DBG_PRINT("Executing masked inner loop\n");
@@ -2525,52 +2599,38 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc,
op[nop] = wheremask;
operation_descrs[nop] = NULL;
- /* Set up the flags */
-
- npy_clear_floatstatus_barrier((char*)&ufunc);
- retval = execute_ufunc_loop(ufunc, 1,
- op, operation_descrs, order,
- buffersize, output_array_prepare,
- full_args, op_flags);
+ return execute_ufunc_loop(&context, 1,
+ op, order, buffersize, casting,
+ output_array_prepare, full_args, op_flags,
+ errormask, extobj);
}
else {
- NPY_UF_DBG_PRINT("Executing legacy inner loop\n");
+ NPY_UF_DBG_PRINT("Executing normal inner loop\n");
/*
* This checks whether a trivial loop is ok, making copies of
- * scalar and one dimensional operands if that will help.
- * Since it requires dtypes, it can only be called after
- * ufunc->type_resolver
+ * scalar and one dimensional operands if that should help.
*/
- int trivial_ok = check_for_trivial_loop(ufunc,
- op, operation_descrs, buffersize);
+ int trivial_ok = check_for_trivial_loop(ufuncimpl,
+ op, operation_descrs, casting, buffersize);
if (trivial_ok < 0) {
return -1;
}
+ if (trivial_ok && context.method->nout == 1) {
+ /* Try to handle everything without using the (heavy) iterator */
+ int retval = try_trivial_single_output_loop(&context,
+ op, order, output_array_prepare, full_args,
+ errormask, extobj);
+ if (retval != -2) {
+ return retval;
+ }
+ }
- /* check_for_trivial_loop on half-floats can overflow */
- npy_clear_floatstatus_barrier((char*)&ufunc);
-
- retval = execute_legacy_ufunc_loop(ufunc, trivial_ok,
- op, operation_descrs, order,
- buffersize, output_array_prepare,
- full_args, op_flags);
- }
- if (retval < 0) {
- return -1;
- }
-
- /*
- * Check whether any errors occurred during the loop. The loops should
- * indicate this in retval, but since the inner-loop currently does not
- * report errors, this does not happen in all branches (at this time).
- */
- if (PyErr_Occurred() ||
- _check_ufunc_fperr(errormask, extobj, ufunc_name) < 0) {
- return -1;
+ return execute_ufunc_loop(&context, 0,
+ op, order, buffersize, casting,
+ output_array_prepare, full_args, op_flags,
+ errormask, extobj);
}
-
- return retval;
}
@@ -4248,83 +4308,30 @@ _get_dtype(PyObject *dtype_obj) {
}
-static int
-_make_new_typetup(
- int nop, PyArray_DTypeMeta *signature[], PyObject **out_typetup) {
- *out_typetup = PyTuple_New(nop);
- if (*out_typetup == NULL) {
- return -1;
- }
-
- int noncount = 0;
- for (int i = 0; i < nop; i++) {
- PyObject *item;
- if (signature[i] == NULL) {
- item = Py_None;
- noncount++;
- }
- else {
- if (!signature[i]->legacy || signature[i]->abstract) {
- /*
- * The legacy type resolution can't deal with these.
- * This path will return `None` or so in the future to
- * set an error later if the legacy type resolution is used.
- */
- PyErr_SetString(PyExc_RuntimeError,
- "Internal NumPy error: new DType in signature not yet "
- "supported. (This should be unreachable code!)");
- Py_SETREF(*out_typetup, NULL);
- return -1;
- }
- item = (PyObject *)signature[i]->singleton;
- }
- Py_INCREF(item);
- PyTuple_SET_ITEM(*out_typetup, i, item);
- }
- if (noncount == nop) {
- /* The whole signature was None, simply ignore type tuple */
- Py_DECREF(*out_typetup);
- *out_typetup = NULL;
- }
- return 0;
-}
-
-
/*
- * Finish conversion parsing of the type tuple. NumPy always only honored
- * the type number for passed in descriptors/dtypes.
+ * Finish conversion parsing of the DType signature. NumPy always only
+ * honored the type number for passed in descriptors/dtypes.
* The `dtype` argument is interpreted as the first output DType (not
* descriptor).
* Unlike the dtype of an `out` array, it influences loop selection!
*
- * NOTE: This function replaces the type tuple if passed in (it steals
- * the original reference and returns a new object and reference)!
- * The caller must XDECREF the type tuple both on error or success.
- *
- * The function returns a new, normalized type-tuple.
+ * It is the caller's responsibility to clean `signature` and NULL it before
+ * calling.
*/
static int
-_get_normalized_typetup(PyUFuncObject *ufunc,
- PyObject *dtype_obj, PyObject *signature_obj, PyObject **out_typetup)
+_get_fixed_signature(PyUFuncObject *ufunc,
+ PyObject *dtype_obj, PyObject *signature_obj,
+ PyArray_DTypeMeta **signature)
{
if (dtype_obj == NULL && signature_obj == NULL) {
return 0;
}
- int res = -1;
int nin = ufunc->nin, nout = ufunc->nout, nop = nin + nout;
- /*
- * TODO: `signature` will be the main result in the future and
- * not the typetup. (Type tuple construction can be deffered to when
- * the legacy fallback is used).
- */
- PyArray_DTypeMeta *signature[NPY_MAXARGS];
- memset(signature, '\0', sizeof(*signature) * nop);
if (dtype_obj != NULL) {
if (dtype_obj == Py_None) {
/* If `dtype=None` is passed, no need to do anything */
- assert(*out_typetup == NULL);
return 0;
}
if (nout == 0) {
@@ -4342,8 +4349,7 @@ _get_normalized_typetup(PyUFuncObject *ufunc,
signature[i] = dtype;
}
Py_DECREF(dtype);
- res = _make_new_typetup(nop, signature, out_typetup);
- goto finish;
+ return 0;
}
assert(signature_obj != NULL);
@@ -4359,32 +4365,46 @@ _get_normalized_typetup(PyUFuncObject *ufunc,
if (PyTuple_GET_ITEM(signature_obj, 0) == Py_None) {
PyErr_SetString(PyExc_TypeError,
"a single item type tuple cannot contain None.");
- goto finish;
+ return -1;
}
if (DEPRECATE("The use of a length 1 tuple for the ufunc "
"`signature` is deprecated. Use `dtype` or fill the"
"tuple with `None`s.") < 0) {
- goto finish;
+ return -1;
}
/* Use the same logic as for `dtype=` */
- res = _get_normalized_typetup(ufunc,
- PyTuple_GET_ITEM(signature_obj, 0), NULL, out_typetup);
- goto finish;
+ return _get_fixed_signature(ufunc,
+ PyTuple_GET_ITEM(signature_obj, 0), NULL, signature);
}
if (n != nop) {
PyErr_Format(PyExc_ValueError,
"a type-tuple must be specified of length %d for ufunc '%s'",
nop, ufunc_get_name_cstr(ufunc));
- goto finish;
+ return -1;
}
for (int i = 0; i < nop; ++i) {
PyObject *item = PyTuple_GET_ITEM(signature_obj, i);
if (item == Py_None) {
continue;
}
- signature[i] = _get_dtype(item);
- if (signature[i] == NULL) {
- goto finish;
+ else {
+ signature[i] = _get_dtype(item);
+ if (signature[i] == NULL) {
+ return -1;
+ }
+ else if (i < nin && signature[i]->abstract) {
+ /*
+ * We reject abstract input signatures for now. These
+ * can probably be defined by finding the common DType with
+ * the actual input and using the result of this for the
+ * promotion.
+ */
+ PyErr_SetString(PyExc_TypeError,
+ "Input DTypes to the signature must not be "
+ "abstract. The behaviour may be defined in the "
+ "future.");
+ return -1;
+ }
}
}
}
@@ -4394,7 +4414,7 @@ _get_normalized_typetup(PyUFuncObject *ufunc,
if (PyBytes_Check(signature_obj)) {
str_object = PyUnicode_FromEncodedObject(signature_obj, NULL, NULL);
if (str_object == NULL) {
- goto finish;
+ return -1;
}
}
else {
@@ -4406,7 +4426,7 @@ _get_normalized_typetup(PyUFuncObject *ufunc,
const char *str = PyUnicode_AsUTF8AndSize(str_object, &length);
if (str == NULL) {
Py_DECREF(str_object);
- goto finish;
+ return -1;
}
if (length != 1 && (length != nin+nout + 2 ||
@@ -4415,18 +4435,17 @@ _get_normalized_typetup(PyUFuncObject *ufunc,
"a type-string for %s, %d typecode(s) before and %d after "
"the -> sign", ufunc_get_name_cstr(ufunc), nin, nout);
Py_DECREF(str_object);
- goto finish;
+ return -1;
}
if (length == 1 && nin+nout != 1) {
Py_DECREF(str_object);
if (DEPRECATE("The use of a length 1 string for the ufunc "
"`signature` is deprecated. Use `dtype` attribute or "
"pass a tuple with `None`s.") < 0) {
- goto finish;
+ return -1;
}
/* `signature="l"` is the same as `dtype="l"` */
- res = _get_normalized_typetup(ufunc, str_object, NULL, out_typetup);
- goto finish;
+ return _get_fixed_signature(ufunc, str_object, NULL, signature);
}
else {
for (int i = 0; i < nin+nout; ++i) {
@@ -4434,7 +4453,7 @@ _get_normalized_typetup(PyUFuncObject *ufunc,
PyArray_Descr *descr = PyArray_DescrFromType(str[istr]);
if (descr == NULL) {
Py_DECREF(str_object);
- goto finish;
+ return -1;
}
signature[i] = NPY_DTYPE(descr);
Py_INCREF(signature[i]);
@@ -4446,15 +4465,79 @@ _get_normalized_typetup(PyUFuncObject *ufunc,
else {
PyErr_SetString(PyExc_TypeError,
"the signature object to ufunc must be a string or a tuple.");
- goto finish;
+ return -1;
+ }
+ return 0;
+}
+
+
+/*
+ * Fill in the actual descriptors used for the operation. This function
+ * supports falling back to the legacy `ufunc->type_resolver`.
+ *
+ * We guarantee the array-method that all passed in descriptors are of the
+ * correct DType instance (i.e. a string can just fetch the length, it doesn't
+ * need to "cast" to string first).
+ */
+static int
+resolve_descriptors(int nop,
+ PyUFuncObject *ufunc, PyArrayMethodObject *ufuncimpl,
+ PyArrayObject *operands[], PyArray_Descr *dtypes[],
+ PyArray_DTypeMeta *signature[], NPY_CASTING casting)
+{
+ int retval = -1;
+ PyArray_Descr *original_dtypes[NPY_MAXARGS];
+
+ for (int i = 0; i < nop; ++i) {
+ if (operands[i] == NULL) {
+ original_dtypes[i] = NULL;
+ }
+ else {
+ /*
+ * The dtype may mismatch the signature, in which case we need
+ * to make it fit before calling the resolution.
+ */
+ PyArray_Descr *descr = PyArray_DTYPE(operands[i]);
+ original_dtypes[i] = PyArray_CastDescrToDType(descr, signature[i]);
+ if (original_dtypes[i] == NULL) {
+ nop = i; /* only this much is initialized */
+ goto finish;
+ }
+ }
+ }
+
+ NPY_UF_DBG_PRINT("Resolving the descriptors\n");
+
+ if (ufuncimpl->resolve_descriptors != &wrapped_legacy_resolve_descriptors) {
+ /* The default: use the `ufuncimpl` as nature intended it */
+ NPY_CASTING safety = ufuncimpl->resolve_descriptors(ufuncimpl,
+ signature, original_dtypes, dtypes);
+ if (safety < 0) {
+ goto finish;
+ }
+ if (NPY_UNLIKELY(PyArray_MinCastSafety(safety, casting) != casting)) {
+ /* TODO: Currently impossible to reach (specialized unsafe loop) */
+ PyErr_Format(PyExc_TypeError,
+ "The ufunc implementation for %s with the given dtype "
+ "signature is not possible under the casting rule %s",
+ ufunc_get_name_cstr(ufunc), npy_casting_to_string(casting));
+ goto finish;
+ }
+ retval = 0;
+ }
+ else {
+ /*
+ * Fall-back to legacy resolver using `operands`, used exclusively
+ * for datetime64/timedelta64 and custom ufuncs (in pyerfa/astropy).
+ */
+ retval = ufunc->type_resolver(ufunc, casting, operands, NULL, dtypes);
}
- res = _make_new_typetup(nop, signature, out_typetup);
finish:
- for (int i =0; i < nop; i++) {
- Py_XDECREF(signature[i]);
+ for (int i = 0; i < nop; i++) {
+ Py_XDECREF(original_dtypes[i]);
}
- return res;
+ return retval;
}
@@ -4553,13 +4636,16 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc,
/* All following variables are cleared in the `fail` error path */
ufunc_full_args full_args;
PyArrayObject *wheremask = NULL;
- PyObject *typetup = NULL;
+ PyArray_DTypeMeta *signature[NPY_MAXARGS];
PyArrayObject *operands[NPY_MAXARGS];
+ PyArray_DTypeMeta *operand_DTypes[NPY_MAXARGS];
PyArray_Descr *operation_descrs[NPY_MAXARGS];
PyObject *output_array_prepare[NPY_MAXARGS];
/* Initialize all arrays (we usually only need a small part) */
+ memset(signature, 0, nop * sizeof(*signature));
memset(operands, 0, nop * sizeof(*operands));
+ memset(operand_DTypes, 0, nop * sizeof(*operation_descrs));
memset(operation_descrs, 0, nop * sizeof(*operation_descrs));
memset(output_array_prepare, 0, nout * sizeof(*output_array_prepare));
@@ -4572,7 +4658,7 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc,
*/
/* Check number of arguments */
- if ((len_args < nin) || (len_args > nop)) {
+ if (NPY_UNLIKELY((len_args < nin) || (len_args > nop))) {
PyErr_Format(PyExc_TypeError,
"%s() takes from %d to %d positional arguments but "
"%zd were given",
@@ -4731,7 +4817,8 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc,
* Parse the passed `dtype` or `signature` into an array containing
* PyArray_DTypeMeta and/or None.
*/
- if (_get_normalized_typetup(ufunc, dtype_obj, signature_obj, &typetup) < 0) {
+ if (_get_fixed_signature(ufunc,
+ dtype_obj, signature_obj, signature) < 0) {
goto fail;
}
@@ -4739,7 +4826,13 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc,
NPY_CASTING casting = NPY_DEFAULT_ASSIGN_CASTING;
npy_bool subok = NPY_TRUE;
int keepdims = -1; /* We need to know if it was passed */
- if (convert_ufunc_arguments(ufunc, full_args, operands,
+ npy_bool force_legacy_promotion;
+ npy_bool allow_legacy_promotion;
+ if (convert_ufunc_arguments(ufunc,
+ /* extract operand related information: */
+ full_args, operands,
+ operand_DTypes, &force_legacy_promotion, &allow_legacy_promotion,
+ /* extract general information: */
order_obj, &order,
casting_obj, &casting,
subok_obj, &subok,
@@ -4748,8 +4841,24 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc,
goto fail;
}
- if (ufunc->type_resolver(ufunc,
- casting, operands, typetup, operation_descrs) < 0) {
+ /*
+     * Note that part of the promotion is to complete the signature
+ * (until here it only represents the fixed part and is usually NULLs).
+ *
+ * After promotion, we could push the following logic into the ArrayMethod
+ * in the future. For now, we do it here. The type resolution step can
+ * be shared between the ufunc and gufunc code.
+ */
+ PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc,
+ operands, signature,
+ operand_DTypes, force_legacy_promotion, allow_legacy_promotion);
+ if (ufuncimpl == NULL) {
+ goto fail;
+ }
+
+ /* Find the correct descriptors for the operation */
+ if (resolve_descriptors(nop, ufunc, ufuncimpl,
+ operands, operation_descrs, signature, casting) < 0) {
goto fail;
}
@@ -4761,20 +4870,17 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc,
* Do the final preparations and call the inner-loop.
*/
if (!ufunc->core_enabled) {
- errval = PyUFunc_GenericFunctionInternal(ufunc,
- operation_descrs, operands,
- extobj, order,
+ errval = PyUFunc_GenericFunctionInternal(ufunc, ufuncimpl,
+ operation_descrs, operands, extobj, casting, order,
output_array_prepare, full_args, /* for __array_prepare__ */
wheremask);
}
else {
- errval = PyUFunc_GeneralizedFunctionInternal(ufunc,
- operation_descrs, operands,
- extobj, order,
+ errval = PyUFunc_GeneralizedFunctionInternal(ufunc, ufuncimpl,
+ operation_descrs, operands, extobj, casting, order,
/* GUFuncs never (ever) called __array_prepare__! */
axis_obj, axes_obj, keepdims);
}
-
if (errval < 0) {
goto fail;
}
@@ -4785,6 +4891,7 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc,
*/
Py_XDECREF(wheremask);
for (int i = 0; i < nop; i++) {
+ Py_XDECREF(operand_DTypes[i]);
Py_DECREF(operation_descrs[i]);
if (i < nin) {
Py_DECREF(operands[i]);
@@ -4793,22 +4900,21 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc,
Py_XDECREF(output_array_prepare[i-nin]);
}
}
- Py_XDECREF(typetup);
-
/* The following steals the references to the outputs: */
PyObject *result = replace_with_wrapped_result_and_return(ufunc,
full_args, subok, operands+nin);
Py_XDECREF(full_args.in);
Py_XDECREF(full_args.out);
+
return result;
fail:
- Py_XDECREF(typetup);
Py_XDECREF(full_args.in);
Py_XDECREF(full_args.out);
Py_XDECREF(wheremask);
for (int i = 0; i < ufunc->nargs; i++) {
Py_XDECREF(operands[i]);
+ Py_XDECREF(operand_DTypes[i]);
Py_XDECREF(operation_descrs[i]);
if (i < nout) {
Py_XDECREF(output_array_prepare[i]);
@@ -5084,6 +5190,28 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi
ufunc->legacy_inner_loop_selector = &PyUFunc_DefaultLegacyInnerLoopSelector;
ufunc->_always_null_previously_masked_innerloop_selector = NULL;
+ ufunc->op_flags = NULL;
+ ufunc->_loops = NULL;
+ if (nin + nout != 0) {
+ ufunc->_dispatch_cache = PyArrayIdentityHash_New(nin + nout);
+ if (ufunc->_dispatch_cache == NULL) {
+ Py_DECREF(ufunc);
+ return NULL;
+ }
+ }
+ else {
+ /*
+ * Work around a test that seems to do this right now, it should not
+      * be a valid ufunc at all, though. TODO: Remove...
+ */
+ ufunc->_dispatch_cache = NULL;
+ }
+ ufunc->_loops = PyList_New(0);
+ if (ufunc->_loops == NULL) {
+ Py_DECREF(ufunc);
+ return NULL;
+ }
+
if (name == NULL) {
ufunc->name = "?";
}
@@ -5105,6 +5233,29 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi
return NULL;
}
}
+
+ char *curr_types = ufunc->types;
+ for (int i = 0; i < ntypes * (nin + nout); i += nin + nout) {
+ /*
+ * Add all legacy wrapping loops here. This is normally not necessary,
+ * but makes sense. It could also help/be needed to avoid issues with
+ * ambiguous loops such as: `OO->?` and `OO->O` where in theory the
+ * wrong loop could be picked if only the second one is added.
+ */
+ PyObject *info;
+ PyArray_DTypeMeta *op_dtypes[NPY_MAXARGS];
+ for (int arg = 0; arg < nin + nout; arg++) {
+ op_dtypes[arg] = PyArray_DTypeFromTypeNum(curr_types[arg]);
+            /* These DTypes are immortal, so we can borrow the reference */
+ Py_DECREF(op_dtypes[arg]);
+ }
+ curr_types += nin + nout;
+
+ info = add_and_return_legacy_wrapping_ufunc_loop(ufunc, op_dtypes, 1);
+ if (info == NULL) {
+ return NULL;
+ }
+ }
return (PyObject *)ufunc;
}
@@ -5320,6 +5471,8 @@ PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc,
PyArray_Descr *descr;
PyUFunc_Loop1d *funcdata;
PyObject *key, *cobj;
+ PyArray_DTypeMeta *signature[NPY_MAXARGS];
+ PyObject *signature_tuple = NULL;
int i;
int *newtypes=NULL;
@@ -5348,13 +5501,67 @@ PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc,
if (arg_types != NULL) {
for (i = 0; i < ufunc->nargs; i++) {
newtypes[i] = arg_types[i];
+ signature[i] = PyArray_DTypeFromTypeNum(arg_types[i]);
+ Py_DECREF(signature[i]); /* DType can't be deleted... */
}
}
else {
for (i = 0; i < ufunc->nargs; i++) {
newtypes[i] = usertype;
+ signature[i] = PyArray_DTypeFromTypeNum(usertype);
+ Py_DECREF(signature[i]); /* DType can't be deleted... */
+ }
+ }
+
+ signature_tuple = PyArray_TupleFromItems(
+ ufunc->nargs, (PyObject **)signature, 0);
+ if (signature_tuple == NULL) {
+ goto fail;
+ }
+ /*
+ * We add the loop to the list of all loops and promoters. If the
+ * equivalent loop was already added, skip this.
+ * Note that even then the ufunc is still modified: The legacy ArrayMethod
+ * already looks up the inner-loop from the ufunc (and this is replaced
+ * below!).
+ * If the existing one is not a legacy ArrayMethod, we raise currently:
+ * A new-style loop should not be replaced by an old-style one.
+ */
+ int add_new_loop = 1;
+ for (Py_ssize_t j = 0; j < PyList_GET_SIZE(ufunc->_loops); j++) {
+ PyObject *item = PyList_GET_ITEM(ufunc->_loops, j);
+ PyObject *existing_tuple = PyTuple_GET_ITEM(item, 0);
+
+ int cmp = PyObject_RichCompareBool(existing_tuple, signature_tuple, Py_EQ);
+ if (cmp < 0) {
+ goto fail;
+ }
+ if (!cmp) {
+ continue;
+ }
+ PyObject *registered = PyTuple_GET_ITEM(item, 1);
+ if (!PyObject_TypeCheck(registered, &PyArrayMethod_Type) || (
+ (PyArrayMethodObject *)registered)->get_strided_loop !=
+ &get_wrapped_legacy_ufunc_loop) {
+ PyErr_Format(PyExc_TypeError,
+ "A non-compatible loop was already registered for "
+ "ufunc %s and DTypes %S.",
+ ufunc_get_name_cstr(ufunc), signature_tuple);
+ goto fail;
+ }
+ /* The loop was already added */
+ add_new_loop = 0;
+ break;
+ }
+ if (add_new_loop) {
+ PyObject *info = add_and_return_legacy_wrapping_ufunc_loop(
+ ufunc, signature, 0);
+ if (info == NULL) {
+ goto fail;
}
}
+ /* Clearing sets it to NULL for the error paths */
+ Py_CLEAR(signature_tuple);
funcdata->func = function;
funcdata->arg_types = newtypes;
@@ -5429,6 +5636,7 @@ PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc,
fail:
Py_DECREF(key);
+ Py_XDECREF(signature_tuple);
PyArray_free(funcdata);
PyArray_free(newtypes);
if (!PyErr_Occurred()) PyErr_NoMemory();
@@ -5454,8 +5662,10 @@ ufunc_dealloc(PyUFuncObject *ufunc)
if (ufunc->identity == PyUFunc_IdentityValue) {
Py_DECREF(ufunc->identity_value);
}
- if (ufunc->obj != NULL) {
- Py_DECREF(ufunc->obj);
+ Py_XDECREF(ufunc->obj);
+ Py_XDECREF(ufunc->_loops);
+ if (ufunc->_dispatch_cache != NULL) {
+ PyArrayIdentityHash_Dealloc(ufunc->_dispatch_cache);
}
PyObject_GC_Del(ufunc);
}
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 468327b8c..a7d536656 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -1,4 +1,16 @@
/*
+ * NOTE: The type resolution defined in this file is considered legacy.
+ *
+ * The new mechanism separates type resolution and promotion into two
+ * distinct steps, as per NEP 43.
+ * Further, the functions in this file rely on the operands rather than
+ * only the DTypes/descriptors. They are still called and at this point
+ * vital (NumPy ~1.21), but should hopefully become largely irrelevant very
+ * quickly.
+ *
+ * At that point, this file should be deletable in its entirety.
+ *
+ *
* This file implements type resolution for NumPy element-wise ufuncs.
* This mechanism is still backwards-compatible with the pre-existing
* legacy mechanism, so performs much slower than is necessary.
@@ -89,9 +101,9 @@ raise_binary_type_reso_error(PyUFuncObject *ufunc, PyArrayObject **operands) {
/** Helper function to raise UFuncNoLoopError
* Always returns -1 to indicate the exception was raised, for convenience
*/
-static int
+NPY_NO_EXPORT int
raise_no_loop_found_error(
- PyUFuncObject *ufunc, PyArray_Descr **dtypes)
+ PyUFuncObject *ufunc, PyObject **dtypes)
{
static PyObject *exc_type = NULL;
@@ -102,8 +114,7 @@ raise_no_loop_found_error(
return -1;
}
- PyObject *dtypes_tup = PyArray_TupleFromItems(
- ufunc->nargs, (PyObject **)dtypes, 1);
+ PyObject *dtypes_tup = PyArray_TupleFromItems(ufunc->nargs, dtypes, 1);
if (dtypes_tup == NULL) {
return -1;
}
@@ -119,6 +130,7 @@ raise_no_loop_found_error(
return -1;
}
+
static int
raise_casting_error(
PyObject *exc_type,
@@ -333,22 +345,30 @@ PyUFunc_SimpleBinaryComparisonTypeResolver(PyUFuncObject *ufunc,
if (out_dtypes[0] == NULL) {
return -1;
}
+ out_dtypes[1] = out_dtypes[0];
+ Py_INCREF(out_dtypes[1]);
}
else {
/* Not doing anything will lead to a loop no found error. */
out_dtypes[0] = PyArray_DESCR(operands[0]);
Py_INCREF(out_dtypes[0]);
+ out_dtypes[1] = PyArray_DESCR(operands[1]);
+ Py_INCREF(out_dtypes[1]);
}
- out_dtypes[1] = out_dtypes[0];
- Py_INCREF(out_dtypes[1]);
}
else {
PyArray_Descr *descr;
/*
+ * DEPRECATED 2021-03, NumPy 1.20
+ *
* If the type tuple was originally a single element (probably),
* issue a deprecation warning, but otherwise accept it. Since the
* result dtype is always boolean, this is not actually valid unless it
* is `object` (but if there is an object input we already deferred).
+ *
+ * TODO: Once this deprecation is gone, the special case for
+ * `PyUFunc_SimpleBinaryComparisonTypeResolver` in dispatching.c
+ * can be removed.
*/
if (PyTuple_Check(type_tup) && PyTuple_GET_SIZE(type_tup) == 3 &&
PyTuple_GET_ITEM(type_tup, 0) == Py_None &&
@@ -527,7 +547,7 @@ PyUFunc_SimpleUniformOperationTypeResolver(
out_dtypes[iop] = PyArray_DESCR(operands[iop]);
Py_INCREF(out_dtypes[iop]);
}
- raise_no_loop_found_error(ufunc, out_dtypes);
+ raise_no_loop_found_error(ufunc, (PyObject **)out_dtypes);
for (iop = 0; iop < ufunc->nin; iop++) {
Py_DECREF(out_dtypes[iop]);
out_dtypes[iop] = NULL;
@@ -1492,7 +1512,7 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc,
types += nargs;
}
- return raise_no_loop_found_error(ufunc, dtypes);
+ return raise_no_loop_found_error(ufunc, (PyObject **)dtypes);
}
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index cd0ff4a0d..dd88a081a 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -135,4 +135,7 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc,
void **out_innerloopdata,
int *out_needs_api);
+NPY_NO_EXPORT int
+raise_no_loop_found_error(PyUFuncObject *ufunc, PyObject **dtypes);
+
#endif
diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py
index 9d1d514fb..becd65b11 100644
--- a/numpy/core/tests/test_scalarmath.py
+++ b/numpy/core/tests/test_scalarmath.py
@@ -307,8 +307,8 @@ class TestModulus:
# promotes to float which does not fit
a = np.array([1, 2], np.int64)
b = np.array([1, 2], np.uint64)
- pattern = 'could not be coerced to provided output parameter'
- with assert_raises_regex(TypeError, pattern):
+ with pytest.raises(TypeError,
+ match=r"Cannot cast ufunc 'floor_divide' output from"):
a //= b
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 0251f21a9..dab11d948 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -164,8 +164,9 @@ class TestUfuncGenericLoops:
except AttributeError:
return lambda: getattr(np.core.umath, attr)(val)
- num_arr = np.array([val], dtype=np.float64)
- obj_arr = np.array([MyFloat(val)], dtype="O")
+ # Use 0-D arrays, to ensure the same element call
+ num_arr = np.array(val, dtype=np.float64)
+ obj_arr = np.array(MyFloat(val), dtype="O")
with np.errstate(all="raise"):
try:
@@ -1711,9 +1712,17 @@ class TestUfunc:
target = np.array([0, 2, 4], dtype=_rational_tests.rational)
assert_equal(result, target)
- # no output type should raise TypeError
+ # The new resolution means that we can (usually) find custom loops
+ # as long as they match exactly:
+ result = _rational_tests.test_add(a, b)
+ assert_equal(result, target)
+
+ # But since we use the old type resolver, this may not work
+ # for dtype variations unless the output dtype is given:
+ result = _rational_tests.test_add(a, b.astype(np.uint16), out=c)
+ assert_equal(result, target)
with assert_raises(TypeError):
- _rational_tests.test_add(a, b)
+ _rational_tests.test_add(a, b.astype(np.uint16))
def test_operand_flags(self):
a = np.arange(16, dtype='l').reshape(4, 4)
@@ -2029,8 +2038,7 @@ class TestUfunc:
np.true_divide, np.floor_divide, np.bitwise_and, np.bitwise_or,
np.bitwise_xor, np.left_shift, np.right_shift, np.fmax,
np.fmin, np.fmod, np.hypot, np.logaddexp, np.logaddexp2,
- np.logical_and, np.logical_or, np.logical_xor, np.maximum,
- np.minimum, np.mod,
+ np.maximum, np.minimum, np.mod,
np.greater, np.greater_equal, np.less, np.less_equal,
np.equal, np.not_equal]