summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- numpy/core/code_generators/numpy_api.py 1
-rw-r--r-- numpy/core/include/numpy/ufuncobject.h 179
-rw-r--r-- numpy/core/src/umath/ufunc_object.c 766
-rw-r--r-- numpy/core/src/umath/ufunc_type_resolution.c 467
-rw-r--r-- numpy/core/src/umath/ufunc_type_resolution.h 64
-rw-r--r-- numpy/core/src/umath/umathmodule.c.src 25
6 files changed, 757 insertions, 745 deletions
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index 76e006c65..2e159db06 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -386,7 +386,6 @@ ufunc_funcs_api = {
# End 1.6 API
'PyUFunc_DefaultTypeResolution': 39,
'PyUFunc_ValidateCasting': 40,
- 'PyUFunc_DefaultTypeResolutionMasked': 41,
}
# List of all the dicts which define the C API
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index 47b195b2f..c0a2308b5 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -7,23 +7,36 @@
extern "C" {
#endif
-/* The most generic inner loop for a standard element-wise ufunc */
+/*
+ * The legacy generic inner loop for a standard element-wise or
+ * generalized ufunc.
+ */
typedef void (*PyUFuncGenericFunction)
(char **args,
npy_intp *dimensions,
- npy_intp *steps,
+ npy_intp *strides,
void *innerloopdata);
/*
- * The most generic inner loop for a masked standard element-wise ufunc.
+ * The most generic one-dimensional inner loop for
+ * a standard element-wise ufunc. This typedef is also
+ * more consistent with the other NumPy function pointer typedefs
+ * than PyUFuncGenericFunction.
*/
-typedef void (*PyUFuncGenericMaskedFunction)
- (char **args,
- char *mask_arg,
- npy_intp *dimensions,
- npy_intp *steps,
- npy_intp mask_step,
- NpyAuxData *innerloopdata);
+typedef void (PyUFunc_StridedInnerLoopFunc)(
+ char **dataptrs, npy_intp *strides,
+ npy_intp count,
+ NpyAuxData *innerloopdata);
+
+/*
+ * The most generic one-dimensional inner loop for
+ * a masked standard element-wise ufunc.
+ */
+typedef void (PyUFunc_MaskedStridedInnerLoopFunc)(
+ char **dataptrs, npy_intp *strides,
+ char *maskptr, npy_intp mask_stride,
+ npy_intp count,
+ NpyAuxData *innerloopdata);
/* Forward declaration for the type resolution function */
struct _tagPyUFuncObject;
@@ -49,10 +62,6 @@ struct _tagPyUFuncObject;
* references to (ufunc->nin + ufunc->nout) new
* dtypes, one for each input and output. These
* dtypes should all be in native-endian format.
- * out_innerloop: Should be populated with the correct ufunc inner
- * loop for the given type.
- * out_innerloopdata: Should be populated with the void* data to
- * be passed into the out_innerloop function.
*
* Should return 0 on success, -1 on failure (with exception set),
* or -2 if Py_NotImplemented should be returned.
@@ -62,17 +71,53 @@ typedef int (PyUFunc_TypeResolutionFunc)(
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
-typedef int (PyUFunc_TypeResolutionMaskedFunc)(
- struct _tagPyUFuncObject *ufunc,
- NPY_CASTING casting,
- PyArrayObject **operands,
- PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericMaskedFunction *out_innerloop,
- NpyAuxData **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
+
+/*
+ * Given an array of DTypes as returned by the PyUFunc_TypeResolutionFunc,
+ * and an array of fixed strides (the array will contain NPY_MAX_INTP for
+ * strides which are not necessarily fixed), returns an inner loop
+ * with associated auxiliary data.
+ *
+ * For backwards compatibility, there is a variant of the inner loop
+ * selection which returns an inner loop irrespective of the strides,
+ * and with a void* static auxiliary data instead of an NpyAuxData *
+ * dynamically allocatable auxiliary data.
+ *
+ * ufunc: The ufunc object.
+ * dtypes: An array which has been populated with dtypes,
+ * in most cases by the type resolution function
+ * for the same ufunc.
+ * fixed_strides: For each input/output, either the stride that
+ * will be used every time the function is called
+ * or NPY_MAX_INTP if the stride might change or
+ * is not known ahead of time. The loop selection
+ * function may use this stride to pick inner loops
+ * which are optimized for contiguous or 0-stride
+ * cases.
+ * out_innerloop: Should be populated with the correct ufunc inner
+ * loop for the given type.
+ * out_innerloopdata: Should be populated with the void* data to
+ * be passed into the out_innerloop function.
+ */
+typedef int (PyUFunc_LegacyInnerLoopSelectionFunc)(
+ struct _tagPyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ PyUFuncGenericFunction *out_innerloop,
+ void **out_innerloopdata);
+typedef int (PyUFunc_InnerLoopSelectionFunc)(
+ struct _tagPyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ npy_intp *fixed_strides,
+ PyUFunc_StridedInnerLoopFunc **out_innerloop,
+ NpyAuxData **out_innerloopdata);
+typedef int (PyUFunc_MaskedInnerLoopSelectionFunc)(
+ struct _tagPyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ npy_intp *fixed_strides,
+ npy_intp fixed_mask_stride,
+ PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
+ NpyAuxData **out_innerloopdata);
typedef struct _tagPyUFuncObject {
PyObject_HEAD
@@ -137,17 +182,27 @@ typedef struct _tagPyUFuncObject {
char *core_signature;
/*
- * A function which resolves the types and returns an inner loop.
- * This is used by the regular ufunc, the reduction operations
- * have a different set of rules.
+ * A function which resolves the types and fills an array
+ * with the dtypes for the inputs and outputs.
*/
PyUFunc_TypeResolutionFunc *type_resolution_function;
/*
- * A function which resolves the types and returns an inner loop.
- * This is used by the regular ufunc when it requires using
- * a mask to select which elements to compute.
+ * A function which returns an inner loop written for
+ * NumPy 1.6 and earlier ufuncs. This is for backwards
+ * compatibility, and may be NULL if inner_loop_selector
+ * is specified.
*/
- PyUFunc_TypeResolutionMaskedFunc *type_resolution_masked_function;
+ PyUFunc_LegacyInnerLoopSelectionFunc *legacy_inner_loop_selector;
+ /*
+ * A function which returns an inner loop for the new mechanism
+ * in NumPy 1.7 and later. If provided, this is used, otherwise
+ * if NULL the legacy_inner_loop_selector is used instead.
+ */
+ PyUFunc_InnerLoopSelectionFunc *inner_loop_selector;
+ /*
+ * A function which returns a masked inner loop for the ufunc.
+ */
+ PyUFunc_MaskedInnerLoopSelectionFunc *masked_inner_loop_selector;
} PyUFuncObject;
#include "arrayobject.h"
@@ -231,12 +286,12 @@ typedef struct _loop1d_info {
#define UFUNC_PYVALS_NAME "UFUNC_PYVALS"
-#define UFUNC_CHECK_ERROR(arg) \
- do {if ((((arg)->obj & UFUNC_OBJ_NEEDS_API) && PyErr_Occurred()) || \
- ((arg)->errormask && \
- PyUFunc_checkfperr((arg)->errormask, \
- (arg)->errobj, \
- &(arg)->first))) \
+#define UFUNC_CHECK_ERROR(arg) \
+ do {if ((((arg)->obj & UFUNC_OBJ_NEEDS_API) && PyErr_Occurred()) || \
+ ((arg)->errormask && \
+ PyUFunc_checkfperr((arg)->errormask, \
+ (arg)->errobj, \
+ &(arg)->first))) \
goto fail;} while (0)
/* This code checks the IEEE status flags in a platform-dependent way */
@@ -251,12 +306,12 @@ typedef struct _loop1d_info {
#include <machine/fpu.h>
-#define UFUNC_CHECK_STATUS(ret) { \
- unsigned long fpstatus; \
- \
- fpstatus = ieee_get_fp_control(); \
+#define UFUNC_CHECK_STATUS(ret) { \
+ unsigned long fpstatus; \
+ \
+ fpstatus = ieee_get_fp_control(); \
/* clear status bits as well as disable exception mode if on */ \
- ieee_set_fp_control( 0 ); \
+ ieee_set_fp_control( 0 ); \
ret = ((IEEE_STATUS_DZE & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
| ((IEEE_STATUS_OVF & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
| ((IEEE_STATUS_UNF & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
@@ -273,13 +328,13 @@ typedef struct _loop1d_info {
#define UFUNC_NOFPE _control87(MCW_EM, MCW_EM);
#endif
-#define UFUNC_CHECK_STATUS(ret) { \
- int fpstatus = (int) _clearfp(); \
- \
+#define UFUNC_CHECK_STATUS(ret) { \
+ int fpstatus = (int) _clearfp(); \
+ \
ret = ((SW_ZERODIVIDE & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
- | ((SW_OVERFLOW & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
+ | ((SW_OVERFLOW & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
| ((SW_UNDERFLOW & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
- | ((SW_INVALID & fpstatus) ? UFUNC_FPE_INVALID : 0); \
+ | ((SW_INVALID & fpstatus) ? UFUNC_FPE_INVALID : 0); \
}
/* Solaris --------------------------------------------------------*/
@@ -290,15 +345,15 @@ typedef struct _loop1d_info {
defined(__NetBSD__)
#include <ieeefp.h>
-#define UFUNC_CHECK_STATUS(ret) { \
- int fpstatus; \
- \
- fpstatus = (int) fpgetsticky(); \
- ret = ((FP_X_DZ & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
- | ((FP_X_OFL & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
- | ((FP_X_UFL & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
- | ((FP_X_INV & fpstatus) ? UFUNC_FPE_INVALID : 0); \
- (void) fpsetsticky(0); \
+#define UFUNC_CHECK_STATUS(ret) { \
+ int fpstatus; \
+ \
+ fpstatus = (int) fpgetsticky(); \
+ ret = ((FP_X_DZ & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
+ | ((FP_X_OFL & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
+ | ((FP_X_UFL & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
+ | ((FP_X_INV & fpstatus) ? UFUNC_FPE_INVALID : 0); \
+ (void) fpsetsticky(0); \
}
#elif defined(__GLIBC__) || defined(__APPLE__) || \
@@ -312,15 +367,15 @@ typedef struct _loop1d_info {
#include "fenv/fenv.c"
#endif
-#define UFUNC_CHECK_STATUS(ret) { \
- int fpstatus = (int) fetestexcept(FE_DIVBYZERO | FE_OVERFLOW | \
- FE_UNDERFLOW | FE_INVALID); \
+#define UFUNC_CHECK_STATUS(ret) { \
+ int fpstatus = (int) fetestexcept(FE_DIVBYZERO | FE_OVERFLOW | \
+ FE_UNDERFLOW | FE_INVALID); \
ret = ((FE_DIVBYZERO & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
| ((FE_OVERFLOW & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
| ((FE_UNDERFLOW & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
| ((FE_INVALID & fpstatus) ? UFUNC_FPE_INVALID : 0); \
- (void) feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | \
- FE_UNDERFLOW | FE_INVALID); \
+ (void) feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | \
+ FE_UNDERFLOW | FE_INVALID); \
}
#elif defined(_AIX)
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 643e1d6f0..e222b4945 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -39,6 +39,7 @@
#include "numpy/noprefix.h"
#include "numpy/ufuncobject.h"
#include "lowlevel_strided_loops.h"
+#include "ufunc_type_resolution.h"
#include "ufunc_object.h"
@@ -544,11 +545,11 @@ _is_same_name(const char* s1, const char* s2)
/*
* Sets core_num_dim_ix, core_num_dims, core_dim_ixs, core_offsets,
- * and core_signature in PyUFuncObject "self". Returns 0 unless an
+ * and core_signature in PyUFuncObject "ufunc". Returns 0 unless an
* error occured.
*/
static int
-_parse_signature(PyUFuncObject *self, const char *signature)
+_parse_signature(PyUFuncObject *ufunc, const char *signature)
{
size_t len;
char const **var_names;
@@ -565,9 +566,9 @@ _parse_signature(PyUFuncObject *self, const char *signature)
}
len = strlen(signature);
- self->core_signature = PyArray_malloc(sizeof(char) * (len+1));
- if (self->core_signature) {
- strcpy(self->core_signature, signature);
+ ufunc->core_signature = PyArray_malloc(sizeof(char) * (len+1));
+ if (ufunc->core_signature) {
+ strcpy(ufunc->core_signature, signature);
}
/* Allocate sufficient memory to store pointers to all dimension names */
var_names = PyArray_malloc(sizeof(char const*) * len);
@@ -576,13 +577,13 @@ _parse_signature(PyUFuncObject *self, const char *signature)
return -1;
}
- self->core_enabled = 1;
- self->core_num_dim_ix = 0;
- self->core_num_dims = PyArray_malloc(sizeof(int) * self->nargs);
- self->core_dim_ixs = PyArray_malloc(sizeof(int) * len); /* shrink this later */
- self->core_offsets = PyArray_malloc(sizeof(int) * self->nargs);
- if (self->core_num_dims == NULL || self->core_dim_ixs == NULL
- || self->core_offsets == NULL) {
+ ufunc->core_enabled = 1;
+ ufunc->core_num_dim_ix = 0;
+ ufunc->core_num_dims = PyArray_malloc(sizeof(int) * ufunc->nargs);
+ ufunc->core_dim_ixs = PyArray_malloc(sizeof(int) * len); /* shrink this later */
+ ufunc->core_offsets = PyArray_malloc(sizeof(int) * ufunc->nargs);
+ if (ufunc->core_num_dims == NULL || ufunc->core_dim_ixs == NULL
+ || ufunc->core_offsets == NULL) {
PyErr_NoMemory();
goto fail;
}
@@ -590,7 +591,7 @@ _parse_signature(PyUFuncObject *self, const char *signature)
i = _next_non_white_space(signature, 0);
while (signature[i] != '\0') {
/* loop over input/output arguments */
- if (cur_arg == self->nin) {
+ if (cur_arg == ufunc->nin) {
/* expect "->" */
if (signature[i] != '-' || signature[i+1] != '>') {
parse_error = "expect '->'";
@@ -615,17 +616,17 @@ _parse_signature(PyUFuncObject *self, const char *signature)
parse_error = "expect dimension name";
goto fail;
}
- while (j < self->core_num_dim_ix) {
+ while (j < ufunc->core_num_dim_ix) {
if (_is_same_name(signature+i, var_names[j])) {
break;
}
j++;
}
- if (j >= self->core_num_dim_ix) {
+ if (j >= ufunc->core_num_dim_ix) {
var_names[j] = signature+i;
- self->core_num_dim_ix++;
+ ufunc->core_num_dim_ix++;
}
- self->core_dim_ixs[cur_core_dim] = j;
+ ufunc->core_dim_ixs[cur_core_dim] = j;
cur_core_dim++;
nd++;
i = _get_end_of_name(signature, i);
@@ -643,13 +644,13 @@ _parse_signature(PyUFuncObject *self, const char *signature)
}
}
}
- self->core_num_dims[cur_arg] = nd;
- self->core_offsets[cur_arg] = cur_core_dim-nd;
+ ufunc->core_num_dims[cur_arg] = nd;
+ ufunc->core_offsets[cur_arg] = cur_core_dim-nd;
cur_arg++;
nd = 0;
i = _next_non_white_space(signature, i + 1);
- if (cur_arg != self->nin && cur_arg != self->nargs) {
+ if (cur_arg != ufunc->nin && cur_arg != ufunc->nargs) {
/*
* The list of input arguments (or output arguments) was
* only read partially
@@ -661,15 +662,15 @@ _parse_signature(PyUFuncObject *self, const char *signature)
i = _next_non_white_space(signature, i + 1);
}
}
- if (cur_arg != self->nargs) {
+ if (cur_arg != ufunc->nargs) {
parse_error = "incomplete signature: not all arguments found";
goto fail;
}
- self->core_dim_ixs = PyArray_realloc(self->core_dim_ixs,
+ ufunc->core_dim_ixs = PyArray_realloc(ufunc->core_dim_ixs,
sizeof(int)*cur_core_dim);
/* check for trivial core-signature, e.g. "(),()->()" */
if (cur_core_dim == 0) {
- self->core_enabled = 0;
+ ufunc->core_enabled = 0;
}
PyArray_free((void*)var_names);
return 0;
@@ -701,7 +702,7 @@ fail:
* non-zero references in out_op. This
* function does not do its own clean-up.
*/
-static int get_ufunc_arguments(PyUFuncObject *self,
+static int get_ufunc_arguments(PyUFuncObject *ufunc,
PyObject *args, PyObject *kwds,
PyArrayObject **out_op,
NPY_ORDER *out_order,
@@ -712,7 +713,7 @@ static int get_ufunc_arguments(PyUFuncObject *self,
PyArrayObject **out_wheremask,
int *out_use_maskna)
{
- int i, nargs, nin = self->nin, nout = self->nout;
+ int i, nargs, nin = ufunc->nin, nout = ufunc->nout;
PyObject *obj, *context;
PyObject *str_key_obj = NULL;
char *ufunc_name;
@@ -720,7 +721,7 @@ static int get_ufunc_arguments(PyUFuncObject *self,
int any_flexible = 0, any_object = 0;
int any_non_maskna_out = 0, any_maskna_out = 0;
- ufunc_name = self->name ? self->name : "<unnamed ufunc>";
+ ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
*out_extobj = NULL;
*out_typetup = NULL;
@@ -730,7 +731,7 @@ static int get_ufunc_arguments(PyUFuncObject *self,
/* Check number of arguments */
nargs = PyTuple_Size(args);
- if ((nargs < nin) || (nargs > self->nargs)) {
+ if ((nargs < nin) || (nargs > ufunc->nargs)) {
PyErr_SetString(PyExc_ValueError, "invalid number of arguments");
return -1;
}
@@ -746,7 +747,7 @@ static int get_ufunc_arguments(PyUFuncObject *self,
* TODO: There should be a comment here explaining what
* context does.
*/
- context = Py_BuildValue("OOi", self, args, i);
+ context = Py_BuildValue("OOi", ufunc, args, i);
if (context == NULL) {
return -1;
}
@@ -1036,12 +1037,12 @@ fail:
* -1 if there is an error.
*/
static int
-check_for_trivial_loop(PyUFuncObject *self,
+check_for_trivial_loop(PyUFuncObject *ufunc,
PyArrayObject **op,
PyArray_Descr **dtype,
npy_intp buffersize)
{
- npy_intp i, nin = self->nin, nop = nin + self->nout;
+ npy_intp i, nin = ufunc->nin, nop = nin + ufunc->nout;
for (i = 0; i < nop; ++i) {
/*
@@ -1152,7 +1153,7 @@ trivial_three_operand_loop(PyArrayObject **op,
* exactly the same, which may be more strict than before.
*/
static int
-prepare_ufunc_output(PyUFuncObject *self,
+prepare_ufunc_output(PyUFuncObject *ufunc,
PyArrayObject **op,
PyObject *arr_prep,
PyObject *arr_prep_args,
@@ -1163,7 +1164,7 @@ prepare_ufunc_output(PyUFuncObject *self,
PyArrayObject *arr;
res = PyObject_CallFunction(arr_prep, "O(OOi)",
- *op, self, arr_prep_args, i);
+ *op, ufunc, arr_prep_args, i);
if ((res == NULL) || (res == Py_None) || !PyArray_Check(res)) {
if (!PyErr_Occurred()){
PyErr_SetString(PyExc_TypeError,
@@ -1207,7 +1208,7 @@ prepare_ufunc_output(PyUFuncObject *self,
}
static int
-iterator_loop(PyUFuncObject *self,
+iterator_loop(PyUFuncObject *ufunc,
PyArrayObject **op,
PyArray_Descr **dtype,
NPY_ORDER order,
@@ -1217,7 +1218,7 @@ iterator_loop(PyUFuncObject *self,
PyUFuncGenericFunction innerloop,
void *innerloopdata)
{
- npy_intp i, nin = self->nin, nout = self->nout;
+ npy_intp i, nin = ufunc->nin, nout = ufunc->nout;
npy_intp nop = nin + nout;
npy_uint32 op_flags[NPY_MAXARGS];
NpyIter *iter;
@@ -1278,7 +1279,7 @@ iterator_loop(PyUFuncObject *self,
/* Call the __array_prepare__ functions where necessary */
for (i = 0; i < nout; ++i) {
- if (prepare_ufunc_output(self, &op[nin+i],
+ if (prepare_ufunc_output(ufunc, &op[nin+i],
arr_prep[i], arr_prep_args, i) < 0) {
NpyIter_Deallocate(iter);
return -1;
@@ -1341,18 +1342,27 @@ iterator_loop(PyUFuncObject *self,
* innerloopdata - data to pass to the inner loop
*/
static int
-execute_ufunc_loop(PyUFuncObject *self,
+execute_legacy_ufunc_loop(PyUFuncObject *ufunc,
int trivial_loop_ok,
PyArrayObject **op,
- PyArray_Descr **dtype,
+ PyArray_Descr **dtypes,
NPY_ORDER order,
npy_intp buffersize,
PyObject **arr_prep,
- PyObject *arr_prep_args,
- PyUFuncGenericFunction innerloop,
- void *innerloopdata)
+ PyObject *arr_prep_args)
{
- npy_intp nin = self->nin, nout = self->nout;
+ npy_intp nin = ufunc->nin, nout = ufunc->nout;
+ PyUFuncGenericFunction innerloop;
+ void *innerloopdata;
+
+ if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
+ &innerloop, &innerloopdata) < 0) {
+ return -1;
+ }
+ /* If the loop wants the arrays, provide them. */
+ if (_does_loop_use_arrays(innerloopdata)) {
+ innerloopdata = (void*)op;
+ }
/* First check for the trivial cases that don't need an iterator */
if (trivial_loop_ok) {
@@ -1360,9 +1370,9 @@ execute_ufunc_loop(PyUFuncObject *self,
if (op[1] == NULL &&
(order == NPY_ANYORDER || order == NPY_KEEPORDER) &&
PyArray_TRIVIALLY_ITERABLE(op[0])) {
- Py_INCREF(dtype[1]);
+ Py_INCREF(dtypes[1]);
op[1] = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
- dtype[1],
+ dtypes[1],
PyArray_NDIM(op[0]),
PyArray_DIMS(op[0]),
NULL, NULL,
@@ -1371,7 +1381,7 @@ execute_ufunc_loop(PyUFuncObject *self,
NULL);
/* Call the __prepare_array__ if necessary */
- if (prepare_ufunc_output(self, &op[1],
+ if (prepare_ufunc_output(ufunc, &op[1],
arr_prep[0], arr_prep_args, 0) < 0) {
return -1;
}
@@ -1386,7 +1396,7 @@ execute_ufunc_loop(PyUFuncObject *self,
PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1])) {
/* Call the __prepare_array__ if necessary */
- if (prepare_ufunc_output(self, &op[1],
+ if (prepare_ufunc_output(ufunc, &op[1],
arr_prep[0], arr_prep_args, 0) < 0) {
return -1;
}
@@ -1412,9 +1422,9 @@ execute_ufunc_loop(PyUFuncObject *self,
else {
tmp = op[1];
}
- Py_INCREF(dtype[2]);
+ Py_INCREF(dtypes[2]);
op[2] = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
- dtype[2],
+ dtypes[2],
PyArray_NDIM(tmp),
PyArray_DIMS(tmp),
NULL, NULL,
@@ -1423,7 +1433,7 @@ execute_ufunc_loop(PyUFuncObject *self,
NULL);
/* Call the __prepare_array__ if necessary */
- if (prepare_ufunc_output(self, &op[2],
+ if (prepare_ufunc_output(ufunc, &op[2],
arr_prep[0], arr_prep_args, 0) < 0) {
return -1;
}
@@ -1439,7 +1449,7 @@ execute_ufunc_loop(PyUFuncObject *self,
PyArray_TRIVIALLY_ITERABLE_TRIPLE(op[0], op[1], op[2])) {
/* Call the __prepare_array__ if necessary */
- if (prepare_ufunc_output(self, &op[2],
+ if (prepare_ufunc_output(ufunc, &op[2],
arr_prep[0], arr_prep_args, 0) < 0) {
return -1;
}
@@ -1458,7 +1468,7 @@ execute_ufunc_loop(PyUFuncObject *self,
*/
NPY_UF_DBG_PRINT("iterator loop\n");
- if (iterator_loop(self, op, dtype, order,
+ if (iterator_loop(ufunc, op, dtypes, order,
buffersize, arr_prep, arr_prep_args,
innerloop, innerloopdata) < 0) {
return -1;
@@ -1513,19 +1523,17 @@ combine_ufunc_maskna(char **masks, npy_intp *strides, npy_intp count,
* innerloopdata - data to pass to the inner loop
*/
static int
-execute_ufunc_masked_loop(PyUFuncObject *self,
+execute_ufunc_masked_loop(PyUFuncObject *ufunc,
PyArrayObject *wheremask,
int use_maskna,
PyArrayObject **op,
- PyArray_Descr **dtype,
+ PyArray_Descr **dtypes,
NPY_ORDER order,
npy_intp buffersize,
PyObject **arr_prep,
- PyObject *arr_prep_args,
- PyUFuncGenericMaskedFunction innerloop,
- NpyAuxData *innerloopdata)
+ PyObject *arr_prep_args)
{
- int i, nin = self->nin, nout = self->nout;
+ int i, nin = ufunc->nin, nout = ufunc->nout;
int nop = nin + nout;
npy_uint32 op_flags[NPY_MAXARGS];
NpyIter *iter;
@@ -1534,8 +1542,8 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
NpyIter_IterNextFunc *iternext;
char **dataptr;
- npy_intp *stride;
- npy_intp *count_ptr;
+ npy_intp *strides;
+ npy_intp *countptr;
PyArrayObject **op_it;
@@ -1548,7 +1556,7 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
return -1;
}
op[nop] = wheremask;
- dtype[nop] = NULL;
+ dtypes[nop] = NULL;
default_op_out_flags |= NPY_ITER_WRITEMASKED;
}
@@ -1598,7 +1606,7 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
NPY_ITER_BUFFERED |
NPY_ITER_GROWINNER,
order, NPY_UNSAFE_CASTING,
- op_flags, dtype,
+ op_flags, dtypes,
0, NULL, NULL, buffersize);
if (iter == NULL) {
return -1;
@@ -1619,7 +1627,7 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
/* Call the __array_prepare__ functions where necessary */
for (i = 0; i < nout; ++i) {
- if (prepare_ufunc_output(self, &op[nin+i],
+ if (prepare_ufunc_output(ufunc, &op[nin+i],
arr_prep[i], arr_prep_args, i) < 0) {
NpyIter_Deallocate(iter);
return -1;
@@ -1628,6 +1636,9 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
/* Only do the loop if the iteration size is non-zero */
if (NpyIter_GetIterSize(iter) != 0) {
+ PyUFunc_MaskedStridedInnerLoopFunc *innerloop;
+ NpyAuxData *innerloopdata;
+ npy_intp fixed_strides[2*NPY_MAXARGS];
/* Validate that the prepare_ufunc_output didn't mess with pointers */
for (i = nin; i < nop; ++i) {
@@ -1640,6 +1651,20 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
}
}
+ /*
+ * Get the inner loop, with the possibility of specialization
+ * based on the fixed strides.
+ */
+ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
+ if (ufunc->masked_inner_loop_selector(ufunc, dtypes,
+ fixed_strides,
+ wheremask != NULL ? fixed_strides[nop]
+ : fixed_strides[nop + nin],
+ &innerloop, &innerloopdata) < 0) {
+ NpyIter_Deallocate(iter);
+ return -1;
+ }
+
/* Get the variables needed for the loop */
iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
@@ -1647,8 +1672,8 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
return -1;
}
dataptr = NpyIter_GetDataPtrArray(iter);
- stride = NpyIter_GetInnerStrideArray(iter);
- count_ptr = NpyIter_GetInnerLoopSizePtr(iter);
+ strides = NpyIter_GetInnerStrideArray(iter);
+ countptr = NpyIter_GetInnerLoopSizePtr(iter);
if (!needs_api) {
NPY_BEGIN_THREADS;
@@ -1658,26 +1683,30 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
/* Execute the loop */
if (wheremask != NULL) {
do {
- NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*count_ptr);
- innerloop(dataptr, dataptr[nop], count_ptr,
- stride, stride[nop], innerloopdata);
+ NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*countptr);
+ innerloop(dataptr, strides,
+ dataptr[nop], strides[nop],
+ *countptr, innerloopdata);
} while (iternext(iter));
}
else {
do {
- NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*count_ptr);
+ NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*countptr);
/* Combine the input NA masks for the output */
- combine_ufunc_maskna(&dataptr[nop], &stride[nop], *count_ptr,
+ combine_ufunc_maskna(&dataptr[nop], &strides[nop], *countptr,
nin, nout);
/* Evaluate the ufunc wherever the NA mask says */
- innerloop(dataptr, dataptr[nop + nin], count_ptr,
- stride, stride[nop + nin], innerloopdata);
+ innerloop(dataptr, strides,
+ dataptr[nop + nin], strides[nop + nin],
+ *countptr, innerloopdata);
} while (iternext(iter));
}
if (!needs_api) {
NPY_END_THREADS;
}
+
+ NPY_AUXDATA_FREE(innerloopdata);
}
NpyIter_Deallocate(iter);
@@ -1723,7 +1752,7 @@ make_arr_prep_args(npy_intp nin, PyObject *args, PyObject *kwds)
}
static int
-PyUFunc_GeneralizedFunction(PyUFuncObject *self,
+PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
PyObject *args, PyObject *kwds,
PyArrayObject **op)
{
@@ -1732,7 +1761,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
char *ufunc_name;
int retval = -1, subok = 1;
- PyArray_Descr *dtype[NPY_MAXARGS];
+ PyArray_Descr *dtypes[NPY_MAXARGS];
/* Use remapped axes for generalized ufunc */
int broadcast_ndim, op_ndim;
@@ -1774,30 +1803,30 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
/* When provided, extobj and typetup contain borrowed references */
PyObject *extobj = NULL, *type_tup = NULL;
- if (self == NULL) {
+ if (ufunc == NULL) {
PyErr_SetString(PyExc_ValueError, "function not supported");
return -1;
}
- nin = self->nin;
- nout = self->nout;
+ nin = ufunc->nin;
+ nout = ufunc->nout;
nop = nin + nout;
- ufunc_name = self->name ? self->name : "<unnamed ufunc>";
+ ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s\n", ufunc_name);
/* Initialize all the operands and dtypes to NULL */
for (i = 0; i < nop; ++i) {
op[i] = NULL;
- dtype[i] = NULL;
+ dtypes[i] = NULL;
arr_prep[i] = NULL;
}
NPY_UF_DBG_PRINT("Getting arguments\n");
/* Get all the arguments */
- retval = get_ufunc_arguments(self, args, kwds,
+ retval = get_ufunc_arguments(ufunc, args, kwds,
op, &order, &casting, &extobj,
&type_tup, &subok, NULL, &use_maskna);
if (retval < 0) {
@@ -1813,12 +1842,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
/* Figure out the number of dimensions needed by the iterator */
broadcast_ndim = 0;
for (i = 0; i < nin; ++i) {
- int n = PyArray_NDIM(op[i]) - self->core_num_dims[i];
+ int n = PyArray_NDIM(op[i]) - ufunc->core_num_dims[i];
if (n > broadcast_ndim) {
broadcast_ndim = n;
}
}
- op_ndim = broadcast_ndim + self->core_num_dim_ix;
+ op_ndim = broadcast_ndim + ufunc->core_num_dim_ix;
if (op_ndim > NPY_MAXDIMS) {
PyErr_Format(PyExc_ValueError,
"too many dimensions for generalized ufunc %s",
@@ -1829,7 +1858,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
/* Fill in op_axes for all the operands */
core_dim_ixs_size = 0;
- core_dim_ixs = self->core_dim_ixs;
+ core_dim_ixs = ufunc->core_dim_ixs;
for (i = 0; i < nop; ++i) {
int n;
if (op[i]) {
@@ -1837,7 +1866,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
* Note that n may be negative if broadcasting
* extends into the core dimensions.
*/
- n = PyArray_NDIM(op[i]) - self->core_num_dims[i];
+ n = PyArray_NDIM(op[i]) - ufunc->core_num_dims[i];
}
else {
n = broadcast_ndim;
@@ -1855,7 +1884,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
for (idim = broadcast_ndim; idim < op_ndim; ++idim) {
op_axes_arrays[i][idim] = -1;
}
- for (idim = 0; idim < self->core_num_dims[i]; ++idim) {
+ for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
if (n + idim >= 0) {
op_axes_arrays[i][broadcast_ndim + core_dim_ixs[idim]] =
n + idim;
@@ -1864,8 +1893,8 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
op_axes_arrays[i][broadcast_ndim + core_dim_ixs[idim]] = -1;
}
}
- core_dim_ixs_size += self->core_num_dims[i];
- core_dim_ixs += self->core_num_dims[i];
+ core_dim_ixs_size += ufunc->core_num_dims[i];
+ core_dim_ixs += ufunc->core_num_dims[i];
op_axes[i] = op_axes_arrays[i];
}
@@ -1888,8 +1917,14 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
NPY_UF_DBG_PRINT("Finding inner loop\n");
- retval = self->type_resolution_function(self, casting,
- op, type_tup, dtype, &innerloop, &innerloopdata);
+ retval = ufunc->type_resolution_function(ufunc, casting,
+ op, type_tup, dtypes);
+ if (retval < 0) {
+ goto fail;
+ }
+ /* For the generalized ufunc, we get the loop right away too */
+ retval = ufunc->legacy_inner_loop_selector(ufunc, dtypes,
+ &innerloop, &innerloopdata);
if (retval < 0) {
goto fail;
}
@@ -1900,7 +1935,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
* an attribute (signalling it can handle ndarray's)
* and is not already an ndarray or a subtype of the same type.
*/
- if (nin == 2 && nout == 1 && dtype[1]->type_num == NPY_OBJECT) {
+ if (nin == 2 && nout == 1 && dtypes[1]->type_num == NPY_OBJECT) {
PyObject *_obj = PyTuple_GET_ITEM(args, 1);
if (!PyArray_CheckExact(_obj)
/* If both are same subtype of object arrays, then proceed */
@@ -1915,12 +1950,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
#if NPY_UF_DBG_TRACING
printf("input types:\n");
for (i = 0; i < nin; ++i) {
- PyObject_Print((PyObject *)dtype[i], stdout, 0);
+ PyObject_Print((PyObject *)dtypes[i], stdout, 0);
printf(" ");
}
printf("\noutput types:\n");
for (i = nin; i < nop; ++i) {
- PyObject_Print((PyObject *)dtype[i], stdout, 0);
+ PyObject_Print((PyObject *)dtypes[i], stdout, 0);
printf(" ");
}
printf("\n");
@@ -1969,7 +2004,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
NPY_ITER_REFS_OK|
NPY_ITER_REDUCE_OK,
order, NPY_UNSAFE_CASTING, op_flags,
- dtype, op_ndim, op_axes, NULL, 0);
+ dtypes, op_ndim, op_axes, NULL, 0);
if (iter == NULL) {
retval = -1;
goto fail;
@@ -1990,9 +2025,9 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
inner_strides = (npy_intp *)PyArray_malloc(
NPY_SIZEOF_INTP * (nop+core_dim_ixs_size));
/* The strides after the first nop match core_dim_ixs */
- core_dim_ixs = self->core_dim_ixs;
+ core_dim_ixs = ufunc->core_dim_ixs;
inner_strides_tmp = inner_strides + nop;
- for (idim = 0; idim < self->core_num_dim_ix; ++idim) {
+ for (idim = 0; idim < ufunc->core_num_dim_ix; ++idim) {
ax_strides_tmp[idim] = NpyIter_GetAxisStrideArray(iter,
broadcast_ndim+idim);
if (ax_strides_tmp[idim] == NULL) {
@@ -2001,12 +2036,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
}
}
for (i = 0; i < nop; ++i) {
- for (idim = 0; idim < self->core_num_dims[i]; ++idim) {
+ for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
inner_strides_tmp[idim] = ax_strides_tmp[core_dim_ixs[idim]][i];
}
- core_dim_ixs += self->core_num_dims[i];
- inner_strides_tmp += self->core_num_dims[i];
+ core_dim_ixs += ufunc->core_num_dims[i];
+ inner_strides_tmp += ufunc->core_num_dims[i];
}
/* Set up the inner dimensions array */
@@ -2016,10 +2051,10 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
}
/* Move the core dimensions to start at the second element */
memmove(&inner_dimensions[1], &inner_dimensions[broadcast_ndim],
- NPY_SIZEOF_INTP * self->core_num_dim_ix);
+ NPY_SIZEOF_INTP * ufunc->core_num_dim_ix);
/* Remove all the core dimensions from the iterator */
- for (i = 0; i < self->core_num_dim_ix; ++i) {
+ for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
if (NpyIter_RemoveAxis(iter, broadcast_ndim) != NPY_SUCCEED) {
retval = -1;
goto fail;
@@ -2087,7 +2122,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
NpyIter_Deallocate(iter);
/* The caller takes ownership of all the references in op */
for (i = 0; i < nop; ++i) {
- Py_XDECREF(dtype[i]);
+ Py_XDECREF(dtypes[i]);
Py_XDECREF(arr_prep[i]);
}
Py_XDECREF(errobj);
@@ -2109,7 +2144,7 @@ fail:
for (i = 0; i < nop; ++i) {
Py_XDECREF(op[i]);
op[i] = NULL;
- Py_XDECREF(dtype[i]);
+ Py_XDECREF(dtypes[i]);
Py_XDECREF(arr_prep[i]);
}
Py_XDECREF(errobj);
@@ -2127,7 +2162,7 @@ fail:
* 'op' is an array of at least NPY_MAXARGS PyArrayObject *.
*/
NPY_NO_EXPORT int
-PyUFunc_GenericFunction(PyUFuncObject *self,
+PyUFunc_GenericFunction(PyUFuncObject *ufunc,
PyObject *args, PyObject *kwds,
PyArrayObject **op)
{
@@ -2137,24 +2172,13 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
int retval = -1, subok = 1;
int usemaskedloop = 0;
- PyArray_Descr *dtype[NPY_MAXARGS];
+ PyArray_Descr *dtypes[NPY_MAXARGS];
/* These parameters come from extobj= or from a TLS global */
int buffersize = 0, errormask = 0;
PyObject *errobj = NULL;
int first_error = 1;
- /* The selected inner loop */
- PyUFuncGenericFunction innerloop = NULL;
- void *innerloopdata = NULL;
-
- /*
- * The selected masked inner loop, when the 'where='
- * parameter or arrays with missing values are in op.
- */
- PyUFuncGenericMaskedFunction masked_innerloop = NULL;
- NpyAuxData *masked_innerloopdata = NULL;
-
/* The mask provided in the 'where=' parameter */
PyArrayObject *wheremask = NULL;
@@ -2174,34 +2198,34 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
/* When provided, extobj and typetup contain borrowed references */
PyObject *extobj = NULL, *type_tup = NULL;
- if (self == NULL) {
+ if (ufunc == NULL) {
PyErr_SetString(PyExc_ValueError, "function not supported");
return -1;
}
- if (self->core_enabled) {
- return PyUFunc_GeneralizedFunction(self, args, kwds, op);
+ if (ufunc->core_enabled) {
+ return PyUFunc_GeneralizedFunction(ufunc, args, kwds, op);
}
- nin = self->nin;
- nout = self->nout;
+ nin = ufunc->nin;
+ nout = ufunc->nout;
nop = nin + nout;
- ufunc_name = self->name ? self->name : "<unnamed ufunc>";
+ ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s\n", ufunc_name);
/* Initialize all the operands and dtypes to NULL */
for (i = 0; i < nop; ++i) {
op[i] = NULL;
- dtype[i] = NULL;
+ dtypes[i] = NULL;
arr_prep[i] = NULL;
}
NPY_UF_DBG_PRINT("Getting arguments\n");
/* Get all the arguments */
- retval = get_ufunc_arguments(self, args, kwds,
+ retval = get_ufunc_arguments(ufunc, args, kwds,
op, &order, &casting, &extobj,
&type_tup, &subok, &wheremask, &use_maskna);
if (retval < 0) {
@@ -2245,30 +2269,19 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
NPY_UF_DBG_PRINT("Finding inner loop\n");
- if (usemaskedloop) {
- retval = self->type_resolution_masked_function(self, casting,
- op, type_tup, dtype,
- &masked_innerloop, &masked_innerloopdata);
- if (retval < 0) {
- goto fail;
- }
+ retval = ufunc->type_resolution_function(ufunc, casting,
+ op, type_tup, dtypes);
+ if (retval < 0) {
+ goto fail;
}
- else {
- retval = self->type_resolution_function(self, casting,
- op, type_tup, dtype,
- &innerloop, &innerloopdata);
- if (retval < 0) {
- goto fail;
- }
+ /* Only do the trivial loop check for the unmasked version. */
+ if (!usemaskedloop) {
/*
- * This checks whether a trivial loop is ok,
- * making copies of scalar and one dimensional operands if that will
- * help.
- *
- * Only do the trivial loop check for the unmasked version.
+ * This checks whether a trivial loop is ok, making copies of
+ * scalar and one dimensional operands if that will help.
*/
- trivial_loop_ok = check_for_trivial_loop(self, op, dtype, buffersize);
+ trivial_loop_ok = check_for_trivial_loop(ufunc, op, dtypes, buffersize);
if (trivial_loop_ok < 0) {
goto fail;
}
@@ -2280,7 +2293,7 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
* an attribute (signalling it can handle ndarray's)
* and is not already an ndarray or a subtype of the same type.
*/
- if (nin == 2 && nout == 1 && dtype[1]->type_num == NPY_OBJECT) {
+ if (nin == 2 && nout == 1 && dtypes[1]->type_num == NPY_OBJECT) {
PyObject *_obj = PyTuple_GET_ITEM(args, 1);
if (!PyArray_CheckExact(_obj)
/* If both are same subtype of object arrays, then proceed */
@@ -2296,12 +2309,12 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
#if NPY_UF_DBG_TRACING
printf("input types:\n");
for (i = 0; i < nin; ++i) {
- PyObject_Print((PyObject *)dtype[i], stdout, 0);
+ PyObject_Print((PyObject *)dtypes[i], stdout, 0);
printf(" ");
}
printf("\noutput types:\n");
for (i = nin; i < nop; ++i) {
- PyObject_Print((PyObject *)dtype[i], stdout, 0);
+ PyObject_Print((PyObject *)dtypes[i], stdout, 0);
printf(" ");
}
printf("\n");
@@ -2323,17 +2336,6 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
}
}
- /*
- * If the loop wants the arrays, provide them.
- *
- * TODO: Remove this, since this is already basically broken
- * with the addition of the masked inner loops and
- * not worth fixing.
- */
- if (!usemaskedloop && _does_loop_use_arrays(innerloopdata)) {
- innerloopdata = (void*)op;
- }
-
/* Start with the floating-point exception flags cleared */
PyUFunc_clearfperr();
@@ -2341,18 +2343,29 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
if (usemaskedloop) {
NPY_UF_DBG_PRINT("Executing masked inner loop\n");
- retval = execute_ufunc_masked_loop(self, wheremask, use_maskna,
- op, dtype, order,
- buffersize, arr_prep, arr_prep_args,
- masked_innerloop, masked_innerloopdata);
+ retval = execute_ufunc_masked_loop(ufunc, wheremask, use_maskna,
+ op, dtypes, order,
+ buffersize, arr_prep, arr_prep_args);
}
else {
NPY_UF_DBG_PRINT("Executing unmasked inner loop\n");
- retval = execute_ufunc_loop(self, trivial_loop_ok,
- op, dtype, order,
- buffersize, arr_prep, arr_prep_args,
- innerloop, innerloopdata);
+ if (ufunc->legacy_inner_loop_selector != NULL) {
+ retval = execute_legacy_ufunc_loop(ufunc, trivial_loop_ok,
+ op, dtypes, order,
+ buffersize, arr_prep, arr_prep_args);
+ }
+ else {
+ /*
+ * TODO: When this is supported, it should be preferred over
+ * the legacy_inner_loop_selector
+ */
+ PyErr_SetString(PyExc_RuntimeError,
+ "usage of the new inner_loop_selector isn't "
+ "implemented yet");
+ retval = -1;
+ goto fail;
+ }
}
if (retval < 0) {
goto fail;
@@ -2367,7 +2380,7 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
/* The caller takes ownership of all the references in op */
for (i = 0; i < nop; ++i) {
- Py_XDECREF(dtype[i]);
+ Py_XDECREF(dtypes[i]);
Py_XDECREF(arr_prep[i]);
}
Py_XDECREF(errobj);
@@ -2384,7 +2397,7 @@ fail:
for (i = 0; i < nop; ++i) {
Py_XDECREF(op[i]);
op[i] = NULL;
- Py_XDECREF(dtype[i]);
+ Py_XDECREF(dtypes[i]);
Py_XDECREF(arr_prep[i]);
}
Py_XDECREF(errobj);
@@ -2403,7 +2416,7 @@ fail:
* Returns 0 on success, -1 on failure.
*/
static int
-get_binary_op_function(PyUFuncObject *self, int *otype,
+get_binary_op_function(PyUFuncObject *ufunc, int *otype,
PyUFuncGenericFunction *out_innerloop,
void **out_innerloopdata)
{
@@ -2414,13 +2427,13 @@ get_binary_op_function(PyUFuncObject *self, int *otype,
*otype);
/* If the type is custom and there are userloops, search for it here */
- if (self->userloops != NULL && PyTypeNum_ISUSERDEF(*otype)) {
+ if (ufunc->userloops != NULL && PyTypeNum_ISUSERDEF(*otype)) {
PyObject *key, *obj;
key = PyInt_FromLong(*otype);
if (key == NULL) {
return -1;
}
- obj = PyDict_GetItem(self->userloops, key);
+ obj = PyDict_GetItem(ufunc->userloops, key);
Py_DECREF(key);
if (obj != NULL) {
funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj);
@@ -2440,8 +2453,8 @@ get_binary_op_function(PyUFuncObject *self, int *otype,
}
/* Search for a function with compatible inputs */
- for (i = 0; i < self->ntypes; ++i) {
- char *types = self->types + i*self->nargs;
+ for (i = 0; i < ufunc->ntypes; ++i) {
+ char *types = ufunc->types + i*ufunc->nargs;
NPY_UF_DBG_PRINT3("Trying loop with signature %d %d -> %d\n",
types[0], types[1], types[2]);
@@ -2451,8 +2464,8 @@ get_binary_op_function(PyUFuncObject *self, int *otype,
(*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) {
/* If the signature is "xx->x", we found the loop */
if (types[2] == types[0]) {
- *out_innerloop = self->functions[i];
- *out_innerloopdata = self->data[i];
+ *out_innerloop = ufunc->functions[i];
+ *out_innerloopdata = ufunc->data[i];
*otype = types[0];
return 0;
}
@@ -2468,16 +2481,16 @@ get_binary_op_function(PyUFuncObject *self, int *otype,
}
/* Search for the exact function */
- for (i = 0; i < self->ntypes; ++i) {
- char *types = self->types + i*self->nargs;
+ for (i = 0; i < ufunc->ntypes; ++i) {
+ char *types = ufunc->types + i*ufunc->nargs;
if (PyArray_CanCastSafely(*otype, types[0]) &&
types[0] == types[1] &&
types[1] == types[2] &&
(*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) {
/* Since the signature is "xx->x", we found the loop */
- *out_innerloop = self->functions[i];
- *out_innerloopdata = self->data[i];
+ *out_innerloop = ufunc->functions[i];
+ *out_innerloopdata = ufunc->data[i];
*otype = types[0];
return 0;
}
@@ -2495,17 +2508,17 @@ get_binary_op_function(PyUFuncObject *self, int *otype,
* Returns 0 on success, -1 on failure.
*/
static int
-get_masked_binary_op_function(PyUFuncObject *self, PyArrayObject *arr,
+get_masked_binary_op_function(PyUFuncObject *ufunc, PyArrayObject *arr,
int otype,
PyArray_Descr **out_dtype,
- PyUFuncGenericMaskedFunction *out_innerloop,
+ PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
NpyAuxData **out_innerloopdata)
{
int i, retcode;
PyArrayObject *op[3] = {arr, arr, NULL};
- PyArray_Descr *dtype[3] = {NULL, NULL, NULL};
- PyObject *type_tup = NULL;
- char *ufunc_name = self->name ? self->name : "(unknown)";
+ PyArray_Descr *dtypes[3] = {NULL, NULL, NULL};
+ char *ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
+ npy_intp fixed_strides[3] = {NPY_MAX_INTP, NPY_MAX_INTP, NPY_MAX_INTP};
NPY_UF_DBG_PRINT1("Getting masked binary op function for type number %d\n",
otype);
@@ -2513,49 +2526,58 @@ get_masked_binary_op_function(PyUFuncObject *self, PyArrayObject *arr,
*out_dtype = NULL;
/* Build a type tuple if otype is specified */
- if (otype != NPY_NOTYPE) {
+ if (otype == NPY_NOTYPE) {
+ /* Use the type resolution function to find our dtypes */
+ retcode = ufunc->type_resolution_function(
+ ufunc, NPY_SAME_KIND_CASTING,
+ op, NULL, dtypes);
+ if (retcode == -1) {
+ return -1;
+ }
+ else if (retcode == -2) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "type resolution returned NotImplemented");
+ return -1;
+ }
+
+ /* The selected dtypes should all be equivalent */
+ if (!PyArray_EquivTypes(dtypes[0], dtypes[1]) ||
+ !PyArray_EquivTypes(dtypes[1], dtypes[2])) {
+ for (i = 0; i < 3; ++i) {
+ Py_DECREF(dtypes[i]);
+ }
+ PyErr_Format(PyExc_RuntimeError,
+ "could not find a type resolution appropriate for "
+ "reduce ufunc %s", ufunc_name);
+ return -1;
+ }
+ }
+ else {
PyArray_Descr *otype_dtype = PyArray_DescrFromType(otype);
if (otype_dtype == NULL) {
return -1;
}
+ dtypes[0] = otype_dtype;
Py_INCREF(otype_dtype);
+ dtypes[1] = otype_dtype;
Py_INCREF(otype_dtype);
- type_tup = Py_BuildValue("(NNN)",
- otype_dtype, otype_dtype, otype_dtype);
- if (type_tup == NULL) {
- return -1;
- }
+ dtypes[2] = otype_dtype;
}
- /* Use the type resolution function to find our loop */
- retcode = self->type_resolution_masked_function(self, NPY_SAME_KIND_CASTING,
- op, type_tup, dtype,
- out_innerloop, out_innerloopdata);
- Py_XDECREF(type_tup);
- if (retcode == -1) {
- return -1;
- }
- else if (retcode == -2) {
- PyErr_SetString(PyExc_RuntimeError,
- "type resolution returned NotImplemented");
- return -1;
- }
+ /* Get the inner loop for the resolved dtypes */
+ if (ufunc->masked_inner_loop_selector(ufunc, dtypes,
+ fixed_strides, NPY_MAX_INTP,
+ out_innerloop, out_innerloopdata) < 0) {
+ Py_DECREF(dtypes[0]);
+ Py_DECREF(dtypes[1]);
+ Py_DECREF(dtypes[2]);
- /* The selected dtypes should all be equivalent */
- if (!PyArray_EquivTypes(dtype[0], dtype[1]) ||
- !PyArray_EquivTypes(dtype[1], dtype[2])) {
- for (i = 0; i < 3; ++i) {
- Py_DECREF(dtype[i]);
- }
- PyErr_Format(PyExc_RuntimeError,
- "could not find a masked binary loop appropriate for "
- "reduce ufunc %s", ufunc_name);
return -1;
}
- *out_dtype = dtype[0];
- Py_DECREF(dtype[1]);
- Py_DECREF(dtype[2]);
+ *out_dtype = dtypes[0];
+ Py_DECREF(dtypes[1]);
+ Py_DECREF(dtypes[2]);
return 0;
}
@@ -2612,7 +2634,7 @@ initialize_reduce_result(int identity, PyArrayObject *result,
* this function does not validate them.
*/
static PyArrayObject *
-PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
+PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
int naxes, int *axes, int otype, int skipna, int keepdims)
{
int iaxes, ndim, retcode;
@@ -2627,10 +2649,10 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
/* The masked selected inner loop */
int use_maskna = 0;
- PyUFuncGenericMaskedFunction maskedinnerloop = NULL;
+ PyUFunc_MaskedStridedInnerLoopFunc *maskedinnerloop = NULL;
NpyAuxData *maskedinnerloopdata = NULL;
- char *ufunc_name = self->name ? self->name : "(unknown)";
+ char *ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
/* These parameters come from a TLS global */
int buffersize = 0, errormask = 0;
@@ -2679,12 +2701,12 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
/* Get the appropriate ufunc inner loop */
if (use_maskna) {
- retcode = get_masked_binary_op_function(self, arr, otype,
+ retcode = get_masked_binary_op_function(ufunc, arr, otype,
&otype_dtype, &maskedinnerloop, &maskedinnerloopdata);
}
else {
int otype_final = otype;
- retcode = get_binary_op_function(self, &otype_final,
+ retcode = get_binary_op_function(ufunc, &otype_final,
&innerloop, &innerloopdata);
NPY_UF_DBG_PRINT2("Loop retcode %d, otype final %d\n",
@@ -2726,7 +2748,7 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
innerloopdata = (void*)op;
}
- /* Allocate an output or conform 'out' to 'self' */
+ /* Allocate an output or conform 'out' to the reduction of 'arr' */
Py_XINCREF(otype_dtype);
result = PyArray_CreateReduceResult(arr, out,
otype_dtype, axis_flags, !skipna && use_maskna,
@@ -2789,7 +2811,7 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
* copied from 'arr', and create a view of 'arr' containing
* all the elements to reduce into 'result'.
*/
- arr_view = initialize_reduce_result(self->identity, result,
+ arr_view = initialize_reduce_result(ufunc->identity, result,
axis_flags, arr, skipna,
&skip_first_count, ufunc_name);
if (arr_view == NULL) {
@@ -2853,19 +2875,19 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
int needs_api;
NpyIter_IterNextFunc *iternext;
char **dataptr;
- npy_intp *stride;
- npy_intp *count_ptr;
+ npy_intp *strides;
+ npy_intp *countptr;
char *dataptr_copy[3];
- npy_intp stride_copy[3];
+ npy_intp strides_copy[3];
iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
goto fail;
}
dataptr = NpyIter_GetDataPtrArray(iter);
- stride = NpyIter_GetInnerStrideArray(iter);
- count_ptr = NpyIter_GetInnerLoopSizePtr(iter);
+ strides = NpyIter_GetInnerStrideArray(iter);
+ countptr = NpyIter_GetInnerLoopSizePtr(iter);
needs_api = NpyIter_IterationNeedsAPI(iter) ||
PyDataType_REFCHK(otype_dtype);
@@ -2878,14 +2900,14 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
if (!use_maskna) {
if (skip_first_count > 0) {
do {
- npy_intp count = *count_ptr;
+ npy_intp count = *countptr;
/* Skip any first-visit elements */
if (NpyIter_IsFirstVisit(iter, 0)) {
- if (stride[0] == 0) {
+ if (strides[0] == 0) {
--count;
--skip_first_count;
- dataptr[1] += stride[1];
+ dataptr[1] += strides[1];
}
else {
skip_first_count -= count;
@@ -2897,11 +2919,11 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
dataptr_copy[0] = dataptr[0];
dataptr_copy[1] = dataptr[1];
dataptr_copy[2] = dataptr[0];
- stride_copy[0] = stride[0];
- stride_copy[1] = stride[1];
- stride_copy[2] = stride[0];
+ strides_copy[0] = strides[0];
+ strides_copy[1] = strides[1];
+ strides_copy[2] = strides[0];
innerloop(dataptr_copy, &count,
- stride_copy, innerloopdata);
+ strides_copy, innerloopdata);
/* Jump to the faster loop when skipping is done */
if (skip_first_count == 0) {
@@ -2919,26 +2941,26 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
dataptr_copy[0] = dataptr[0];
dataptr_copy[1] = dataptr[1];
dataptr_copy[2] = dataptr[0];
- stride_copy[0] = stride[0];
- stride_copy[1] = stride[1];
- stride_copy[2] = stride[0];
- innerloop(dataptr_copy, count_ptr,
- stride_copy, innerloopdata);
+ strides_copy[0] = strides[0];
+ strides_copy[1] = strides[1];
+ strides_copy[2] = strides[0];
+ innerloop(dataptr_copy, countptr,
+ strides_copy, innerloopdata);
} while (iternext(iter));
}
/* Masked reduction */
else {
if (skip_first_count > 0) {
do {
- npy_intp count = *count_ptr;
+ npy_intp count = *countptr;
/* Skip any first-visit elements */
if (NpyIter_IsFirstVisit(iter, 0)) {
- if (stride[0] == 0) {
+ if (strides[0] == 0) {
--count;
--skip_first_count;
- dataptr[1] += stride[1];
- dataptr[2] += stride[2];
+ dataptr[1] += strides[1];
+ dataptr[2] += strides[2];
}
else {
skip_first_count -= count;
@@ -2950,15 +2972,16 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
dataptr_copy[0] = dataptr[0];
dataptr_copy[1] = dataptr[1];
dataptr_copy[2] = dataptr[0];
- stride_copy[0] = stride[0];
- stride_copy[1] = stride[1];
- stride_copy[2] = stride[0];
+ strides_copy[0] = strides[0];
+ strides_copy[1] = strides[1];
+ strides_copy[2] = strides[0];
/*
* If skipna=True, this masks based on the mask in 'arr',
* otherwise it masks based on the mask in 'result'
*/
- maskedinnerloop(dataptr_copy, dataptr[2], &count,
- stride_copy, stride[2], maskedinnerloopdata);
+ maskedinnerloop(dataptr_copy, strides_copy,
+ dataptr[2], strides[2],
+ count, maskedinnerloopdata);
/* Jump to the faster loop when skipping is done */
if (skip_first_count == 0) {
@@ -2976,15 +2999,16 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
dataptr_copy[0] = dataptr[0];
dataptr_copy[1] = dataptr[1];
dataptr_copy[2] = dataptr[0];
- stride_copy[0] = stride[0];
- stride_copy[1] = stride[1];
- stride_copy[2] = stride[0];
+ strides_copy[0] = strides[0];
+ strides_copy[1] = strides[1];
+ strides_copy[2] = strides[0];
/*
* If skipna=True, this masks based on the mask in 'arr',
* otherwise it masks based on the mask in 'result'
*/
- maskedinnerloop(dataptr_copy, dataptr[2], count_ptr,
- stride_copy, stride[2], maskedinnerloopdata);
+ maskedinnerloop(dataptr_copy, strides_copy,
+ dataptr[2], strides[2],
+ *countptr, maskedinnerloopdata);
} while (iternext(iter));
}
finish_loop:
@@ -3032,7 +3056,7 @@ fail:
static PyObject *
-PyUFunc_Accumulate(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
+PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
int axis, int otype, int skipna)
{
PyArrayObject *op[2];
@@ -3049,7 +3073,7 @@ PyUFunc_Accumulate(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
PyUFuncGenericFunction innerloop = NULL;
void *innerloopdata = NULL;
- char *ufunc_name = self->name ? self->name : "(unknown)";
+ char *ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
/* These parameters come from extobj= or from a TLS global */
int buffersize = 0, errormask = 0;
@@ -3084,7 +3108,7 @@ PyUFunc_Accumulate(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
Py_XINCREF(out);
otype_final = otype;
- if (get_binary_op_function(self, &otype_final,
+ if (get_binary_op_function(ufunc, &otype_final,
&innerloop, &innerloopdata) < 0) {
PyArray_Descr *dtype = PyArray_DescrFromType(otype);
PyErr_Format(PyExc_ValueError,
@@ -3421,7 +3445,7 @@ fail:
* output shape is based on the size of indices
*/
static PyObject *
-PyUFunc_Reduceat(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *ind,
+PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
PyArrayObject *out, int axis, int otype, int skipna)
{
PyArrayObject *op[3];
@@ -3442,7 +3466,7 @@ PyUFunc_Reduceat(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *ind,
PyUFuncGenericFunction innerloop = NULL;
void *innerloopdata = NULL;
- char *ufunc_name = self->name ? self->name : "(unknown)";
+ char *ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
char *opname = "reduceat";
/* These parameters come from extobj= or from a TLS global */
@@ -3488,7 +3512,7 @@ PyUFunc_Reduceat(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *ind,
Py_XINCREF(out);
otype_final = otype;
- if (get_binary_op_function(self, &otype_final,
+ if (get_binary_op_function(ufunc, &otype_final,
&innerloop, &innerloopdata) < 0) {
PyArray_Descr *dtype = PyArray_DescrFromType(otype);
PyErr_Format(PyExc_ValueError,
@@ -3789,7 +3813,7 @@ fail:
* but they are handled separately for speed)
*/
static PyObject *
-PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
+PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args,
PyObject *kwds, int operation)
{
int i, naxes=0;
@@ -3808,22 +3832,22 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
"dtype", "out", "skipna", NULL};
static char *_reduce_type[] = {"reduce", "accumulate", "reduceat", NULL};
- if (self == NULL) {
+ if (ufunc == NULL) {
PyErr_SetString(PyExc_ValueError, "function not supported");
return NULL;
}
- if (self->core_enabled) {
+ if (ufunc->core_enabled) {
PyErr_Format(PyExc_RuntimeError,
"Reduction not defined on ufunc with signature");
return NULL;
}
- if (self->nin != 2) {
+ if (ufunc->nin != 2) {
PyErr_Format(PyExc_ValueError,
"%s only supported for binary functions",
_reduce_type[operation]);
return NULL;
}
- if (self->nout != 1) {
+ if (ufunc->nout != 1) {
PyErr_Format(PyExc_ValueError,
"%s only supported for functions "
"returning a single value",
@@ -3865,7 +3889,7 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
}
/* Ensure input is an array */
if (!PyArray_Check(op) && !PyArray_IsScalar(op, Generic)) {
- context = Py_BuildValue("O(O)i", self, op, 0);
+ context = Py_BuildValue("O(O)i", ufunc, op, 0);
}
else {
context = NULL;
@@ -4004,8 +4028,8 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
*/
int typenum = PyArray_TYPE(mp);
if ((PyTypeNum_ISBOOL(typenum) || PyTypeNum_ISINTEGER(typenum))
- && ((strcmp(self->name,"add") == 0)
- || (strcmp(self->name,"multiply") == 0))) {
+ && ((strcmp(ufunc->name,"add") == 0)
+ || (strcmp(ufunc->name,"multiply") == 0))) {
if (PyTypeNum_ISBOOL(typenum)) {
typenum = NPY_LONG;
}
@@ -4024,7 +4048,7 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
switch(operation) {
case UFUNC_REDUCE:
- ret = PyUFunc_Reduce(self, mp, out, naxes, axes,
+ ret = PyUFunc_Reduce(ufunc, mp, out, naxes, axes,
otype->type_num, skipna, keepdims);
break;
case UFUNC_ACCUMULATE:
@@ -4035,7 +4059,7 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
Py_DECREF(mp);
return NULL;
}
- ret = (PyArrayObject *)PyUFunc_Accumulate(self, mp, out, axes[0],
+ ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc, mp, out, axes[0],
otype->type_num, skipna);
break;
case UFUNC_REDUCEAT:
@@ -4046,7 +4070,7 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
Py_DECREF(mp);
return NULL;
}
- ret = (PyArrayObject *)PyUFunc_Reduceat(self, mp, indices, out,
+ ret = (PyArrayObject *)PyUFunc_Reduceat(ufunc, mp, indices, out,
axes[0], otype->type_num, skipna);
Py_DECREF(indices);
break;
@@ -4218,7 +4242,7 @@ _find_array_wrap(PyObject *args, PyObject *kwds,
static PyObject *
-ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+ufunc_generic_call(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
{
int i;
PyTupleObject *ret;
@@ -4232,19 +4256,19 @@ ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
* Initialize all array objects to NULL to make cleanup easier
* if something goes wrong.
*/
- for(i = 0; i < self->nargs; i++) {
+ for(i = 0; i < ufunc->nargs; i++) {
mps[i] = NULL;
}
- errval = PyUFunc_GenericFunction(self, args, kwds, mps);
+ errval = PyUFunc_GenericFunction(ufunc, args, kwds, mps);
if (errval < 0) {
- for (i = 0; i < self->nargs; i++) {
+ for (i = 0; i < ufunc->nargs; i++) {
PyArray_XDECREF_ERR(mps[i]);
}
if (errval == -1) {
return NULL;
}
- else if (self->nin == 2 && self->nout == 1) {
+ else if (ufunc->nin == 2 && ufunc->nout == 1) {
/* To allow the other argument to be given a chance */
Py_INCREF(Py_NotImplemented);
return Py_NotImplemented;
@@ -4257,7 +4281,7 @@ ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
}
/* Free the input references */
- for (i = 0; i < self->nin; i++) {
+ for (i = 0; i < ufunc->nin; i++) {
Py_XDECREF(mps[i]);
}
@@ -4278,11 +4302,11 @@ ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
* None --- array-object passed in don't call PyArray_Return
* method --- the __array_wrap__ method to call.
*/
- _find_array_wrap(args, kwds, wraparr, self->nin, self->nout);
+ _find_array_wrap(args, kwds, wraparr, ufunc->nin, ufunc->nout);
/* wrap outputs */
- for (i = 0; i < self->nout; i++) {
- int j = self->nin+i;
+ for (i = 0; i < ufunc->nout; i++) {
+ int j = ufunc->nin+i;
PyObject *wrap = wraparr[i];
if (wrap != NULL) {
@@ -4291,7 +4315,7 @@ ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
retobj[i] = (PyObject *)mps[j];
continue;
}
- res = PyObject_CallFunction(wrap, "O(OOi)", mps[j], self, args, i);
+ res = PyObject_CallFunction(wrap, "O(OOi)", mps[j], ufunc, args, i);
if (res == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
PyErr_Clear();
res = PyObject_CallFunctionObjArgs(wrap, mps[j], NULL);
@@ -4316,19 +4340,19 @@ ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
}
- if (self->nout == 1) {
+ if (ufunc->nout == 1) {
return retobj[0];
}
else {
- ret = (PyTupleObject *)PyTuple_New(self->nout);
- for (i = 0; i < self->nout; i++) {
+ ret = (PyTupleObject *)PyTuple_New(ufunc->nout);
+ for (i = 0; i < ufunc->nout; i++) {
PyTuple_SET_ITEM(ret, i, retobj[i]);
}
return (PyObject *)ret;
}
fail:
- for (i = self->nin; i < self->nargs; i++) {
+ for (i = ufunc->nin; i < ufunc->nargs; i++) {
Py_XDECREF(mps[i]);
}
return NULL;
@@ -4484,59 +4508,61 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
char *name, char *doc,
int check_return, const char *signature)
{
- PyUFuncObject *self;
+ PyUFuncObject *ufunc;
- self = PyArray_malloc(sizeof(PyUFuncObject));
- if (self == NULL) {
+ ufunc = PyArray_malloc(sizeof(PyUFuncObject));
+ if (ufunc == NULL) {
return NULL;
}
- PyObject_Init((PyObject *)self, &PyUFunc_Type);
+ PyObject_Init((PyObject *)ufunc, &PyUFunc_Type);
- self->nin = nin;
- self->nout = nout;
- self->nargs = nin+nout;
- self->identity = identity;
+ ufunc->nin = nin;
+ ufunc->nout = nout;
+ ufunc->nargs = nin+nout;
+ ufunc->identity = identity;
- self->functions = func;
- self->data = data;
- self->types = types;
- self->ntypes = ntypes;
- self->check_return = check_return;
- self->ptr = NULL;
- self->obj = NULL;
- self->userloops=NULL;
+ ufunc->functions = func;
+ ufunc->data = data;
+ ufunc->types = types;
+ ufunc->ntypes = ntypes;
+ ufunc->check_return = check_return;
+ ufunc->ptr = NULL;
+ ufunc->obj = NULL;
+ ufunc->userloops=NULL;
- self->type_resolution_function = &PyUFunc_DefaultTypeResolution;
- self->type_resolution_masked_function =
- &PyUFunc_DefaultTypeResolutionMasked;
+ /* Type resolution and inner loop selection functions */
+ ufunc->type_resolution_function = &PyUFunc_DefaultTypeResolution;
+ ufunc->legacy_inner_loop_selector = &PyUFunc_DefaultLegacyInnerLoopSelector;
+ ufunc->inner_loop_selector = NULL;
+ ufunc->masked_inner_loop_selector = &PyUFunc_DefaultMaskedInnerLoopSelector;
if (name == NULL) {
- self->name = "?";
+ ufunc->name = "?";
}
else {
- self->name = name;
+ ufunc->name = name;
}
if (doc == NULL) {
- self->doc = "NULL";
+ ufunc->doc = "NULL";
}
else {
- self->doc = doc;
+ ufunc->doc = doc;
}
/* generalized ufunc */
- self->core_enabled = 0;
- self->core_num_dim_ix = 0;
- self->core_num_dims = NULL;
- self->core_dim_ixs = NULL;
- self->core_offsets = NULL;
- self->core_signature = NULL;
+ ufunc->core_enabled = 0;
+ ufunc->core_num_dim_ix = 0;
+ ufunc->core_num_dims = NULL;
+ ufunc->core_dim_ixs = NULL;
+ ufunc->core_offsets = NULL;
+ ufunc->core_signature = NULL;
if (signature != NULL) {
- if (_parse_signature(self, signature) != 0) {
- Py_DECREF(self);
+ if (_parse_signature(ufunc, signature) != 0) {
+ Py_DECREF(ufunc);
return NULL;
}
}
- return (PyObject *)self;
+ return (PyObject *)ufunc;
}
/* Specify that the loop specified by the given index should use the array of
@@ -4556,6 +4582,12 @@ PyUFunc_SetUsesArraysAsData(void **data, size_t i)
*
* NOTE: This is easier to specify with the type_resolution_function
* in the ufunc object.
+ *
+ * TODO: Remove this, since this is already basically broken
+ * with the addition of the masked inner loops and
+ * not worth fixing since the new loop selection functions
+ * have access to the full dtypes and can dynamically allocate
+ * arbitrary auxiliary data.
*/
static int
_does_loop_use_arrays(void *data)
@@ -4749,34 +4781,34 @@ PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc,
static void
-ufunc_dealloc(PyUFuncObject *self)
+ufunc_dealloc(PyUFuncObject *ufunc)
{
- if (self->core_num_dims) {
- PyArray_free(self->core_num_dims);
+ if (ufunc->core_num_dims) {
+ PyArray_free(ufunc->core_num_dims);
}
- if (self->core_dim_ixs) {
- PyArray_free(self->core_dim_ixs);
+ if (ufunc->core_dim_ixs) {
+ PyArray_free(ufunc->core_dim_ixs);
}
- if (self->core_offsets) {
- PyArray_free(self->core_offsets);
+ if (ufunc->core_offsets) {
+ PyArray_free(ufunc->core_offsets);
}
- if (self->core_signature) {
- PyArray_free(self->core_signature);
+ if (ufunc->core_signature) {
+ PyArray_free(ufunc->core_signature);
}
- if (self->ptr) {
- PyArray_free(self->ptr);
+ if (ufunc->ptr) {
+ PyArray_free(ufunc->ptr);
}
- Py_XDECREF(self->userloops);
- Py_XDECREF(self->obj);
- PyArray_free(self);
+ Py_XDECREF(ufunc->userloops);
+ Py_XDECREF(ufunc->obj);
+ PyArray_free(ufunc);
}
static PyObject *
-ufunc_repr(PyUFuncObject *self)
+ufunc_repr(PyUFuncObject *ufunc)
{
char buf[100];
- sprintf(buf, "<ufunc '%.50s'>", self->name);
+ sprintf(buf, "<ufunc '%.50s'>", ufunc->name);
return PyUString_FromString(buf);
}
@@ -4793,7 +4825,7 @@ ufunc_repr(PyUFuncObject *self)
* The result has dimensions a.ndim + b.ndim
*/
static PyObject *
-ufunc_outer(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+ufunc_outer(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
{
int i;
PyObject *ret;
@@ -4801,14 +4833,14 @@ ufunc_outer(PyUFuncObject *self, PyObject *args, PyObject *kwds)
PyObject *new_args, *tmp;
PyObject *shape1, *shape2, *newshape;
- if (self->core_enabled) {
+ if (ufunc->core_enabled) {
PyErr_Format(PyExc_TypeError,
"method outer is not allowed in ufunc with non-trivial"\
" signature");
return NULL;
}
- if(self->nin != 2) {
+ if(ufunc->nin != 2) {
PyErr_SetString(PyExc_ValueError,
"outer product only supported "\
"for binary functions");
@@ -4871,7 +4903,7 @@ ufunc_outer(PyUFuncObject *self, PyObject *args, PyObject *kwds)
Py_DECREF(ap1);
Py_DECREF(ap2);
Py_DECREF(ap_new);
- ret = ufunc_generic_call(self, new_args, kwds);
+ ret = ufunc_generic_call(ufunc, new_args, kwds);
Py_DECREF(new_args);
return ret;
@@ -4884,21 +4916,21 @@ ufunc_outer(PyUFuncObject *self, PyObject *args, PyObject *kwds)
static PyObject *
-ufunc_reduce(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+ufunc_reduce(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
{
- return PyUFunc_GenericReduction(self, args, kwds, UFUNC_REDUCE);
+ return PyUFunc_GenericReduction(ufunc, args, kwds, UFUNC_REDUCE);
}
static PyObject *
-ufunc_accumulate(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+ufunc_accumulate(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
{
- return PyUFunc_GenericReduction(self, args, kwds, UFUNC_ACCUMULATE);
+ return PyUFunc_GenericReduction(ufunc, args, kwds, UFUNC_ACCUMULATE);
}
static PyObject *
-ufunc_reduceat(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+ufunc_reduceat(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
{
- return PyUFunc_GenericReduction(self, args, kwds, UFUNC_REDUCEAT);
+ return PyUFunc_GenericReduction(ufunc, args, kwds, UFUNC_REDUCEAT);
}
@@ -4959,7 +4991,7 @@ _typecharfromnum(int num) {
}
static PyObject *
-ufunc_get_doc(PyUFuncObject *self)
+ufunc_get_doc(PyUFuncObject *ufunc)
{
/*
* Put docstring first or FindMethod finds it... could so some
@@ -4968,20 +5000,20 @@ ufunc_get_doc(PyUFuncObject *self)
* construct name(x1, x2, ...,[ out1, out2, ...]) __doc__
*/
PyObject *outargs, *inargs, *doc;
- outargs = _makeargs(self->nout, "out", 1);
- inargs = _makeargs(self->nin, "x", 0);
+ outargs = _makeargs(ufunc->nout, "out", 1);
+ inargs = _makeargs(ufunc->nin, "x", 0);
if (outargs == NULL) {
doc = PyUString_FromFormat("%s(%s)\n\n%s",
- self->name,
+ ufunc->name,
PyString_AS_STRING(inargs),
- self->doc);
+ ufunc->doc);
}
else {
doc = PyUString_FromFormat("%s(%s[, %s])\n\n%s",
- self->name,
+ ufunc->name,
PyString_AS_STRING(inargs),
PyString_AS_STRING(outargs),
- self->doc);
+ ufunc->doc);
Py_DECREF(outargs);
}
Py_DECREF(inargs);
@@ -4989,38 +5021,38 @@ ufunc_get_doc(PyUFuncObject *self)
}
static PyObject *
-ufunc_get_nin(PyUFuncObject *self)
+ufunc_get_nin(PyUFuncObject *ufunc)
{
- return PyInt_FromLong(self->nin);
+ return PyInt_FromLong(ufunc->nin);
}
static PyObject *
-ufunc_get_nout(PyUFuncObject *self)
+ufunc_get_nout(PyUFuncObject *ufunc)
{
- return PyInt_FromLong(self->nout);
+ return PyInt_FromLong(ufunc->nout);
}
static PyObject *
-ufunc_get_nargs(PyUFuncObject *self)
+ufunc_get_nargs(PyUFuncObject *ufunc)
{
- return PyInt_FromLong(self->nargs);
+ return PyInt_FromLong(ufunc->nargs);
}
static PyObject *
-ufunc_get_ntypes(PyUFuncObject *self)
+ufunc_get_ntypes(PyUFuncObject *ufunc)
{
- return PyInt_FromLong(self->ntypes);
+ return PyInt_FromLong(ufunc->ntypes);
}
static PyObject *
-ufunc_get_types(PyUFuncObject *self)
+ufunc_get_types(PyUFuncObject *ufunc)
{
/* return a list with types grouped input->output */
PyObject *list;
PyObject *str;
- int k, j, n, nt = self->ntypes;
- int ni = self->nin;
- int no = self->nout;
+ int k, j, n, nt = ufunc->ntypes;
+ int ni = ufunc->nin;
+ int no = ufunc->nout;
char *t;
list = PyList_New(nt);
if (list == NULL) {
@@ -5030,13 +5062,13 @@ ufunc_get_types(PyUFuncObject *self)
n = 0;
for (k = 0; k < nt; k++) {
for (j = 0; j<ni; j++) {
- t[j] = _typecharfromnum(self->types[n]);
+ t[j] = _typecharfromnum(ufunc->types[n]);
n++;
}
t[ni] = '-';
t[ni+1] = '>';
for (j = 0; j < no; j++) {
- t[ni + 2 + j] = _typecharfromnum(self->types[n]);
+ t[ni + 2 + j] = _typecharfromnum(ufunc->types[n]);
n++;
}
str = PyUString_FromStringAndSize(t, no + ni + 2);
@@ -5047,15 +5079,15 @@ ufunc_get_types(PyUFuncObject *self)
}
static PyObject *
-ufunc_get_name(PyUFuncObject *self)
+ufunc_get_name(PyUFuncObject *ufunc)
{
- return PyUString_FromString(self->name);
+ return PyUString_FromString(ufunc->name);
}
static PyObject *
-ufunc_get_identity(PyUFuncObject *self)
+ufunc_get_identity(PyUFuncObject *ufunc)
{
- switch(self->identity) {
+ switch(ufunc->identity) {
case PyUFunc_One:
return PyInt_FromLong(1);
case PyUFunc_Zero:
@@ -5065,12 +5097,12 @@ ufunc_get_identity(PyUFuncObject *self)
}
static PyObject *
-ufunc_get_signature(PyUFuncObject *self)
+ufunc_get_signature(PyUFuncObject *ufunc)
{
- if (!self->core_enabled) {
+ if (!ufunc->core_enabled) {
Py_RETURN_NONE;
}
- return PyUString_FromString(self->core_signature);
+ return PyUString_FromString(ufunc->core_signature);
}
#undef _typecharfromnum
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 4bdc9cbb9..8a2041a24 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -122,8 +122,7 @@ ensure_dtype_nbo(PyArray_Descr *type)
/*UFUNC_API
*
* This function applies the default type resolution rules
- * for the provided ufunc, filling out_dtypes, out_innerloop,
- * and out_innerloopdata.
+ * for the provided ufunc.
*
* Returns 0 on success, -1 on error.
*/
@@ -132,9 +131,7 @@ PyUFunc_DefaultTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int i, nop = ufunc->nin + ufunc->nout;
int retval = 0, any_object = 0;
@@ -151,20 +148,20 @@ PyUFunc_DefaultTypeResolution(PyUFuncObject *ufunc,
/*
* Decide the casting rules for inputs and outputs. We want
* NPY_SAFE_CASTING or stricter, so that the loop selection code
- * doesn't choose an integer loop for float inputs, for example.
+ * doesn't choose an integer loop for float inputs, or a float32
+ * loop for float64 inputs.
*/
input_casting = (casting > NPY_SAFE_CASTING) ? NPY_SAFE_CASTING : casting;
if (type_tup == NULL) {
/* Find the best ufunc inner loop, and fill in the dtypes */
- retval = find_best_ufunc_inner_loop(ufunc, operands,
+ retval = linear_search_type_resolution(ufunc, operands,
input_casting, casting, any_object,
- out_dtypes, out_innerloop, out_innerloopdata);
+ out_dtypes);
} else {
/* Find the specified ufunc inner loop, and fill in the dtypes */
- retval = find_specified_ufunc_inner_loop(ufunc, type_tup,
- operands, casting, any_object, out_dtypes,
- out_innerloop, out_innerloopdata);
+ retval = type_tuple_type_resolution(ufunc, type_tup,
+ operands, casting, any_object, out_dtypes);
}
return retval;
@@ -187,11 +184,9 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
- int i, type_num, type_num1, type_num2;
+ int i, type_num1, type_num2;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -213,7 +208,7 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
if (type_num1 >= NPY_NTYPES || type_num2 >= NPY_NTYPES ||
type_num1 == NPY_OBJECT || type_num2 == NPY_OBJECT) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_tup == NULL) {
@@ -232,8 +227,7 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
*/
if (!PyTuple_Check(type_tup) || PyTuple_GET_SIZE(type_tup) != 1) {
return PyUFunc_DefaultTypeResolution(ufunc, casting,
- operands, type_tup, out_dtypes,
- out_innerloop, out_innerloopdata);
+ operands, type_tup, out_dtypes);
}
if (!PyArray_DescrCheck(PyTuple_GET_ITEM(type_tup, 0))) {
@@ -270,31 +264,7 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- type_num = out_dtypes[0]->type_num;
-
- /* If we have a built-in type, search in the functions list */
- if (type_num < NPY_NTYPES) {
- char *types = ufunc->types;
- int n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "ufunc '%s' not supported for the input types",
- ufunc_name);
- return -1;
- }
- else {
- PyErr_SetString(PyExc_RuntimeError,
- "user type shouldn't have resulted from type promotion");
- return -1;
- }
+ return 0;
}
/*
@@ -313,11 +283,9 @@ PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
- int i, type_num, type_num1;
+ int i, type_num1;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -337,7 +305,7 @@ PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
type_num1 = PyArray_DESCR(operands[0])->type_num;
if (type_num1 >= NPY_NTYPES || type_num1 == NPY_OBJECT) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_tup == NULL) {
@@ -356,8 +324,7 @@ PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
*/
if (!PyTuple_Check(type_tup) || PyTuple_GET_SIZE(type_tup) != 1) {
return PyUFunc_DefaultTypeResolution(ufunc, casting,
- operands, type_tup, out_dtypes,
- out_innerloop, out_innerloopdata);
+ operands, type_tup, out_dtypes);
}
if (!PyArray_DescrCheck(PyTuple_GET_ITEM(type_tup, 0))) {
@@ -384,31 +351,7 @@ PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- type_num = out_dtypes[0]->type_num;
-
- /* If we have a built-in type, search in the functions list */
- if (type_num < NPY_NTYPES) {
- char *types = ufunc->types;
- int n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[2*i] == type_num) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "ufunc '%s' not supported for the input types",
- ufunc_name);
- return -1;
- }
- else {
- PyErr_SetString(PyExc_RuntimeError,
- "user type shouldn't have resulted from type promotion");
- return -1;
- }
+ return 0;
}
/*
@@ -421,14 +364,11 @@ PyUFunc_OnesLikeTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING NPY_UNUSED(casting),
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
return PyUFunc_SimpleUnaryOperationTypeResolution(ufunc,
NPY_UNSAFE_CASTING,
- operands, type_tup, out_dtypes,
- out_innerloop, out_innerloopdata);
+ operands, type_tup, out_dtypes);
}
@@ -449,11 +389,9 @@ PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
- int i, type_num, type_num1, type_num2;
+ int i, type_num1, type_num2;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -475,7 +413,7 @@ PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
if (type_num1 >= NPY_NTYPES || type_num2 >= NPY_NTYPES ||
type_num1 == NPY_OBJECT || type_num2 == NPY_OBJECT) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_tup == NULL) {
@@ -496,8 +434,7 @@ PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
*/
if (!PyTuple_Check(type_tup) || PyTuple_GET_SIZE(type_tup) != 1) {
return PyUFunc_DefaultTypeResolution(ufunc, casting,
- operands, type_tup, out_dtypes,
- out_innerloop, out_innerloopdata);
+ operands, type_tup, out_dtypes);
}
if (!PyArray_DescrCheck(PyTuple_GET_ITEM(type_tup, 0))) {
@@ -526,31 +463,7 @@ PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- type_num = out_dtypes[0]->type_num;
-
- /* If we have a built-in type, search in the functions list */
- if (type_num < NPY_NTYPES) {
- char *types = ufunc->types;
- int n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "ufunc '%s' not supported for the input types",
- ufunc_name);
- return -1;
- }
- else {
- PyErr_SetString(PyExc_RuntimeError,
- "user type shouldn't have resulted from type promotion");
- return -1;
- }
+ return 0;
}
/*
@@ -565,19 +478,16 @@ PyUFunc_AbsoluteTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
/* Use the default for complex types, to find the loop producing float */
if (PyTypeNum_ISCOMPLEX(PyArray_DESCR(operands[0])->type_num)) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
else {
return PyUFunc_SimpleUnaryOperationTypeResolution(ufunc, casting,
- operands, type_tup, out_dtypes,
- out_innerloop, out_innerloopdata);
+ operands, type_tup, out_dtypes);
}
}
@@ -672,13 +582,10 @@ PyUFunc_AdditionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int type_num1, type_num2;
- char *types;
- int i, n;
+ int i;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -689,7 +596,7 @@ PyUFunc_AdditionTypeResolution(PyUFuncObject *ufunc,
/* Use the default when datetime and timedelta are not involved */
if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_num1 == NPY_TIMEDELTA) {
@@ -831,22 +738,7 @@ PyUFunc_AdditionTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- /* Search in the functions list */
- types = ufunc->types;
- n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num1 && types[3*i+1] == type_num2) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "internal error: could not find appropriate datetime "
- "inner loop in %s ufunc", ufunc_name);
- return -1;
+ return 0;
type_reso_error: {
PyObject *errmsg;
@@ -879,13 +771,10 @@ PyUFunc_SubtractionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int type_num1, type_num2;
- char *types;
- int i, n;
+ int i;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -896,7 +785,7 @@ PyUFunc_SubtractionTypeResolution(PyUFuncObject *ufunc,
/* Use the default when datetime and timedelta are not involved */
if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_num1 == NPY_TIMEDELTA) {
@@ -1019,22 +908,7 @@ PyUFunc_SubtractionTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- /* Search in the functions list */
- types = ufunc->types;
- n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num1 && types[3*i+1] == type_num2) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "internal error: could not find appropriate datetime "
- "inner loop in %s ufunc", ufunc_name);
- return -1;
+ return 0;
type_reso_error: {
PyObject *errmsg;
@@ -1064,13 +938,10 @@ PyUFunc_MultiplicationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int type_num1, type_num2;
- char *types;
- int i, n;
+ int i;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -1081,7 +952,7 @@ PyUFunc_MultiplicationTypeResolution(PyUFuncObject *ufunc,
/* Use the default when datetime and timedelta are not involved */
if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_num1 == NPY_TIMEDELTA) {
@@ -1180,22 +1051,7 @@ PyUFunc_MultiplicationTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- /* Search in the functions list */
- types = ufunc->types;
- n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num1 && types[3*i+1] == type_num2) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "internal error: could not find appropriate datetime "
- "inner loop in %s ufunc", ufunc_name);
- return -1;
+ return 0;
type_reso_error: {
PyObject *errmsg;
@@ -1224,13 +1080,10 @@ PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int type_num1, type_num2;
- char *types;
- int i, n;
+ int i;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -1241,7 +1094,7 @@ PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
/* Use the default when datetime and timedelta are not involved */
if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_num1 == NPY_TIMEDELTA) {
@@ -1317,22 +1170,7 @@ PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- /* Search in the functions list */
- types = ufunc->types;
- n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num1 && types[3*i+1] == type_num2) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "internal error: could not find appropriate datetime "
- "inner loop in %s ufunc", ufunc_name);
- return -1;
+ return 0;
type_reso_error: {
PyObject *errmsg;
@@ -1349,6 +1187,121 @@ type_reso_error: {
}
}
+/*
+ * Searches the ufunc's registered user loops for one whose argument
+ * type numbers exactly match those of 'dtypes' (nin + nout entries).
+ *
+ * Only input dtypes with a user-defined type number are used as lookup
+ * keys into ufunc->userloops.
+ *
+ * Returns 1 and fills out_innerloop/out_innerloopdata when a loop matches,
+ * 0 when nothing matches, and -1 if building the lookup key fails.
+ */
+static int
+find_userloop(PyUFuncObject *ufunc,
+                PyArray_Descr **dtypes,
+                PyUFuncGenericFunction *out_innerloop,
+                void **out_innerloopdata)
+{
+    npy_intp nin = ufunc->nin;
+    npy_intp nargs = nin + ufunc->nout;
+    npy_intp iarg, k;
+
+    /* Remembers the last user type looked up, to skip immediate repeats */
+    int prev_userdef = -1;
+
+    for (iarg = 0; iarg < nin; ++iarg) {
+        int type_num = dtypes[iarg]->type_num;
+        PyObject *key, *capsule;
+        PyUFunc_Loop1d *loop;
+
+        /* Skip repeats of the previous user type and non-user types */
+        if (type_num == prev_userdef || !PyTypeNum_ISUSERDEF(type_num)) {
+            continue;
+        }
+        prev_userdef = type_num;
+
+        key = PyInt_FromLong(type_num);
+        if (key == NULL) {
+            return -1;
+        }
+        /* PyDict_GetItem returns a borrowed reference (or NULL) */
+        capsule = PyDict_GetItem(ufunc->userloops, key);
+        Py_DECREF(key);
+        if (capsule == NULL) {
+            continue;
+        }
+
+        /* Walk the linked list of loops registered for this type */
+        for (loop = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(capsule);
+                loop != NULL;
+                loop = loop->next) {
+            int *types = loop->arg_types;
+
+            k = 0;
+            while (k < nargs && types[k] == dtypes[k]->type_num) {
+                ++k;
+            }
+            /* Every argument type matched */
+            if (k == nargs) {
+                *out_innerloop = loop->func;
+                *out_innerloopdata = loop->data;
+                return 1;
+            }
+        }
+    }
+
+    /* No user loop matched */
+    return 0;
+}
+
+
+/*
+ * Default selector for legacy (PyUFuncGenericFunction) inner loops.
+ *
+ * Looks for a loop whose type signature exactly matches the type numbers
+ * of 'dtypes' (ufunc->nargs entries). Registered user loops are searched
+ * first, then the ufunc's built-in 'types' table.
+ *
+ * On success, stores the loop and its data in 'out_innerloop' and
+ * 'out_innerloopdata' and returns 0. If no loop matches, sets a TypeError
+ * describing the requested dtypes and returns -1. Also returns -1 if the
+ * user-loop search reports an error.
+ */
+NPY_NO_EXPORT int
+PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc,
+                                PyArray_Descr **dtypes,
+                                PyUFuncGenericFunction *out_innerloop,
+                                void **out_innerloopdata)
+{
+    int nargs = ufunc->nargs;
+    char *types;
+    const char *ufunc_name;
+    PyObject *errmsg;
+    int i, j;
+
+    ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
+
+    /*
+     * If there are user-loops search them first.
+     * TODO: There needs to be a loop selection acceleration structure,
+     *       like a hash table.
+     */
+    if (ufunc->userloops) {
+        switch (find_userloop(ufunc, dtypes,
+                    out_innerloop, out_innerloopdata)) {
+            /* Error */
+            case -1:
+                return -1;
+            /* Found a loop */
+            case 1:
+                return 0;
+            /* Otherwise no user loop matched; try the built-in loops */
+        }
+    }
+
+    types = ufunc->types;
+    for (i = 0; i < ufunc->ntypes; ++i) {
+        /* Compare this loop's type signature with the requested dtypes */
+        for (j = 0; j < nargs; ++j) {
+            if (types[j] != dtypes[j]->type_num) {
+                break;
+            }
+        }
+        if (j == nargs) {
+            *out_innerloop = ufunc->functions[i];
+            *out_innerloopdata = ufunc->data[i];
+            return 0;
+        }
+
+        /* Advance to the next loop's signature (nargs entries each) */
+        types += nargs;
+    }
+
+    /*
+     * No loop matched: build the TypeError message. Each intermediate
+     * object is checked so a failed allocation is never passed on to the
+     * concatenation helper.
+     */
+    errmsg = PyUString_FromFormat("ufunc '%s' did not contain a loop "
+                    "with signature matching types ", ufunc_name);
+    for (i = 0; errmsg != NULL && i < nargs; ++i) {
+        PyObject *repr = PyObject_Repr((PyObject *)dtypes[i]);
+        if (repr == NULL) {
+            Py_DECREF(errmsg);
+            return -1;
+        }
+        PyUString_ConcatAndDel(&errmsg, repr);
+        if (errmsg != NULL && i < nargs - 1) {
+            PyObject *sep = PyUString_FromString(" ");
+            if (sep == NULL) {
+                Py_DECREF(errmsg);
+                return -1;
+            }
+            PyUString_ConcatAndDel(&errmsg, sep);
+        }
+    }
+    /*
+     * If errmsg is NULL, one of the calls above failed and has presumably
+     * left its own exception set, so just report failure.
+     */
+    if (errmsg != NULL) {
+        PyErr_SetObject(PyExc_TypeError, errmsg);
+        /* PyErr_SetObject does not steal the reference; release ours */
+        Py_DECREF(errmsg);
+    }
+
+    return -1;
+}
+
+
typedef struct {
NpyAuxData base;
PyUFuncGenericFunction unmasked_innerloop;
@@ -1380,25 +1333,22 @@ ufunc_masker_data_clone(NpyAuxData *data)
*/
static void
unmasked_ufunc_loop_as_masked(
- char **args,
- char *mask,
- npy_intp *dimensions,
- npy_intp *steps,
- npy_intp mask_stride,
+ char **dataptrs, npy_intp *strides,
+ char *mask, npy_intp mask_stride,
+ npy_intp loopsize,
NpyAuxData *innerloopdata)
{
_ufunc_masker_data *data;
int iargs, nargs;
PyUFuncGenericFunction unmasked_innerloop;
void *unmasked_innerloopdata;
- npy_intp loopsize, subloopsize;
+ npy_intp subloopsize;
/* Put the aux data into local variables */
data = (_ufunc_masker_data *)innerloopdata;
unmasked_innerloop = data->unmasked_innerloop;
unmasked_innerloopdata = data->unmasked_innerloopdata;
nargs = data->nargs;
- loopsize = *dimensions;
/* Process the data as runs of unmasked values */
do {
@@ -1410,7 +1360,7 @@ unmasked_ufunc_loop_as_masked(
mask += mask_stride;
}
for (iargs = 0; iargs < nargs; ++iargs) {
- args[iargs] += subloopsize * steps[iargs];
+ dataptrs[iargs] += subloopsize * strides[iargs];
}
loopsize -= subloopsize;
/*
@@ -1423,35 +1373,40 @@ unmasked_ufunc_loop_as_masked(
++subloopsize;
mask += mask_stride;
}
- unmasked_innerloop(args, &subloopsize, steps, unmasked_innerloopdata);
+ unmasked_innerloop(dataptrs, &subloopsize, strides,
+ unmasked_innerloopdata);
for (iargs = 0; iargs < nargs; ++iargs) {
- args[iargs] += subloopsize * steps[iargs];
+ dataptrs[iargs] += subloopsize * strides[iargs];
}
loopsize -= subloopsize;
} while (loopsize > 0);
}
-/*UFUNC_API
- *
- * This function calls the unmasked type resolution function of the
- * ufunc, then wraps it with a function which only calls the inner
- * loop where the mask is True.
+/*
+ * This function wraps a legacy inner loop so it becomes masked.
*
* Returns 0 on success, -1 on error.
*/
NPY_NO_EXPORT int
-PyUFunc_DefaultTypeResolutionMasked(PyUFuncObject *ufunc,
- NPY_CASTING casting,
- PyArrayObject **operands,
- PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericMaskedFunction *out_innerloop,
- NpyAuxData **out_innerloopdata)
+PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ npy_intp *NPY_UNUSED(fixed_strides),
+ npy_intp NPY_UNUSED(fixed_mask_stride),
+ PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
+ NpyAuxData **out_innerloopdata)
{
int retcode;
_ufunc_masker_data *data;
+ if (ufunc->legacy_inner_loop_selector == NULL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "the ufunc default masked inner loop selector doesn't "
+ "yet support wrapping the new inner loop selector, it "
+ "still only wraps the legacy inner loop selector");
+ return -1;
+ }
+
/* Create a new NpyAuxData object for the masker data */
data = (_ufunc_masker_data *)PyArray_malloc(sizeof(_ufunc_masker_data));
if (data == NULL) {
@@ -1464,8 +1419,7 @@ PyUFunc_DefaultTypeResolutionMasked(PyUFuncObject *ufunc,
data->nargs = ufunc->nin + ufunc->nout;
/* Get the unmasked ufunc inner loop */
- retcode = ufunc->type_resolution_function(ufunc, casting,
- operands, type_tup, out_dtypes,
+ retcode = ufunc->legacy_inner_loop_selector(ufunc, dtypes,
&data->unmasked_innerloop, &data->unmasked_innerloopdata);
if (retcode < 0) {
PyArray_free(data);
@@ -1597,15 +1551,13 @@ set_ufunc_loop_data_types(PyUFuncObject *self, PyArrayObject **op,
* Does a search through the arguments and the loops
*/
static int
-find_ufunc_matching_userloop(PyUFuncObject *self,
+linear_search_userloop_type_resolution(PyUFuncObject *self,
PyArrayObject **op,
NPY_CASTING input_casting,
NPY_CASTING output_casting,
int any_object,
int use_min_scalar,
PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata,
int *out_no_castable_output,
char *out_err_src_typecode,
char *out_err_dst_typecode)
@@ -1647,11 +1599,6 @@ find_ufunc_matching_userloop(PyUFuncObject *self,
/* Found a match */
case 1:
set_ufunc_loop_data_types(self, op, out_dtype, types);
-
- /* Save the inner loop and its data */
- *out_innerloop = funcdata->func;
- *out_innerloopdata = funcdata->data;
-
return 0;
}
@@ -1668,16 +1615,14 @@ find_ufunc_matching_userloop(PyUFuncObject *self,
* Does a search through the arguments and the loops
*/
static int
-find_ufunc_specified_userloop(PyUFuncObject *self,
+type_tuple_userloop_type_resolution(PyUFuncObject *self,
int n_specified,
int *specified_types,
PyArrayObject **op,
NPY_CASTING casting,
int any_object,
int use_min_scalar,
- PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtype)
{
int i, j, nin = self->nin, nop = nin + self->nout;
PyUFunc_Loop1d *funcdata;
@@ -1734,11 +1679,6 @@ find_ufunc_specified_userloop(PyUFuncObject *self,
/* It works */
case 1:
set_ufunc_loop_data_types(self, op, out_dtype, types);
-
- /* Save the inner loop and its data */
- *out_innerloop = funcdata->func;
- *out_innerloopdata = funcdata->data;
-
return 0;
/* Didn't match */
case 0:
@@ -1838,14 +1778,12 @@ should_use_min_scalar(PyArrayObject **op, int nop)
* references in out_dtype. This function does not do its own clean-up.
*/
NPY_NO_EXPORT int
-find_best_ufunc_inner_loop(PyUFuncObject *self,
+linear_search_type_resolution(PyUFuncObject *self,
PyArrayObject **op,
NPY_CASTING input_casting,
NPY_CASTING output_casting,
int any_object,
- PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtype)
{
npy_intp i, j, nin = self->nin, nop = nin + self->nout;
int types[NPY_MAXARGS];
@@ -1861,10 +1799,9 @@ find_best_ufunc_inner_loop(PyUFuncObject *self,
/* If the ufunc has userloops, search for them. */
if (self->userloops) {
- switch (find_ufunc_matching_userloop(self, op,
+ switch (linear_search_userloop_type_resolution(self, op,
input_casting, output_casting,
- any_object, use_min_scalar,
- out_dtype, out_innerloop, out_innerloopdata,
+ any_object, use_min_scalar, out_dtype,
&no_castable_output, &err_src_typecode,
&err_dst_typecode)) {
/* Error */
@@ -1913,14 +1850,8 @@ find_best_ufunc_inner_loop(PyUFuncObject *self,
/* Found a match */
case 1:
set_ufunc_loop_data_types(self, op, out_dtype, types);
-
- /* Save the inner loop and its data */
- *out_innerloop = self->functions[i];
- *out_innerloopdata = self->data[i];
-
return 0;
}
-
}
/* If no function was found, throw an error */
@@ -1955,14 +1886,12 @@ find_best_ufunc_inner_loop(PyUFuncObject *self,
* references in out_dtype. This function does not do its own clean-up.
*/
NPY_NO_EXPORT int
-find_specified_ufunc_inner_loop(PyUFuncObject *self,
+type_tuple_type_resolution(PyUFuncObject *self,
PyObject *type_tup,
PyArrayObject **op,
NPY_CASTING casting,
int any_object,
- PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtype)
{
npy_intp i, j, n, nin = self->nin, nop = nin + self->nout;
int n_specified = 0;
@@ -2061,11 +1990,11 @@ find_specified_ufunc_inner_loop(PyUFuncObject *self,
/* If the ufunc has userloops, search for them. */
if (self->userloops) {
- switch (find_ufunc_specified_userloop(self,
+ switch (type_tuple_userloop_type_resolution(self,
n_specified, specified_types,
op, casting,
any_object, use_min_scalar,
- out_dtype, out_innerloop, out_innerloopdata)) {
+ out_dtype)) {
/* Error */
case -1:
return -1;
@@ -2112,11 +2041,6 @@ find_specified_ufunc_inner_loop(PyUFuncObject *self,
/* It worked */
case 1:
set_ufunc_loop_data_types(self, op, out_dtype, types);
-
- /* Save the inner loop and its data */
- *out_innerloop = self->functions[i];
- *out_innerloopdata = self->data[i];
-
return 0;
/* Didn't work */
case 0:
@@ -2128,7 +2052,6 @@ find_specified_ufunc_inner_loop(PyUFuncObject *self,
ufunc_name);
return -1;
}
-
}
/* If no function was found, throw an error */
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index f1ded2e9b..dad2b6c6c 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -6,80 +6,63 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_OnesLikeTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_AbsoluteTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_AdditionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_SubtractionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_MultiplicationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
+
NPY_NO_EXPORT int
PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
/*
* Does a linear search for the best inner loop of the ufunc.
@@ -88,14 +71,12 @@ PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
* references in out_dtype. This function does not do its own clean-up.
*/
NPY_NO_EXPORT int
-find_best_ufunc_inner_loop(PyUFuncObject *self,
+linear_search_type_resolution(PyUFuncObject *self,
PyArrayObject **op,
NPY_CASTING input_casting,
NPY_CASTING output_casting,
int any_object,
- PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtype);
/*
* Does a linear search for the inner loop of the ufunc specified by type_tup.
@@ -104,13 +85,26 @@ find_best_ufunc_inner_loop(PyUFuncObject *self,
* references in out_dtype. This function does not do its own clean-up.
*/
NPY_NO_EXPORT int
-find_specified_ufunc_inner_loop(PyUFuncObject *self,
+type_tuple_type_resolution(PyUFuncObject *self,
PyObject *type_tup,
PyArrayObject **op,
NPY_CASTING casting,
int any_object,
- PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtype);
+
+NPY_NO_EXPORT int
+PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ PyUFuncGenericFunction *out_innerloop,
+ void **out_innerloopdata);
+
+NPY_NO_EXPORT int
+PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ npy_intp *NPY_UNUSED(fixed_strides),
+ npy_intp NPY_UNUSED(fixed_mask_stride),
+ PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
+ NpyAuxData **out_innerloopdata);
+
#endif
diff --git a/numpy/core/src/umath/umathmodule.c.src b/numpy/core/src/umath/umathmodule.c.src
index 52dcd4c1b..02098f458 100644
--- a/numpy/core/src/umath/umathmodule.c.src
+++ b/numpy/core/src/umath/umathmodule.c.src
@@ -50,22 +50,30 @@ object_ufunc_type_resolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int i, nop = ufunc->nin + ufunc->nout;
+ PyArray_Descr *obj_dtype;
- out_dtypes[0] = PyArray_DescrFromType(NPY_OBJECT);
- if (out_dtypes[0] == NULL) {
+ obj_dtype = PyArray_DescrFromType(NPY_OBJECT);
+ if (obj_dtype == NULL) {
return -1;
}
- for (i = 1; i < nop; ++i) {
- out_dtypes[i] = out_dtypes[0];
- Py_INCREF(out_dtypes[0]);
+ for (i = 0; i < nop; ++i) {
+ Py_INCREF(obj_dtype);
+ out_dtypes[i] = obj_dtype;
}
+ return 0;
+}
+
+static int
+object_ufunc_loop_selector(PyUFuncObject *ufunc,
+ PyArray_Descr **NPY_UNUSED(dtypes),
+ PyUFuncGenericFunction *out_innerloop,
+ void **out_innerloopdata)
+{
*out_innerloop = ufunc->functions[0];
*out_innerloopdata = ufunc->data[0];
@@ -115,6 +123,7 @@ ufunc_frompyfunc(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *NPY_UNUS
self->core_signature = NULL;
self->type_resolution_function = &object_ufunc_type_resolution;
+ self->legacy_inner_loop_selector = &object_ufunc_loop_selector;
pyname = PyObject_GetAttrString(function, "__name__");
if (pyname) {