summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Wiebe <mwwiebe@gmail.com>2011-08-21 00:28:20 -0700
committerCharles Harris <charlesr.harris@gmail.com>2011-08-27 07:26:58 -0600
commitf3d60f9696856b47f4f5818b751eba2f7dcc402a (patch)
treece70ecf0a621b47537b24e1534c618c95ee2b597
parent4c88ab3e0020861488d77b6930d32474a7cce709 (diff)
downloadnumpy-f3d60f9696856b47f4f5818b751eba2f7dcc402a.tar.gz
ENH: ufunc: Separate type resolution from loop selection
This change sets the stage for being able to produce much higher performance loops by specializing based on any fixed strides. To do this, type resolution must happen first to determine the dtypes, then the iterator needs to be created, then finally the loop needs to be selected for the given dtypes and the strides that will be fixed throughout the iteration. The way type resolution and loop selection were tied together before precluded this possibility, but with the backwards-compatible design change in this commit, such specializations will be possible in the future.
-rw-r--r--numpy/core/code_generators/numpy_api.py1
-rw-r--r--numpy/core/include/numpy/ufuncobject.h179
-rw-r--r--numpy/core/src/umath/ufunc_object.c766
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.c467
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.h64
-rw-r--r--numpy/core/src/umath/umathmodule.c.src25
6 files changed, 757 insertions, 745 deletions
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index 76e006c65..2e159db06 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -386,7 +386,6 @@ ufunc_funcs_api = {
# End 1.6 API
'PyUFunc_DefaultTypeResolution': 39,
'PyUFunc_ValidateCasting': 40,
- 'PyUFunc_DefaultTypeResolutionMasked': 41,
}
# List of all the dicts which define the C API
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index 47b195b2f..c0a2308b5 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -7,23 +7,36 @@
extern "C" {
#endif
-/* The most generic inner loop for a standard element-wise ufunc */
+/*
+ * The legacy generic inner loop for a standard element-wise or
+ * generalized ufunc.
+ */
typedef void (*PyUFuncGenericFunction)
(char **args,
npy_intp *dimensions,
- npy_intp *steps,
+ npy_intp *strides,
void *innerloopdata);
/*
- * The most generic inner loop for a masked standard element-wise ufunc.
+ * The most generic one-dimensional inner loop for
+ * a standard element-wise ufunc. This typedef is also
+ * more consistent with the other NumPy function pointer typedefs
+ * than PyUFuncGenericFunction.
*/
-typedef void (*PyUFuncGenericMaskedFunction)
- (char **args,
- char *mask_arg,
- npy_intp *dimensions,
- npy_intp *steps,
- npy_intp mask_step,
- NpyAuxData *innerloopdata);
+typedef void (PyUFunc_StridedInnerLoopFunc)(
+ char **dataptrs, npy_intp *strides,
+ npy_intp count,
+ NpyAuxData *innerloopdata);
+
+/*
+ * The most generic one-dimensional inner loop for
+ * a masked standard element-wise ufunc.
+ */
+typedef void (PyUFunc_MaskedStridedInnerLoopFunc)(
+ char **dataptrs, npy_intp *strides,
+ char *maskptr, npy_intp mask_stride,
+ npy_intp count,
+ NpyAuxData *innerloopdata);
/* Forward declaration for the type resolution function */
struct _tagPyUFuncObject;
@@ -49,10 +62,6 @@ struct _tagPyUFuncObject;
* references to (ufunc->nin + ufunc->nout) new
* dtypes, one for each input and output. These
* dtypes should all be in native-endian format.
- * out_innerloop: Should be populated with the correct ufunc inner
- * loop for the given type.
- * out_innerloopdata: Should be populated with the void* data to
- * be passed into the out_innerloop function.
*
* Should return 0 on success, -1 on failure (with exception set),
* or -2 if Py_NotImplemented should be returned.
@@ -62,17 +71,53 @@ typedef int (PyUFunc_TypeResolutionFunc)(
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
-typedef int (PyUFunc_TypeResolutionMaskedFunc)(
- struct _tagPyUFuncObject *ufunc,
- NPY_CASTING casting,
- PyArrayObject **operands,
- PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericMaskedFunction *out_innerloop,
- NpyAuxData **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
+
+/*
+ * Given an array of DTypes as returned by the PyUFunc_TypeResolutionFunc,
+ * and an array of fixed strides (the array will contain NPY_MAX_INTP for
+ * strides which are not necessarily fixed), returns an inner loop
+ * with associated auxiliary data.
+ *
+ * For backwards compatibility, there is a variant of the inner loop
+ * selection which returns an inner loop irrespective of the strides,
+ * and with a void* static auxiliary data instead of an NpyAuxData *
+ * dynamically allocatable auxiliary data.
+ *
+ * ufunc: The ufunc object.
+ * dtypes: An array which has been populated with dtypes,
+ * in most cases by the type resolution function
+ * for the same ufunc.
+ * fixed_strides: For each input/output, either the stride that
+ * will be used every time the function is called
+ * or NPY_MAX_INTP if the stride might change or
+ * is not known ahead of time. The loop selection
+ * function may use this stride to pick inner loops
+ * which are optimized for contiguous or 0-stride
+ * cases.
+ * out_innerloop: Should be populated with the correct ufunc inner
+ * loop for the given type.
+ * out_innerloopdata: Should be populated with the void* data to
+ * be passed into the out_innerloop function.
+ */
+typedef int (PyUFunc_LegacyInnerLoopSelectionFunc)(
+ struct _tagPyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ PyUFuncGenericFunction *out_innerloop,
+ void **out_innerloopdata);
+typedef int (PyUFunc_InnerLoopSelectionFunc)(
+ struct _tagPyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ npy_intp *fixed_strides,
+ PyUFunc_StridedInnerLoopFunc **out_innerloop,
+ NpyAuxData **out_innerloopdata);
+typedef int (PyUFunc_MaskedInnerLoopSelectionFunc)(
+ struct _tagPyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ npy_intp *fixed_strides,
+ npy_intp fixed_mask_stride,
+ PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
+ NpyAuxData **out_innerloopdata);
typedef struct _tagPyUFuncObject {
PyObject_HEAD
@@ -137,17 +182,27 @@ typedef struct _tagPyUFuncObject {
char *core_signature;
/*
- * A function which resolves the types and returns an inner loop.
- * This is used by the regular ufunc, the reduction operations
- * have a different set of rules.
+ * A function which resolves the types and fills an array
+ * with the dtypes for the inputs and outputs.
*/
PyUFunc_TypeResolutionFunc *type_resolution_function;
/*
- * A function which resolves the types and returns an inner loop.
- * This is used by the regular ufunc when it requires using
- * a mask to select which elements to compute.
+ * A function which returns an inner loop written for
+ * NumPy 1.6 and earlier ufuncs. This is for backwards
+ * compatibility, and may be NULL if inner_loop_selector
+ * is specified.
*/
- PyUFunc_TypeResolutionMaskedFunc *type_resolution_masked_function;
+ PyUFunc_LegacyInnerLoopSelectionFunc *legacy_inner_loop_selector;
+ /*
+ * A function which returns an inner loop for the new mechanism
+ * in NumPy 1.7 and later. If provided, this is used, otherwise
+ * if NULL the legacy_inner_loop_selector is used instead.
+ */
+ PyUFunc_InnerLoopSelectionFunc *inner_loop_selector;
+ /*
+ * A function which returns a masked inner loop for the ufunc.
+ */
+ PyUFunc_MaskedInnerLoopSelectionFunc *masked_inner_loop_selector;
} PyUFuncObject;
#include "arrayobject.h"
@@ -231,12 +286,12 @@ typedef struct _loop1d_info {
#define UFUNC_PYVALS_NAME "UFUNC_PYVALS"
-#define UFUNC_CHECK_ERROR(arg) \
- do {if ((((arg)->obj & UFUNC_OBJ_NEEDS_API) && PyErr_Occurred()) || \
- ((arg)->errormask && \
- PyUFunc_checkfperr((arg)->errormask, \
- (arg)->errobj, \
- &(arg)->first))) \
+#define UFUNC_CHECK_ERROR(arg) \
+ do {if ((((arg)->obj & UFUNC_OBJ_NEEDS_API) && PyErr_Occurred()) || \
+ ((arg)->errormask && \
+ PyUFunc_checkfperr((arg)->errormask, \
+ (arg)->errobj, \
+ &(arg)->first))) \
goto fail;} while (0)
/* This code checks the IEEE status flags in a platform-dependent way */
@@ -251,12 +306,12 @@ typedef struct _loop1d_info {
#include <machine/fpu.h>
-#define UFUNC_CHECK_STATUS(ret) { \
- unsigned long fpstatus; \
- \
- fpstatus = ieee_get_fp_control(); \
+#define UFUNC_CHECK_STATUS(ret) { \
+ unsigned long fpstatus; \
+ \
+ fpstatus = ieee_get_fp_control(); \
/* clear status bits as well as disable exception mode if on */ \
- ieee_set_fp_control( 0 ); \
+ ieee_set_fp_control( 0 ); \
ret = ((IEEE_STATUS_DZE & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
| ((IEEE_STATUS_OVF & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
| ((IEEE_STATUS_UNF & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
@@ -273,13 +328,13 @@ typedef struct _loop1d_info {
#define UFUNC_NOFPE _control87(MCW_EM, MCW_EM);
#endif
-#define UFUNC_CHECK_STATUS(ret) { \
- int fpstatus = (int) _clearfp(); \
- \
+#define UFUNC_CHECK_STATUS(ret) { \
+ int fpstatus = (int) _clearfp(); \
+ \
ret = ((SW_ZERODIVIDE & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
- | ((SW_OVERFLOW & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
+ | ((SW_OVERFLOW & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
| ((SW_UNDERFLOW & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
- | ((SW_INVALID & fpstatus) ? UFUNC_FPE_INVALID : 0); \
+ | ((SW_INVALID & fpstatus) ? UFUNC_FPE_INVALID : 0); \
}
/* Solaris --------------------------------------------------------*/
@@ -290,15 +345,15 @@ typedef struct _loop1d_info {
defined(__NetBSD__)
#include <ieeefp.h>
-#define UFUNC_CHECK_STATUS(ret) { \
- int fpstatus; \
- \
- fpstatus = (int) fpgetsticky(); \
- ret = ((FP_X_DZ & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
- | ((FP_X_OFL & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
- | ((FP_X_UFL & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
- | ((FP_X_INV & fpstatus) ? UFUNC_FPE_INVALID : 0); \
- (void) fpsetsticky(0); \
+#define UFUNC_CHECK_STATUS(ret) { \
+ int fpstatus; \
+ \
+ fpstatus = (int) fpgetsticky(); \
+ ret = ((FP_X_DZ & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
+ | ((FP_X_OFL & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
+ | ((FP_X_UFL & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
+ | ((FP_X_INV & fpstatus) ? UFUNC_FPE_INVALID : 0); \
+ (void) fpsetsticky(0); \
}
#elif defined(__GLIBC__) || defined(__APPLE__) || \
@@ -312,15 +367,15 @@ typedef struct _loop1d_info {
#include "fenv/fenv.c"
#endif
-#define UFUNC_CHECK_STATUS(ret) { \
- int fpstatus = (int) fetestexcept(FE_DIVBYZERO | FE_OVERFLOW | \
- FE_UNDERFLOW | FE_INVALID); \
+#define UFUNC_CHECK_STATUS(ret) { \
+ int fpstatus = (int) fetestexcept(FE_DIVBYZERO | FE_OVERFLOW | \
+ FE_UNDERFLOW | FE_INVALID); \
ret = ((FE_DIVBYZERO & fpstatus) ? UFUNC_FPE_DIVIDEBYZERO : 0) \
| ((FE_OVERFLOW & fpstatus) ? UFUNC_FPE_OVERFLOW : 0) \
| ((FE_UNDERFLOW & fpstatus) ? UFUNC_FPE_UNDERFLOW : 0) \
| ((FE_INVALID & fpstatus) ? UFUNC_FPE_INVALID : 0); \
- (void) feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | \
- FE_UNDERFLOW | FE_INVALID); \
+ (void) feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | \
+ FE_UNDERFLOW | FE_INVALID); \
}
#elif defined(_AIX)
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 643e1d6f0..e222b4945 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -39,6 +39,7 @@
#include "numpy/noprefix.h"
#include "numpy/ufuncobject.h"
#include "lowlevel_strided_loops.h"
+#include "ufunc_type_resolution.h"
#include "ufunc_object.h"
@@ -544,11 +545,11 @@ _is_same_name(const char* s1, const char* s2)
/*
* Sets core_num_dim_ix, core_num_dims, core_dim_ixs, core_offsets,
- * and core_signature in PyUFuncObject "self". Returns 0 unless an
+ * and core_signature in PyUFuncObject "ufunc". Returns 0 unless an
* error occurred.
*/
static int
-_parse_signature(PyUFuncObject *self, const char *signature)
+_parse_signature(PyUFuncObject *ufunc, const char *signature)
{
size_t len;
char const **var_names;
@@ -565,9 +566,9 @@ _parse_signature(PyUFuncObject *self, const char *signature)
}
len = strlen(signature);
- self->core_signature = PyArray_malloc(sizeof(char) * (len+1));
- if (self->core_signature) {
- strcpy(self->core_signature, signature);
+ ufunc->core_signature = PyArray_malloc(sizeof(char) * (len+1));
+ if (ufunc->core_signature) {
+ strcpy(ufunc->core_signature, signature);
}
/* Allocate sufficient memory to store pointers to all dimension names */
var_names = PyArray_malloc(sizeof(char const*) * len);
@@ -576,13 +577,13 @@ _parse_signature(PyUFuncObject *self, const char *signature)
return -1;
}
- self->core_enabled = 1;
- self->core_num_dim_ix = 0;
- self->core_num_dims = PyArray_malloc(sizeof(int) * self->nargs);
- self->core_dim_ixs = PyArray_malloc(sizeof(int) * len); /* shrink this later */
- self->core_offsets = PyArray_malloc(sizeof(int) * self->nargs);
- if (self->core_num_dims == NULL || self->core_dim_ixs == NULL
- || self->core_offsets == NULL) {
+ ufunc->core_enabled = 1;
+ ufunc->core_num_dim_ix = 0;
+ ufunc->core_num_dims = PyArray_malloc(sizeof(int) * ufunc->nargs);
+ ufunc->core_dim_ixs = PyArray_malloc(sizeof(int) * len); /* shrink this later */
+ ufunc->core_offsets = PyArray_malloc(sizeof(int) * ufunc->nargs);
+ if (ufunc->core_num_dims == NULL || ufunc->core_dim_ixs == NULL
+ || ufunc->core_offsets == NULL) {
PyErr_NoMemory();
goto fail;
}
@@ -590,7 +591,7 @@ _parse_signature(PyUFuncObject *self, const char *signature)
i = _next_non_white_space(signature, 0);
while (signature[i] != '\0') {
/* loop over input/output arguments */
- if (cur_arg == self->nin) {
+ if (cur_arg == ufunc->nin) {
/* expect "->" */
if (signature[i] != '-' || signature[i+1] != '>') {
parse_error = "expect '->'";
@@ -615,17 +616,17 @@ _parse_signature(PyUFuncObject *self, const char *signature)
parse_error = "expect dimension name";
goto fail;
}
- while (j < self->core_num_dim_ix) {
+ while (j < ufunc->core_num_dim_ix) {
if (_is_same_name(signature+i, var_names[j])) {
break;
}
j++;
}
- if (j >= self->core_num_dim_ix) {
+ if (j >= ufunc->core_num_dim_ix) {
var_names[j] = signature+i;
- self->core_num_dim_ix++;
+ ufunc->core_num_dim_ix++;
}
- self->core_dim_ixs[cur_core_dim] = j;
+ ufunc->core_dim_ixs[cur_core_dim] = j;
cur_core_dim++;
nd++;
i = _get_end_of_name(signature, i);
@@ -643,13 +644,13 @@ _parse_signature(PyUFuncObject *self, const char *signature)
}
}
}
- self->core_num_dims[cur_arg] = nd;
- self->core_offsets[cur_arg] = cur_core_dim-nd;
+ ufunc->core_num_dims[cur_arg] = nd;
+ ufunc->core_offsets[cur_arg] = cur_core_dim-nd;
cur_arg++;
nd = 0;
i = _next_non_white_space(signature, i + 1);
- if (cur_arg != self->nin && cur_arg != self->nargs) {
+ if (cur_arg != ufunc->nin && cur_arg != ufunc->nargs) {
/*
* The list of input arguments (or output arguments) was
* only read partially
@@ -661,15 +662,15 @@ _parse_signature(PyUFuncObject *self, const char *signature)
i = _next_non_white_space(signature, i + 1);
}
}
- if (cur_arg != self->nargs) {
+ if (cur_arg != ufunc->nargs) {
parse_error = "incomplete signature: not all arguments found";
goto fail;
}
- self->core_dim_ixs = PyArray_realloc(self->core_dim_ixs,
+ ufunc->core_dim_ixs = PyArray_realloc(ufunc->core_dim_ixs,
sizeof(int)*cur_core_dim);
/* check for trivial core-signature, e.g. "(),()->()" */
if (cur_core_dim == 0) {
- self->core_enabled = 0;
+ ufunc->core_enabled = 0;
}
PyArray_free((void*)var_names);
return 0;
@@ -701,7 +702,7 @@ fail:
* non-zero references in out_op. This
* function does not do its own clean-up.
*/
-static int get_ufunc_arguments(PyUFuncObject *self,
+static int get_ufunc_arguments(PyUFuncObject *ufunc,
PyObject *args, PyObject *kwds,
PyArrayObject **out_op,
NPY_ORDER *out_order,
@@ -712,7 +713,7 @@ static int get_ufunc_arguments(PyUFuncObject *self,
PyArrayObject **out_wheremask,
int *out_use_maskna)
{
- int i, nargs, nin = self->nin, nout = self->nout;
+ int i, nargs, nin = ufunc->nin, nout = ufunc->nout;
PyObject *obj, *context;
PyObject *str_key_obj = NULL;
char *ufunc_name;
@@ -720,7 +721,7 @@ static int get_ufunc_arguments(PyUFuncObject *self,
int any_flexible = 0, any_object = 0;
int any_non_maskna_out = 0, any_maskna_out = 0;
- ufunc_name = self->name ? self->name : "<unnamed ufunc>";
+ ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
*out_extobj = NULL;
*out_typetup = NULL;
@@ -730,7 +731,7 @@ static int get_ufunc_arguments(PyUFuncObject *self,
/* Check number of arguments */
nargs = PyTuple_Size(args);
- if ((nargs < nin) || (nargs > self->nargs)) {
+ if ((nargs < nin) || (nargs > ufunc->nargs)) {
PyErr_SetString(PyExc_ValueError, "invalid number of arguments");
return -1;
}
@@ -746,7 +747,7 @@ static int get_ufunc_arguments(PyUFuncObject *self,
* TODO: There should be a comment here explaining what
* context does.
*/
- context = Py_BuildValue("OOi", self, args, i);
+ context = Py_BuildValue("OOi", ufunc, args, i);
if (context == NULL) {
return -1;
}
@@ -1036,12 +1037,12 @@ fail:
* -1 if there is an error.
*/
static int
-check_for_trivial_loop(PyUFuncObject *self,
+check_for_trivial_loop(PyUFuncObject *ufunc,
PyArrayObject **op,
PyArray_Descr **dtype,
npy_intp buffersize)
{
- npy_intp i, nin = self->nin, nop = nin + self->nout;
+ npy_intp i, nin = ufunc->nin, nop = nin + ufunc->nout;
for (i = 0; i < nop; ++i) {
/*
@@ -1152,7 +1153,7 @@ trivial_three_operand_loop(PyArrayObject **op,
* exactly the same, which may be more strict than before.
*/
static int
-prepare_ufunc_output(PyUFuncObject *self,
+prepare_ufunc_output(PyUFuncObject *ufunc,
PyArrayObject **op,
PyObject *arr_prep,
PyObject *arr_prep_args,
@@ -1163,7 +1164,7 @@ prepare_ufunc_output(PyUFuncObject *self,
PyArrayObject *arr;
res = PyObject_CallFunction(arr_prep, "O(OOi)",
- *op, self, arr_prep_args, i);
+ *op, ufunc, arr_prep_args, i);
if ((res == NULL) || (res == Py_None) || !PyArray_Check(res)) {
if (!PyErr_Occurred()){
PyErr_SetString(PyExc_TypeError,
@@ -1207,7 +1208,7 @@ prepare_ufunc_output(PyUFuncObject *self,
}
static int
-iterator_loop(PyUFuncObject *self,
+iterator_loop(PyUFuncObject *ufunc,
PyArrayObject **op,
PyArray_Descr **dtype,
NPY_ORDER order,
@@ -1217,7 +1218,7 @@ iterator_loop(PyUFuncObject *self,
PyUFuncGenericFunction innerloop,
void *innerloopdata)
{
- npy_intp i, nin = self->nin, nout = self->nout;
+ npy_intp i, nin = ufunc->nin, nout = ufunc->nout;
npy_intp nop = nin + nout;
npy_uint32 op_flags[NPY_MAXARGS];
NpyIter *iter;
@@ -1278,7 +1279,7 @@ iterator_loop(PyUFuncObject *self,
/* Call the __array_prepare__ functions where necessary */
for (i = 0; i < nout; ++i) {
- if (prepare_ufunc_output(self, &op[nin+i],
+ if (prepare_ufunc_output(ufunc, &op[nin+i],
arr_prep[i], arr_prep_args, i) < 0) {
NpyIter_Deallocate(iter);
return -1;
@@ -1341,18 +1342,27 @@ iterator_loop(PyUFuncObject *self,
* innerloopdata - data to pass to the inner loop
*/
static int
-execute_ufunc_loop(PyUFuncObject *self,
+execute_legacy_ufunc_loop(PyUFuncObject *ufunc,
int trivial_loop_ok,
PyArrayObject **op,
- PyArray_Descr **dtype,
+ PyArray_Descr **dtypes,
NPY_ORDER order,
npy_intp buffersize,
PyObject **arr_prep,
- PyObject *arr_prep_args,
- PyUFuncGenericFunction innerloop,
- void *innerloopdata)
+ PyObject *arr_prep_args)
{
- npy_intp nin = self->nin, nout = self->nout;
+ npy_intp nin = ufunc->nin, nout = ufunc->nout;
+ PyUFuncGenericFunction innerloop;
+ void *innerloopdata;
+
+ if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
+ &innerloop, &innerloopdata) < 0) {
+ return -1;
+ }
+ /* If the loop wants the arrays, provide them. */
+ if (_does_loop_use_arrays(innerloopdata)) {
+ innerloopdata = (void*)op;
+ }
/* First check for the trivial cases that don't need an iterator */
if (trivial_loop_ok) {
@@ -1360,9 +1370,9 @@ execute_ufunc_loop(PyUFuncObject *self,
if (op[1] == NULL &&
(order == NPY_ANYORDER || order == NPY_KEEPORDER) &&
PyArray_TRIVIALLY_ITERABLE(op[0])) {
- Py_INCREF(dtype[1]);
+ Py_INCREF(dtypes[1]);
op[1] = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
- dtype[1],
+ dtypes[1],
PyArray_NDIM(op[0]),
PyArray_DIMS(op[0]),
NULL, NULL,
@@ -1371,7 +1381,7 @@ execute_ufunc_loop(PyUFuncObject *self,
NULL);
/* Call the __prepare_array__ if necessary */
- if (prepare_ufunc_output(self, &op[1],
+ if (prepare_ufunc_output(ufunc, &op[1],
arr_prep[0], arr_prep_args, 0) < 0) {
return -1;
}
@@ -1386,7 +1396,7 @@ execute_ufunc_loop(PyUFuncObject *self,
PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1])) {
/* Call the __prepare_array__ if necessary */
- if (prepare_ufunc_output(self, &op[1],
+ if (prepare_ufunc_output(ufunc, &op[1],
arr_prep[0], arr_prep_args, 0) < 0) {
return -1;
}
@@ -1412,9 +1422,9 @@ execute_ufunc_loop(PyUFuncObject *self,
else {
tmp = op[1];
}
- Py_INCREF(dtype[2]);
+ Py_INCREF(dtypes[2]);
op[2] = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
- dtype[2],
+ dtypes[2],
PyArray_NDIM(tmp),
PyArray_DIMS(tmp),
NULL, NULL,
@@ -1423,7 +1433,7 @@ execute_ufunc_loop(PyUFuncObject *self,
NULL);
/* Call the __prepare_array__ if necessary */
- if (prepare_ufunc_output(self, &op[2],
+ if (prepare_ufunc_output(ufunc, &op[2],
arr_prep[0], arr_prep_args, 0) < 0) {
return -1;
}
@@ -1439,7 +1449,7 @@ execute_ufunc_loop(PyUFuncObject *self,
PyArray_TRIVIALLY_ITERABLE_TRIPLE(op[0], op[1], op[2])) {
/* Call the __prepare_array__ if necessary */
- if (prepare_ufunc_output(self, &op[2],
+ if (prepare_ufunc_output(ufunc, &op[2],
arr_prep[0], arr_prep_args, 0) < 0) {
return -1;
}
@@ -1458,7 +1468,7 @@ execute_ufunc_loop(PyUFuncObject *self,
*/
NPY_UF_DBG_PRINT("iterator loop\n");
- if (iterator_loop(self, op, dtype, order,
+ if (iterator_loop(ufunc, op, dtypes, order,
buffersize, arr_prep, arr_prep_args,
innerloop, innerloopdata) < 0) {
return -1;
@@ -1513,19 +1523,17 @@ combine_ufunc_maskna(char **masks, npy_intp *strides, npy_intp count,
* innerloopdata - data to pass to the inner loop
*/
static int
-execute_ufunc_masked_loop(PyUFuncObject *self,
+execute_ufunc_masked_loop(PyUFuncObject *ufunc,
PyArrayObject *wheremask,
int use_maskna,
PyArrayObject **op,
- PyArray_Descr **dtype,
+ PyArray_Descr **dtypes,
NPY_ORDER order,
npy_intp buffersize,
PyObject **arr_prep,
- PyObject *arr_prep_args,
- PyUFuncGenericMaskedFunction innerloop,
- NpyAuxData *innerloopdata)
+ PyObject *arr_prep_args)
{
- int i, nin = self->nin, nout = self->nout;
+ int i, nin = ufunc->nin, nout = ufunc->nout;
int nop = nin + nout;
npy_uint32 op_flags[NPY_MAXARGS];
NpyIter *iter;
@@ -1534,8 +1542,8 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
NpyIter_IterNextFunc *iternext;
char **dataptr;
- npy_intp *stride;
- npy_intp *count_ptr;
+ npy_intp *strides;
+ npy_intp *countptr;
PyArrayObject **op_it;
@@ -1548,7 +1556,7 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
return -1;
}
op[nop] = wheremask;
- dtype[nop] = NULL;
+ dtypes[nop] = NULL;
default_op_out_flags |= NPY_ITER_WRITEMASKED;
}
@@ -1598,7 +1606,7 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
NPY_ITER_BUFFERED |
NPY_ITER_GROWINNER,
order, NPY_UNSAFE_CASTING,
- op_flags, dtype,
+ op_flags, dtypes,
0, NULL, NULL, buffersize);
if (iter == NULL) {
return -1;
@@ -1619,7 +1627,7 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
/* Call the __array_prepare__ functions where necessary */
for (i = 0; i < nout; ++i) {
- if (prepare_ufunc_output(self, &op[nin+i],
+ if (prepare_ufunc_output(ufunc, &op[nin+i],
arr_prep[i], arr_prep_args, i) < 0) {
NpyIter_Deallocate(iter);
return -1;
@@ -1628,6 +1636,9 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
/* Only do the loop if the iteration size is non-zero */
if (NpyIter_GetIterSize(iter) != 0) {
+ PyUFunc_MaskedStridedInnerLoopFunc *innerloop;
+ NpyAuxData *innerloopdata;
+ npy_intp fixed_strides[2*NPY_MAXARGS];
/* Validate that the prepare_ufunc_output didn't mess with pointers */
for (i = nin; i < nop; ++i) {
@@ -1640,6 +1651,20 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
}
}
+ /*
+ * Get the inner loop, with the possibility of specialization
+ * based on the fixed strides.
+ */
+ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
+ if (ufunc->masked_inner_loop_selector(ufunc, dtypes,
+ fixed_strides,
+ wheremask != NULL ? fixed_strides[nop]
+ : fixed_strides[nop + nin],
+ &innerloop, &innerloopdata) < 0) {
+ NpyIter_Deallocate(iter);
+ return -1;
+ }
+
/* Get the variables needed for the loop */
iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
@@ -1647,8 +1672,8 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
return -1;
}
dataptr = NpyIter_GetDataPtrArray(iter);
- stride = NpyIter_GetInnerStrideArray(iter);
- count_ptr = NpyIter_GetInnerLoopSizePtr(iter);
+ strides = NpyIter_GetInnerStrideArray(iter);
+ countptr = NpyIter_GetInnerLoopSizePtr(iter);
if (!needs_api) {
NPY_BEGIN_THREADS;
@@ -1658,26 +1683,30 @@ execute_ufunc_masked_loop(PyUFuncObject *self,
/* Execute the loop */
if (wheremask != NULL) {
do {
- NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*count_ptr);
- innerloop(dataptr, dataptr[nop], count_ptr,
- stride, stride[nop], innerloopdata);
+ NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*countptr);
+ innerloop(dataptr, strides,
+ dataptr[nop], strides[nop],
+ *countptr, innerloopdata);
} while (iternext(iter));
}
else {
do {
- NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*count_ptr);
+ NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*countptr);
/* Combine the input NA masks for the output */
- combine_ufunc_maskna(&dataptr[nop], &stride[nop], *count_ptr,
+ combine_ufunc_maskna(&dataptr[nop], &strides[nop], *countptr,
nin, nout);
/* Evaluate the ufunc wherever the NA mask says */
- innerloop(dataptr, dataptr[nop + nin], count_ptr,
- stride, stride[nop + nin], innerloopdata);
+ innerloop(dataptr, strides,
+ dataptr[nop + nin], strides[nop + nin],
+ *countptr, innerloopdata);
} while (iternext(iter));
}
if (!needs_api) {
NPY_END_THREADS;
}
+
+ NPY_AUXDATA_FREE(innerloopdata);
}
NpyIter_Deallocate(iter);
@@ -1723,7 +1752,7 @@ make_arr_prep_args(npy_intp nin, PyObject *args, PyObject *kwds)
}
static int
-PyUFunc_GeneralizedFunction(PyUFuncObject *self,
+PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
PyObject *args, PyObject *kwds,
PyArrayObject **op)
{
@@ -1732,7 +1761,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
char *ufunc_name;
int retval = -1, subok = 1;
- PyArray_Descr *dtype[NPY_MAXARGS];
+ PyArray_Descr *dtypes[NPY_MAXARGS];
/* Use remapped axes for generalized ufunc */
int broadcast_ndim, op_ndim;
@@ -1774,30 +1803,30 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
/* When provided, extobj and typetup contain borrowed references */
PyObject *extobj = NULL, *type_tup = NULL;
- if (self == NULL) {
+ if (ufunc == NULL) {
PyErr_SetString(PyExc_ValueError, "function not supported");
return -1;
}
- nin = self->nin;
- nout = self->nout;
+ nin = ufunc->nin;
+ nout = ufunc->nout;
nop = nin + nout;
- ufunc_name = self->name ? self->name : "<unnamed ufunc>";
+ ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s\n", ufunc_name);
/* Initialize all the operands and dtypes to NULL */
for (i = 0; i < nop; ++i) {
op[i] = NULL;
- dtype[i] = NULL;
+ dtypes[i] = NULL;
arr_prep[i] = NULL;
}
NPY_UF_DBG_PRINT("Getting arguments\n");
/* Get all the arguments */
- retval = get_ufunc_arguments(self, args, kwds,
+ retval = get_ufunc_arguments(ufunc, args, kwds,
op, &order, &casting, &extobj,
&type_tup, &subok, NULL, &use_maskna);
if (retval < 0) {
@@ -1813,12 +1842,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
/* Figure out the number of dimensions needed by the iterator */
broadcast_ndim = 0;
for (i = 0; i < nin; ++i) {
- int n = PyArray_NDIM(op[i]) - self->core_num_dims[i];
+ int n = PyArray_NDIM(op[i]) - ufunc->core_num_dims[i];
if (n > broadcast_ndim) {
broadcast_ndim = n;
}
}
- op_ndim = broadcast_ndim + self->core_num_dim_ix;
+ op_ndim = broadcast_ndim + ufunc->core_num_dim_ix;
if (op_ndim > NPY_MAXDIMS) {
PyErr_Format(PyExc_ValueError,
"too many dimensions for generalized ufunc %s",
@@ -1829,7 +1858,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
/* Fill in op_axes for all the operands */
core_dim_ixs_size = 0;
- core_dim_ixs = self->core_dim_ixs;
+ core_dim_ixs = ufunc->core_dim_ixs;
for (i = 0; i < nop; ++i) {
int n;
if (op[i]) {
@@ -1837,7 +1866,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
* Note that n may be negative if broadcasting
* extends into the core dimensions.
*/
- n = PyArray_NDIM(op[i]) - self->core_num_dims[i];
+ n = PyArray_NDIM(op[i]) - ufunc->core_num_dims[i];
}
else {
n = broadcast_ndim;
@@ -1855,7 +1884,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
for (idim = broadcast_ndim; idim < op_ndim; ++idim) {
op_axes_arrays[i][idim] = -1;
}
- for (idim = 0; idim < self->core_num_dims[i]; ++idim) {
+ for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
if (n + idim >= 0) {
op_axes_arrays[i][broadcast_ndim + core_dim_ixs[idim]] =
n + idim;
@@ -1864,8 +1893,8 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
op_axes_arrays[i][broadcast_ndim + core_dim_ixs[idim]] = -1;
}
}
- core_dim_ixs_size += self->core_num_dims[i];
- core_dim_ixs += self->core_num_dims[i];
+ core_dim_ixs_size += ufunc->core_num_dims[i];
+ core_dim_ixs += ufunc->core_num_dims[i];
op_axes[i] = op_axes_arrays[i];
}
@@ -1888,8 +1917,14 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
NPY_UF_DBG_PRINT("Finding inner loop\n");
- retval = self->type_resolution_function(self, casting,
- op, type_tup, dtype, &innerloop, &innerloopdata);
+ retval = ufunc->type_resolution_function(ufunc, casting,
+ op, type_tup, dtypes);
+ if (retval < 0) {
+ goto fail;
+ }
+ /* For the generalized ufunc, we get the loop right away too */
+ retval = ufunc->legacy_inner_loop_selector(ufunc, dtypes,
+ &innerloop, &innerloopdata);
if (retval < 0) {
goto fail;
}
@@ -1900,7 +1935,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
* an attribute (signalling it can handle ndarray's)
* and is not already an ndarray or a subtype of the same type.
*/
- if (nin == 2 && nout == 1 && dtype[1]->type_num == NPY_OBJECT) {
+ if (nin == 2 && nout == 1 && dtypes[1]->type_num == NPY_OBJECT) {
PyObject *_obj = PyTuple_GET_ITEM(args, 1);
if (!PyArray_CheckExact(_obj)
/* If both are same subtype of object arrays, then proceed */
@@ -1915,12 +1950,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
#if NPY_UF_DBG_TRACING
printf("input types:\n");
for (i = 0; i < nin; ++i) {
- PyObject_Print((PyObject *)dtype[i], stdout, 0);
+ PyObject_Print((PyObject *)dtypes[i], stdout, 0);
printf(" ");
}
printf("\noutput types:\n");
for (i = nin; i < nop; ++i) {
- PyObject_Print((PyObject *)dtype[i], stdout, 0);
+ PyObject_Print((PyObject *)dtypes[i], stdout, 0);
printf(" ");
}
printf("\n");
@@ -1969,7 +2004,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
NPY_ITER_REFS_OK|
NPY_ITER_REDUCE_OK,
order, NPY_UNSAFE_CASTING, op_flags,
- dtype, op_ndim, op_axes, NULL, 0);
+ dtypes, op_ndim, op_axes, NULL, 0);
if (iter == NULL) {
retval = -1;
goto fail;
@@ -1990,9 +2025,9 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
inner_strides = (npy_intp *)PyArray_malloc(
NPY_SIZEOF_INTP * (nop+core_dim_ixs_size));
/* The strides after the first nop match core_dim_ixs */
- core_dim_ixs = self->core_dim_ixs;
+ core_dim_ixs = ufunc->core_dim_ixs;
inner_strides_tmp = inner_strides + nop;
- for (idim = 0; idim < self->core_num_dim_ix; ++idim) {
+ for (idim = 0; idim < ufunc->core_num_dim_ix; ++idim) {
ax_strides_tmp[idim] = NpyIter_GetAxisStrideArray(iter,
broadcast_ndim+idim);
if (ax_strides_tmp[idim] == NULL) {
@@ -2001,12 +2036,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
}
}
for (i = 0; i < nop; ++i) {
- for (idim = 0; idim < self->core_num_dims[i]; ++idim) {
+ for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
inner_strides_tmp[idim] = ax_strides_tmp[core_dim_ixs[idim]][i];
}
- core_dim_ixs += self->core_num_dims[i];
- inner_strides_tmp += self->core_num_dims[i];
+ core_dim_ixs += ufunc->core_num_dims[i];
+ inner_strides_tmp += ufunc->core_num_dims[i];
}
/* Set up the inner dimensions array */
@@ -2016,10 +2051,10 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
}
/* Move the core dimensions to start at the second element */
memmove(&inner_dimensions[1], &inner_dimensions[broadcast_ndim],
- NPY_SIZEOF_INTP * self->core_num_dim_ix);
+ NPY_SIZEOF_INTP * ufunc->core_num_dim_ix);
/* Remove all the core dimensions from the iterator */
- for (i = 0; i < self->core_num_dim_ix; ++i) {
+ for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
if (NpyIter_RemoveAxis(iter, broadcast_ndim) != NPY_SUCCEED) {
retval = -1;
goto fail;
@@ -2087,7 +2122,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *self,
NpyIter_Deallocate(iter);
/* The caller takes ownership of all the references in op */
for (i = 0; i < nop; ++i) {
- Py_XDECREF(dtype[i]);
+ Py_XDECREF(dtypes[i]);
Py_XDECREF(arr_prep[i]);
}
Py_XDECREF(errobj);
@@ -2109,7 +2144,7 @@ fail:
for (i = 0; i < nop; ++i) {
Py_XDECREF(op[i]);
op[i] = NULL;
- Py_XDECREF(dtype[i]);
+ Py_XDECREF(dtypes[i]);
Py_XDECREF(arr_prep[i]);
}
Py_XDECREF(errobj);
@@ -2127,7 +2162,7 @@ fail:
* 'op' is an array of at least NPY_MAXARGS PyArrayObject *.
*/
NPY_NO_EXPORT int
-PyUFunc_GenericFunction(PyUFuncObject *self,
+PyUFunc_GenericFunction(PyUFuncObject *ufunc,
PyObject *args, PyObject *kwds,
PyArrayObject **op)
{
@@ -2137,24 +2172,13 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
int retval = -1, subok = 1;
int usemaskedloop = 0;
- PyArray_Descr *dtype[NPY_MAXARGS];
+ PyArray_Descr *dtypes[NPY_MAXARGS];
/* These parameters come from extobj= or from a TLS global */
int buffersize = 0, errormask = 0;
PyObject *errobj = NULL;
int first_error = 1;
- /* The selected inner loop */
- PyUFuncGenericFunction innerloop = NULL;
- void *innerloopdata = NULL;
-
- /*
- * The selected masked inner loop, when the 'where='
- * parameter or arrays with missing values are in op.
- */
- PyUFuncGenericMaskedFunction masked_innerloop = NULL;
- NpyAuxData *masked_innerloopdata = NULL;
-
/* The mask provided in the 'where=' parameter */
PyArrayObject *wheremask = NULL;
@@ -2174,34 +2198,34 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
/* When provided, extobj and typetup contain borrowed references */
PyObject *extobj = NULL, *type_tup = NULL;
- if (self == NULL) {
+ if (ufunc == NULL) {
PyErr_SetString(PyExc_ValueError, "function not supported");
return -1;
}
- if (self->core_enabled) {
- return PyUFunc_GeneralizedFunction(self, args, kwds, op);
+ if (ufunc->core_enabled) {
+ return PyUFunc_GeneralizedFunction(ufunc, args, kwds, op);
}
- nin = self->nin;
- nout = self->nout;
+ nin = ufunc->nin;
+ nout = ufunc->nout;
nop = nin + nout;
- ufunc_name = self->name ? self->name : "<unnamed ufunc>";
+ ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s\n", ufunc_name);
/* Initialize all the operands and dtypes to NULL */
for (i = 0; i < nop; ++i) {
op[i] = NULL;
- dtype[i] = NULL;
+ dtypes[i] = NULL;
arr_prep[i] = NULL;
}
NPY_UF_DBG_PRINT("Getting arguments\n");
/* Get all the arguments */
- retval = get_ufunc_arguments(self, args, kwds,
+ retval = get_ufunc_arguments(ufunc, args, kwds,
op, &order, &casting, &extobj,
&type_tup, &subok, &wheremask, &use_maskna);
if (retval < 0) {
@@ -2245,30 +2269,19 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
NPY_UF_DBG_PRINT("Finding inner loop\n");
- if (usemaskedloop) {
- retval = self->type_resolution_masked_function(self, casting,
- op, type_tup, dtype,
- &masked_innerloop, &masked_innerloopdata);
- if (retval < 0) {
- goto fail;
- }
+ retval = ufunc->type_resolution_function(ufunc, casting,
+ op, type_tup, dtypes);
+ if (retval < 0) {
+ goto fail;
}
- else {
- retval = self->type_resolution_function(self, casting,
- op, type_tup, dtype,
- &innerloop, &innerloopdata);
- if (retval < 0) {
- goto fail;
- }
+ /* Only do the trivial loop check for the unmasked version. */
+ if (!usemaskedloop) {
/*
- * This checks whether a trivial loop is ok,
- * making copies of scalar and one dimensional operands if that will
- * help.
- *
- * Only do the trivial loop check for the unmasked version.
+ * This checks whether a trivial loop is ok, making copies of
+ * scalar and one dimensional operands if that will help.
*/
- trivial_loop_ok = check_for_trivial_loop(self, op, dtype, buffersize);
+ trivial_loop_ok = check_for_trivial_loop(ufunc, op, dtypes, buffersize);
if (trivial_loop_ok < 0) {
goto fail;
}
@@ -2280,7 +2293,7 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
* an attribute (signalling it can handle ndarray's)
* and is not already an ndarray or a subtype of the same type.
*/
- if (nin == 2 && nout == 1 && dtype[1]->type_num == NPY_OBJECT) {
+ if (nin == 2 && nout == 1 && dtypes[1]->type_num == NPY_OBJECT) {
PyObject *_obj = PyTuple_GET_ITEM(args, 1);
if (!PyArray_CheckExact(_obj)
/* If both are same subtype of object arrays, then proceed */
@@ -2296,12 +2309,12 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
#if NPY_UF_DBG_TRACING
printf("input types:\n");
for (i = 0; i < nin; ++i) {
- PyObject_Print((PyObject *)dtype[i], stdout, 0);
+ PyObject_Print((PyObject *)dtypes[i], stdout, 0);
printf(" ");
}
printf("\noutput types:\n");
for (i = nin; i < nop; ++i) {
- PyObject_Print((PyObject *)dtype[i], stdout, 0);
+ PyObject_Print((PyObject *)dtypes[i], stdout, 0);
printf(" ");
}
printf("\n");
@@ -2323,17 +2336,6 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
}
}
- /*
- * If the loop wants the arrays, provide them.
- *
- * TODO: Remove this, since this is already basically broken
- * with the addition of the masked inner loops and
- * not worth fixing.
- */
- if (!usemaskedloop && _does_loop_use_arrays(innerloopdata)) {
- innerloopdata = (void*)op;
- }
-
/* Start with the floating-point exception flags cleared */
PyUFunc_clearfperr();
@@ -2341,18 +2343,29 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
if (usemaskedloop) {
NPY_UF_DBG_PRINT("Executing masked inner loop\n");
- retval = execute_ufunc_masked_loop(self, wheremask, use_maskna,
- op, dtype, order,
- buffersize, arr_prep, arr_prep_args,
- masked_innerloop, masked_innerloopdata);
+ retval = execute_ufunc_masked_loop(ufunc, wheremask, use_maskna,
+ op, dtypes, order,
+ buffersize, arr_prep, arr_prep_args);
}
else {
NPY_UF_DBG_PRINT("Executing unmasked inner loop\n");
- retval = execute_ufunc_loop(self, trivial_loop_ok,
- op, dtype, order,
- buffersize, arr_prep, arr_prep_args,
- innerloop, innerloopdata);
+ if (ufunc->legacy_inner_loop_selector != NULL) {
+ retval = execute_legacy_ufunc_loop(ufunc, trivial_loop_ok,
+ op, dtypes, order,
+ buffersize, arr_prep, arr_prep_args);
+ }
+ else {
+ /*
+ * TODO: When this is supported, it should be preferred over
+ * the legacy_inner_loop_selector
+ */
+ PyErr_SetString(PyExc_RuntimeError,
+ "usage of the new inner_loop_selector isn't "
+ "implemented yet");
+ retval = -1;
+ goto fail;
+ }
}
if (retval < 0) {
goto fail;
@@ -2367,7 +2380,7 @@ PyUFunc_GenericFunction(PyUFuncObject *self,
/* The caller takes ownership of all the references in op */
for (i = 0; i < nop; ++i) {
- Py_XDECREF(dtype[i]);
+ Py_XDECREF(dtypes[i]);
Py_XDECREF(arr_prep[i]);
}
Py_XDECREF(errobj);
@@ -2384,7 +2397,7 @@ fail:
for (i = 0; i < nop; ++i) {
Py_XDECREF(op[i]);
op[i] = NULL;
- Py_XDECREF(dtype[i]);
+ Py_XDECREF(dtypes[i]);
Py_XDECREF(arr_prep[i]);
}
Py_XDECREF(errobj);
@@ -2403,7 +2416,7 @@ fail:
* Returns 0 on success, -1 on failure.
*/
static int
-get_binary_op_function(PyUFuncObject *self, int *otype,
+get_binary_op_function(PyUFuncObject *ufunc, int *otype,
PyUFuncGenericFunction *out_innerloop,
void **out_innerloopdata)
{
@@ -2414,13 +2427,13 @@ get_binary_op_function(PyUFuncObject *self, int *otype,
*otype);
/* If the type is custom and there are userloops, search for it here */
- if (self->userloops != NULL && PyTypeNum_ISUSERDEF(*otype)) {
+ if (ufunc->userloops != NULL && PyTypeNum_ISUSERDEF(*otype)) {
PyObject *key, *obj;
key = PyInt_FromLong(*otype);
if (key == NULL) {
return -1;
}
- obj = PyDict_GetItem(self->userloops, key);
+ obj = PyDict_GetItem(ufunc->userloops, key);
Py_DECREF(key);
if (obj != NULL) {
funcdata = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(obj);
@@ -2440,8 +2453,8 @@ get_binary_op_function(PyUFuncObject *self, int *otype,
}
/* Search for a function with compatible inputs */
- for (i = 0; i < self->ntypes; ++i) {
- char *types = self->types + i*self->nargs;
+ for (i = 0; i < ufunc->ntypes; ++i) {
+ char *types = ufunc->types + i*ufunc->nargs;
NPY_UF_DBG_PRINT3("Trying loop with signature %d %d -> %d\n",
types[0], types[1], types[2]);
@@ -2451,8 +2464,8 @@ get_binary_op_function(PyUFuncObject *self, int *otype,
(*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) {
/* If the signature is "xx->x", we found the loop */
if (types[2] == types[0]) {
- *out_innerloop = self->functions[i];
- *out_innerloopdata = self->data[i];
+ *out_innerloop = ufunc->functions[i];
+ *out_innerloopdata = ufunc->data[i];
*otype = types[0];
return 0;
}
@@ -2468,16 +2481,16 @@ get_binary_op_function(PyUFuncObject *self, int *otype,
}
/* Search for the exact function */
- for (i = 0; i < self->ntypes; ++i) {
- char *types = self->types + i*self->nargs;
+ for (i = 0; i < ufunc->ntypes; ++i) {
+ char *types = ufunc->types + i*ufunc->nargs;
if (PyArray_CanCastSafely(*otype, types[0]) &&
types[0] == types[1] &&
types[1] == types[2] &&
(*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) {
/* Since the signature is "xx->x", we found the loop */
- *out_innerloop = self->functions[i];
- *out_innerloopdata = self->data[i];
+ *out_innerloop = ufunc->functions[i];
+ *out_innerloopdata = ufunc->data[i];
*otype = types[0];
return 0;
}
@@ -2495,17 +2508,17 @@ get_binary_op_function(PyUFuncObject *self, int *otype,
* Returns 0 on success, -1 on failure.
*/
static int
-get_masked_binary_op_function(PyUFuncObject *self, PyArrayObject *arr,
+get_masked_binary_op_function(PyUFuncObject *ufunc, PyArrayObject *arr,
int otype,
PyArray_Descr **out_dtype,
- PyUFuncGenericMaskedFunction *out_innerloop,
+ PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
NpyAuxData **out_innerloopdata)
{
int i, retcode;
PyArrayObject *op[3] = {arr, arr, NULL};
- PyArray_Descr *dtype[3] = {NULL, NULL, NULL};
- PyObject *type_tup = NULL;
- char *ufunc_name = self->name ? self->name : "(unknown)";
+ PyArray_Descr *dtypes[3] = {NULL, NULL, NULL};
+ char *ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
+ npy_intp fixed_strides[3] = {NPY_MAX_INTP, NPY_MAX_INTP, NPY_MAX_INTP};
NPY_UF_DBG_PRINT1("Getting masked binary op function for type number %d\n",
otype);
@@ -2513,49 +2526,58 @@ get_masked_binary_op_function(PyUFuncObject *self, PyArrayObject *arr,
*out_dtype = NULL;
/* Build a type tuple if otype is specified */
- if (otype != NPY_NOTYPE) {
+ if (otype == NPY_NOTYPE) {
+ /* Use the type resolution function to find our loop */
+ retcode = ufunc->type_resolution_function(
+ ufunc, NPY_SAME_KIND_CASTING,
+ op, NULL, dtypes);
+ if (retcode == -1) {
+ return -1;
+ }
+ else if (retcode == -2) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "type resolution returned NotImplemented");
+ return -1;
+ }
+
+ /* The selected dtypes should all be equivalent */
+ if (!PyArray_EquivTypes(dtypes[0], dtypes[1]) ||
+ !PyArray_EquivTypes(dtypes[1], dtypes[2])) {
+ for (i = 0; i < 3; ++i) {
+ Py_DECREF(dtypes[i]);
+ }
+ PyErr_Format(PyExc_RuntimeError,
+ "could not find a type resolution appropriate for "
+ "reduce ufunc %s", ufunc_name);
+ return -1;
+ }
+ }
+ else {
PyArray_Descr *otype_dtype = PyArray_DescrFromType(otype);
if (otype_dtype == NULL) {
return -1;
}
+ dtypes[0] = otype_dtype;
Py_INCREF(otype_dtype);
+ dtypes[1] = otype_dtype;
Py_INCREF(otype_dtype);
- type_tup = Py_BuildValue("(NNN)",
- otype_dtype, otype_dtype, otype_dtype);
- if (type_tup == NULL) {
- return -1;
- }
+ dtypes[2] = otype_dtype;
}
- /* Use the type resolution function to find our loop */
- retcode = self->type_resolution_masked_function(self, NPY_SAME_KIND_CASTING,
- op, type_tup, dtype,
- out_innerloop, out_innerloopdata);
- Py_XDECREF(type_tup);
- if (retcode == -1) {
- return -1;
- }
- else if (retcode == -2) {
- PyErr_SetString(PyExc_RuntimeError,
- "type resolution returned NotImplemented");
- return -1;
- }
+ /* Get the inner loop for the resolved dtypes */
+ if (ufunc->masked_inner_loop_selector(ufunc, dtypes,
+ fixed_strides, NPY_MAX_INTP,
+ out_innerloop, out_innerloopdata) < 0) {
+ Py_DECREF(dtypes[0]);
+ Py_DECREF(dtypes[1]);
+ Py_DECREF(dtypes[2]);
- /* The selected dtypes should all be equivalent */
- if (!PyArray_EquivTypes(dtype[0], dtype[1]) ||
- !PyArray_EquivTypes(dtype[1], dtype[2])) {
- for (i = 0; i < 3; ++i) {
- Py_DECREF(dtype[i]);
- }
- PyErr_Format(PyExc_RuntimeError,
- "could not find a masked binary loop appropriate for "
- "reduce ufunc %s", ufunc_name);
return -1;
}
- *out_dtype = dtype[0];
- Py_DECREF(dtype[1]);
- Py_DECREF(dtype[2]);
+ *out_dtype = dtypes[0];
+ Py_DECREF(dtypes[1]);
+ Py_DECREF(dtypes[2]);
return 0;
}
@@ -2612,7 +2634,7 @@ initialize_reduce_result(int identity, PyArrayObject *result,
* this function does not validate them.
*/
static PyArrayObject *
-PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
+PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
int naxes, int *axes, int otype, int skipna, int keepdims)
{
int iaxes, ndim, retcode;
@@ -2627,10 +2649,10 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
/* The masked selected inner loop */
int use_maskna = 0;
- PyUFuncGenericMaskedFunction maskedinnerloop = NULL;
+ PyUFunc_MaskedStridedInnerLoopFunc *maskedinnerloop = NULL;
NpyAuxData *maskedinnerloopdata = NULL;
- char *ufunc_name = self->name ? self->name : "(unknown)";
+ char *ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
/* These parameters come from a TLS global */
int buffersize = 0, errormask = 0;
@@ -2679,12 +2701,12 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
/* Get the appropriate ufunc inner loop */
if (use_maskna) {
- retcode = get_masked_binary_op_function(self, arr, otype,
+ retcode = get_masked_binary_op_function(ufunc, arr, otype,
&otype_dtype, &maskedinnerloop, &maskedinnerloopdata);
}
else {
int otype_final = otype;
- retcode = get_binary_op_function(self, &otype_final,
+ retcode = get_binary_op_function(ufunc, &otype_final,
&innerloop, &innerloopdata);
NPY_UF_DBG_PRINT2("Loop retcode %d, otype final %d\n",
@@ -2726,7 +2748,7 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
innerloopdata = (void*)op;
}
- /* Allocate an output or conform 'out' to 'self' */
+ /* Allocate an output or conform 'out' to 'ufunc' */
Py_XINCREF(otype_dtype);
result = PyArray_CreateReduceResult(arr, out,
otype_dtype, axis_flags, !skipna && use_maskna,
@@ -2789,7 +2811,7 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
* copied from 'arr', and create a view of 'arr' containing
* all the elements to reduce into 'result'.
*/
- arr_view = initialize_reduce_result(self->identity, result,
+ arr_view = initialize_reduce_result(ufunc->identity, result,
axis_flags, arr, skipna,
&skip_first_count, ufunc_name);
if (arr_view == NULL) {
@@ -2853,19 +2875,19 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
int needs_api;
NpyIter_IterNextFunc *iternext;
char **dataptr;
- npy_intp *stride;
- npy_intp *count_ptr;
+ npy_intp *strides;
+ npy_intp *countptr;
char *dataptr_copy[3];
- npy_intp stride_copy[3];
+ npy_intp strides_copy[3];
iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
goto fail;
}
dataptr = NpyIter_GetDataPtrArray(iter);
- stride = NpyIter_GetInnerStrideArray(iter);
- count_ptr = NpyIter_GetInnerLoopSizePtr(iter);
+ strides = NpyIter_GetInnerStrideArray(iter);
+ countptr = NpyIter_GetInnerLoopSizePtr(iter);
needs_api = NpyIter_IterationNeedsAPI(iter) ||
PyDataType_REFCHK(otype_dtype);
@@ -2878,14 +2900,14 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
if (!use_maskna) {
if (skip_first_count > 0) {
do {
- npy_intp count = *count_ptr;
+ npy_intp count = *countptr;
/* Skip any first-visit elements */
if (NpyIter_IsFirstVisit(iter, 0)) {
- if (stride[0] == 0) {
+ if (strides[0] == 0) {
--count;
--skip_first_count;
- dataptr[1] += stride[1];
+ dataptr[1] += strides[1];
}
else {
skip_first_count -= count;
@@ -2897,11 +2919,11 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
dataptr_copy[0] = dataptr[0];
dataptr_copy[1] = dataptr[1];
dataptr_copy[2] = dataptr[0];
- stride_copy[0] = stride[0];
- stride_copy[1] = stride[1];
- stride_copy[2] = stride[0];
+ strides_copy[0] = strides[0];
+ strides_copy[1] = strides[1];
+ strides_copy[2] = strides[0];
innerloop(dataptr_copy, &count,
- stride_copy, innerloopdata);
+ strides_copy, innerloopdata);
/* Jump to the faster loop when skipping is done */
if (skip_first_count == 0) {
@@ -2919,26 +2941,26 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
dataptr_copy[0] = dataptr[0];
dataptr_copy[1] = dataptr[1];
dataptr_copy[2] = dataptr[0];
- stride_copy[0] = stride[0];
- stride_copy[1] = stride[1];
- stride_copy[2] = stride[0];
- innerloop(dataptr_copy, count_ptr,
- stride_copy, innerloopdata);
+ strides_copy[0] = strides[0];
+ strides_copy[1] = strides[1];
+ strides_copy[2] = strides[0];
+ innerloop(dataptr_copy, countptr,
+ strides_copy, innerloopdata);
} while (iternext(iter));
}
/* Masked reduction */
else {
if (skip_first_count > 0) {
do {
- npy_intp count = *count_ptr;
+ npy_intp count = *countptr;
/* Skip any first-visit elements */
if (NpyIter_IsFirstVisit(iter, 0)) {
- if (stride[0] == 0) {
+ if (strides[0] == 0) {
--count;
--skip_first_count;
- dataptr[1] += stride[1];
- dataptr[2] += stride[2];
+ dataptr[1] += strides[1];
+ dataptr[2] += strides[2];
}
else {
skip_first_count -= count;
@@ -2950,15 +2972,16 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
dataptr_copy[0] = dataptr[0];
dataptr_copy[1] = dataptr[1];
dataptr_copy[2] = dataptr[0];
- stride_copy[0] = stride[0];
- stride_copy[1] = stride[1];
- stride_copy[2] = stride[0];
+ strides_copy[0] = strides[0];
+ strides_copy[1] = strides[1];
+ strides_copy[2] = strides[0];
/*
* If skipna=True, this masks based on the mask in 'arr',
* otherwise it masks based on the mask in 'result'
*/
- maskedinnerloop(dataptr_copy, dataptr[2], &count,
- stride_copy, stride[2], maskedinnerloopdata);
+ maskedinnerloop(dataptr_copy, strides_copy,
+ dataptr[2], strides[2],
+ count, maskedinnerloopdata);
/* Jump to the faster loop when skipping is done */
if (skip_first_count == 0) {
@@ -2976,15 +2999,16 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
dataptr_copy[0] = dataptr[0];
dataptr_copy[1] = dataptr[1];
dataptr_copy[2] = dataptr[0];
- stride_copy[0] = stride[0];
- stride_copy[1] = stride[1];
- stride_copy[2] = stride[0];
+ strides_copy[0] = strides[0];
+ strides_copy[1] = strides[1];
+ strides_copy[2] = strides[0];
/*
* If skipna=True, this masks based on the mask in 'arr',
* otherwise it masks based on the mask in 'result'
*/
- maskedinnerloop(dataptr_copy, dataptr[2], count_ptr,
- stride_copy, stride[2], maskedinnerloopdata);
+ maskedinnerloop(dataptr_copy, strides_copy,
+ dataptr[2], strides[2],
+ *countptr, maskedinnerloopdata);
} while (iternext(iter));
}
finish_loop:
@@ -3032,7 +3056,7 @@ fail:
static PyObject *
-PyUFunc_Accumulate(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
+PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
int axis, int otype, int skipna)
{
PyArrayObject *op[2];
@@ -3049,7 +3073,7 @@ PyUFunc_Accumulate(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
PyUFuncGenericFunction innerloop = NULL;
void *innerloopdata = NULL;
- char *ufunc_name = self->name ? self->name : "(unknown)";
+ char *ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
/* These parameters come from extobj= or from a TLS global */
int buffersize = 0, errormask = 0;
@@ -3084,7 +3108,7 @@ PyUFunc_Accumulate(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
Py_XINCREF(out);
otype_final = otype;
- if (get_binary_op_function(self, &otype_final,
+ if (get_binary_op_function(ufunc, &otype_final,
&innerloop, &innerloopdata) < 0) {
PyArray_Descr *dtype = PyArray_DescrFromType(otype);
PyErr_Format(PyExc_ValueError,
@@ -3421,7 +3445,7 @@ fail:
* output shape is based on the size of indices
*/
static PyObject *
-PyUFunc_Reduceat(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *ind,
+PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
PyArrayObject *out, int axis, int otype, int skipna)
{
PyArrayObject *op[3];
@@ -3442,7 +3466,7 @@ PyUFunc_Reduceat(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *ind,
PyUFuncGenericFunction innerloop = NULL;
void *innerloopdata = NULL;
- char *ufunc_name = self->name ? self->name : "(unknown)";
+ char *ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
char *opname = "reduceat";
/* These parameters come from extobj= or from a TLS global */
@@ -3488,7 +3512,7 @@ PyUFunc_Reduceat(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *ind,
Py_XINCREF(out);
otype_final = otype;
- if (get_binary_op_function(self, &otype_final,
+ if (get_binary_op_function(ufunc, &otype_final,
&innerloop, &innerloopdata) < 0) {
PyArray_Descr *dtype = PyArray_DescrFromType(otype);
PyErr_Format(PyExc_ValueError,
@@ -3789,7 +3813,7 @@ fail:
* but they are handled separately for speed)
*/
static PyObject *
-PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
+PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args,
PyObject *kwds, int operation)
{
int i, naxes=0;
@@ -3808,22 +3832,22 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
"dtype", "out", "skipna", NULL};
static char *_reduce_type[] = {"reduce", "accumulate", "reduceat", NULL};
- if (self == NULL) {
+ if (ufunc == NULL) {
PyErr_SetString(PyExc_ValueError, "function not supported");
return NULL;
}
- if (self->core_enabled) {
+ if (ufunc->core_enabled) {
PyErr_Format(PyExc_RuntimeError,
"Reduction not defined on ufunc with signature");
return NULL;
}
- if (self->nin != 2) {
+ if (ufunc->nin != 2) {
PyErr_Format(PyExc_ValueError,
"%s only supported for binary functions",
_reduce_type[operation]);
return NULL;
}
- if (self->nout != 1) {
+ if (ufunc->nout != 1) {
PyErr_Format(PyExc_ValueError,
"%s only supported for functions "
"returning a single value",
@@ -3865,7 +3889,7 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
}
/* Ensure input is an array */
if (!PyArray_Check(op) && !PyArray_IsScalar(op, Generic)) {
- context = Py_BuildValue("O(O)i", self, op, 0);
+ context = Py_BuildValue("O(O)i", ufunc, op, 0);
}
else {
context = NULL;
@@ -4004,8 +4028,8 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
*/
int typenum = PyArray_TYPE(mp);
if ((PyTypeNum_ISBOOL(typenum) || PyTypeNum_ISINTEGER(typenum))
- && ((strcmp(self->name,"add") == 0)
- || (strcmp(self->name,"multiply") == 0))) {
+ && ((strcmp(ufunc->name,"add") == 0)
+ || (strcmp(ufunc->name,"multiply") == 0))) {
if (PyTypeNum_ISBOOL(typenum)) {
typenum = NPY_LONG;
}
@@ -4024,7 +4048,7 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
switch(operation) {
case UFUNC_REDUCE:
- ret = PyUFunc_Reduce(self, mp, out, naxes, axes,
+ ret = PyUFunc_Reduce(ufunc, mp, out, naxes, axes,
otype->type_num, skipna, keepdims);
break;
case UFUNC_ACCUMULATE:
@@ -4035,7 +4059,7 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
Py_DECREF(mp);
return NULL;
}
- ret = (PyArrayObject *)PyUFunc_Accumulate(self, mp, out, axes[0],
+ ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc, mp, out, axes[0],
otype->type_num, skipna);
break;
case UFUNC_REDUCEAT:
@@ -4046,7 +4070,7 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args,
Py_DECREF(mp);
return NULL;
}
- ret = (PyArrayObject *)PyUFunc_Reduceat(self, mp, indices, out,
+ ret = (PyArrayObject *)PyUFunc_Reduceat(ufunc, mp, indices, out,
axes[0], otype->type_num, skipna);
Py_DECREF(indices);
break;
@@ -4218,7 +4242,7 @@ _find_array_wrap(PyObject *args, PyObject *kwds,
static PyObject *
-ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+ufunc_generic_call(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
{
int i;
PyTupleObject *ret;
@@ -4232,19 +4256,19 @@ ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
* Initialize all array objects to NULL to make cleanup easier
* if something goes wrong.
*/
- for(i = 0; i < self->nargs; i++) {
+ for(i = 0; i < ufunc->nargs; i++) {
mps[i] = NULL;
}
- errval = PyUFunc_GenericFunction(self, args, kwds, mps);
+ errval = PyUFunc_GenericFunction(ufunc, args, kwds, mps);
if (errval < 0) {
- for (i = 0; i < self->nargs; i++) {
+ for (i = 0; i < ufunc->nargs; i++) {
PyArray_XDECREF_ERR(mps[i]);
}
if (errval == -1) {
return NULL;
}
- else if (self->nin == 2 && self->nout == 1) {
+ else if (ufunc->nin == 2 && ufunc->nout == 1) {
/* To allow the other argument to be given a chance */
Py_INCREF(Py_NotImplemented);
return Py_NotImplemented;
@@ -4257,7 +4281,7 @@ ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
}
/* Free the input references */
- for (i = 0; i < self->nin; i++) {
+ for (i = 0; i < ufunc->nin; i++) {
Py_XDECREF(mps[i]);
}
@@ -4278,11 +4302,11 @@ ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
* None --- array-object passed in don't call PyArray_Return
* method --- the __array_wrap__ method to call.
*/
- _find_array_wrap(args, kwds, wraparr, self->nin, self->nout);
+ _find_array_wrap(args, kwds, wraparr, ufunc->nin, ufunc->nout);
/* wrap outputs */
- for (i = 0; i < self->nout; i++) {
- int j = self->nin+i;
+ for (i = 0; i < ufunc->nout; i++) {
+ int j = ufunc->nin+i;
PyObject *wrap = wraparr[i];
if (wrap != NULL) {
@@ -4291,7 +4315,7 @@ ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
retobj[i] = (PyObject *)mps[j];
continue;
}
- res = PyObject_CallFunction(wrap, "O(OOi)", mps[j], self, args, i);
+ res = PyObject_CallFunction(wrap, "O(OOi)", mps[j], ufunc, args, i);
if (res == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
PyErr_Clear();
res = PyObject_CallFunctionObjArgs(wrap, mps[j], NULL);
@@ -4316,19 +4340,19 @@ ufunc_generic_call(PyUFuncObject *self, PyObject *args, PyObject *kwds)
}
- if (self->nout == 1) {
+ if (ufunc->nout == 1) {
return retobj[0];
}
else {
- ret = (PyTupleObject *)PyTuple_New(self->nout);
- for (i = 0; i < self->nout; i++) {
+ ret = (PyTupleObject *)PyTuple_New(ufunc->nout);
+ for (i = 0; i < ufunc->nout; i++) {
PyTuple_SET_ITEM(ret, i, retobj[i]);
}
return (PyObject *)ret;
}
fail:
- for (i = self->nin; i < self->nargs; i++) {
+ for (i = ufunc->nin; i < ufunc->nargs; i++) {
Py_XDECREF(mps[i]);
}
return NULL;
@@ -4484,59 +4508,61 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
char *name, char *doc,
int check_return, const char *signature)
{
- PyUFuncObject *self;
+ PyUFuncObject *ufunc;
- self = PyArray_malloc(sizeof(PyUFuncObject));
- if (self == NULL) {
+ ufunc = PyArray_malloc(sizeof(PyUFuncObject));
+ if (ufunc == NULL) {
return NULL;
}
- PyObject_Init((PyObject *)self, &PyUFunc_Type);
+ PyObject_Init((PyObject *)ufunc, &PyUFunc_Type);
- self->nin = nin;
- self->nout = nout;
- self->nargs = nin+nout;
- self->identity = identity;
+ ufunc->nin = nin;
+ ufunc->nout = nout;
+ ufunc->nargs = nin+nout;
+ ufunc->identity = identity;
- self->functions = func;
- self->data = data;
- self->types = types;
- self->ntypes = ntypes;
- self->check_return = check_return;
- self->ptr = NULL;
- self->obj = NULL;
- self->userloops=NULL;
+ ufunc->functions = func;
+ ufunc->data = data;
+ ufunc->types = types;
+ ufunc->ntypes = ntypes;
+ ufunc->check_return = check_return;
+ ufunc->ptr = NULL;
+ ufunc->obj = NULL;
+ ufunc->userloops=NULL;
- self->type_resolution_function = &PyUFunc_DefaultTypeResolution;
- self->type_resolution_masked_function =
- &PyUFunc_DefaultTypeResolutionMasked;
+ /* Type resolution and inner loop selection functions */
+ ufunc->type_resolution_function = &PyUFunc_DefaultTypeResolution;
+ ufunc->legacy_inner_loop_selector = &PyUFunc_DefaultLegacyInnerLoopSelector;
+ ufunc->inner_loop_selector = NULL;
+ ufunc->masked_inner_loop_selector = &PyUFunc_DefaultMaskedInnerLoopSelector;
if (name == NULL) {
- self->name = "?";
+ ufunc->name = "?";
}
else {
- self->name = name;
+ ufunc->name = name;
}
if (doc == NULL) {
- self->doc = "NULL";
+ ufunc->doc = "NULL";
}
else {
- self->doc = doc;
+ ufunc->doc = doc;
}
/* generalized ufunc */
- self->core_enabled = 0;
- self->core_num_dim_ix = 0;
- self->core_num_dims = NULL;
- self->core_dim_ixs = NULL;
- self->core_offsets = NULL;
- self->core_signature = NULL;
+ ufunc->core_enabled = 0;
+ ufunc->core_num_dim_ix = 0;
+ ufunc->core_num_dims = NULL;
+ ufunc->core_dim_ixs = NULL;
+ ufunc->core_offsets = NULL;
+ ufunc->core_signature = NULL;
if (signature != NULL) {
- if (_parse_signature(self, signature) != 0) {
- Py_DECREF(self);
+ if (_parse_signature(ufunc, signature) != 0) {
+ Py_DECREF(ufunc);
return NULL;
}
}
- return (PyObject *)self;
+ return (PyObject *)ufunc;
}
/* Specify that the loop specified by the given index should use the array of
@@ -4556,6 +4582,12 @@ PyUFunc_SetUsesArraysAsData(void **data, size_t i)
*
* NOTE: This is easier to specify with the type_resolution_function
* in the ufunc object.
+ *
+ * TODO: Remove this, since this is already basically broken
+ * with the addition of the masked inner loops and
+ * not worth fixing since the new loop selection functions
+ * have access to the full dtypes and can dynamically allocate
+ * arbitrary auxiliary data.
*/
static int
_does_loop_use_arrays(void *data)
@@ -4749,34 +4781,34 @@ PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc,
static void
-ufunc_dealloc(PyUFuncObject *self)
+ufunc_dealloc(PyUFuncObject *ufunc)
{
- if (self->core_num_dims) {
- PyArray_free(self->core_num_dims);
+ if (ufunc->core_num_dims) {
+ PyArray_free(ufunc->core_num_dims);
}
- if (self->core_dim_ixs) {
- PyArray_free(self->core_dim_ixs);
+ if (ufunc->core_dim_ixs) {
+ PyArray_free(ufunc->core_dim_ixs);
}
- if (self->core_offsets) {
- PyArray_free(self->core_offsets);
+ if (ufunc->core_offsets) {
+ PyArray_free(ufunc->core_offsets);
}
- if (self->core_signature) {
- PyArray_free(self->core_signature);
+ if (ufunc->core_signature) {
+ PyArray_free(ufunc->core_signature);
}
- if (self->ptr) {
- PyArray_free(self->ptr);
+ if (ufunc->ptr) {
+ PyArray_free(ufunc->ptr);
}
- Py_XDECREF(self->userloops);
- Py_XDECREF(self->obj);
- PyArray_free(self);
+ Py_XDECREF(ufunc->userloops);
+ Py_XDECREF(ufunc->obj);
+ PyArray_free(ufunc);
}
static PyObject *
-ufunc_repr(PyUFuncObject *self)
+ufunc_repr(PyUFuncObject *ufunc)
{
char buf[100];
- sprintf(buf, "<ufunc '%.50s'>", self->name);
+ sprintf(buf, "<ufunc '%.50s'>", ufunc->name);
return PyUString_FromString(buf);
}
@@ -4793,7 +4825,7 @@ ufunc_repr(PyUFuncObject *self)
* The result has dimensions a.ndim + b.ndim
*/
static PyObject *
-ufunc_outer(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+ufunc_outer(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
{
int i;
PyObject *ret;
@@ -4801,14 +4833,14 @@ ufunc_outer(PyUFuncObject *self, PyObject *args, PyObject *kwds)
PyObject *new_args, *tmp;
PyObject *shape1, *shape2, *newshape;
- if (self->core_enabled) {
+ if (ufunc->core_enabled) {
PyErr_Format(PyExc_TypeError,
"method outer is not allowed in ufunc with non-trivial"\
" signature");
return NULL;
}
- if(self->nin != 2) {
+ if(ufunc->nin != 2) {
PyErr_SetString(PyExc_ValueError,
"outer product only supported "\
"for binary functions");
@@ -4871,7 +4903,7 @@ ufunc_outer(PyUFuncObject *self, PyObject *args, PyObject *kwds)
Py_DECREF(ap1);
Py_DECREF(ap2);
Py_DECREF(ap_new);
- ret = ufunc_generic_call(self, new_args, kwds);
+ ret = ufunc_generic_call(ufunc, new_args, kwds);
Py_DECREF(new_args);
return ret;
@@ -4884,21 +4916,21 @@ ufunc_outer(PyUFuncObject *self, PyObject *args, PyObject *kwds)
static PyObject *
-ufunc_reduce(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+ufunc_reduce(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
{
- return PyUFunc_GenericReduction(self, args, kwds, UFUNC_REDUCE);
+ return PyUFunc_GenericReduction(ufunc, args, kwds, UFUNC_REDUCE);
}
static PyObject *
-ufunc_accumulate(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+ufunc_accumulate(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
{
- return PyUFunc_GenericReduction(self, args, kwds, UFUNC_ACCUMULATE);
+ return PyUFunc_GenericReduction(ufunc, args, kwds, UFUNC_ACCUMULATE);
}
static PyObject *
-ufunc_reduceat(PyUFuncObject *self, PyObject *args, PyObject *kwds)
+ufunc_reduceat(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
{
- return PyUFunc_GenericReduction(self, args, kwds, UFUNC_REDUCEAT);
+ return PyUFunc_GenericReduction(ufunc, args, kwds, UFUNC_REDUCEAT);
}
@@ -4959,7 +4991,7 @@ _typecharfromnum(int num) {
}
static PyObject *
-ufunc_get_doc(PyUFuncObject *self)
+ufunc_get_doc(PyUFuncObject *ufunc)
{
/*
 * Put docstring first or FindMethod finds it... could do some
@@ -4968,20 +5000,20 @@ ufunc_get_doc(PyUFuncObject *self)
* construct name(x1, x2, ...,[ out1, out2, ...]) __doc__
*/
PyObject *outargs, *inargs, *doc;
- outargs = _makeargs(self->nout, "out", 1);
- inargs = _makeargs(self->nin, "x", 0);
+ outargs = _makeargs(ufunc->nout, "out", 1);
+ inargs = _makeargs(ufunc->nin, "x", 0);
if (outargs == NULL) {
doc = PyUString_FromFormat("%s(%s)\n\n%s",
- self->name,
+ ufunc->name,
PyString_AS_STRING(inargs),
- self->doc);
+ ufunc->doc);
}
else {
doc = PyUString_FromFormat("%s(%s[, %s])\n\n%s",
- self->name,
+ ufunc->name,
PyString_AS_STRING(inargs),
PyString_AS_STRING(outargs),
- self->doc);
+ ufunc->doc);
Py_DECREF(outargs);
}
Py_DECREF(inargs);
@@ -4989,38 +5021,38 @@ ufunc_get_doc(PyUFuncObject *self)
}
static PyObject *
-ufunc_get_nin(PyUFuncObject *self)
+ufunc_get_nin(PyUFuncObject *ufunc)
{
- return PyInt_FromLong(self->nin);
+ return PyInt_FromLong(ufunc->nin);
}
static PyObject *
-ufunc_get_nout(PyUFuncObject *self)
+ufunc_get_nout(PyUFuncObject *ufunc)
{
- return PyInt_FromLong(self->nout);
+ return PyInt_FromLong(ufunc->nout);
}
static PyObject *
-ufunc_get_nargs(PyUFuncObject *self)
+ufunc_get_nargs(PyUFuncObject *ufunc)
{
- return PyInt_FromLong(self->nargs);
+ return PyInt_FromLong(ufunc->nargs);
}
static PyObject *
-ufunc_get_ntypes(PyUFuncObject *self)
+ufunc_get_ntypes(PyUFuncObject *ufunc)
{
- return PyInt_FromLong(self->ntypes);
+ return PyInt_FromLong(ufunc->ntypes);
}
static PyObject *
-ufunc_get_types(PyUFuncObject *self)
+ufunc_get_types(PyUFuncObject *ufunc)
{
/* return a list with types grouped input->output */
PyObject *list;
PyObject *str;
- int k, j, n, nt = self->ntypes;
- int ni = self->nin;
- int no = self->nout;
+ int k, j, n, nt = ufunc->ntypes;
+ int ni = ufunc->nin;
+ int no = ufunc->nout;
char *t;
list = PyList_New(nt);
if (list == NULL) {
@@ -5030,13 +5062,13 @@ ufunc_get_types(PyUFuncObject *self)
n = 0;
for (k = 0; k < nt; k++) {
for (j = 0; j<ni; j++) {
- t[j] = _typecharfromnum(self->types[n]);
+ t[j] = _typecharfromnum(ufunc->types[n]);
n++;
}
t[ni] = '-';
t[ni+1] = '>';
for (j = 0; j < no; j++) {
- t[ni + 2 + j] = _typecharfromnum(self->types[n]);
+ t[ni + 2 + j] = _typecharfromnum(ufunc->types[n]);
n++;
}
str = PyUString_FromStringAndSize(t, no + ni + 2);
@@ -5047,15 +5079,15 @@ ufunc_get_types(PyUFuncObject *self)
}
static PyObject *
-ufunc_get_name(PyUFuncObject *self)
+ufunc_get_name(PyUFuncObject *ufunc)
{
- return PyUString_FromString(self->name);
+ return PyUString_FromString(ufunc->name);
}
static PyObject *
-ufunc_get_identity(PyUFuncObject *self)
+ufunc_get_identity(PyUFuncObject *ufunc)
{
- switch(self->identity) {
+ switch(ufunc->identity) {
case PyUFunc_One:
return PyInt_FromLong(1);
case PyUFunc_Zero:
@@ -5065,12 +5097,12 @@ ufunc_get_identity(PyUFuncObject *self)
}
static PyObject *
-ufunc_get_signature(PyUFuncObject *self)
+ufunc_get_signature(PyUFuncObject *ufunc)
{
- if (!self->core_enabled) {
+ if (!ufunc->core_enabled) {
Py_RETURN_NONE;
}
- return PyUString_FromString(self->core_signature);
+ return PyUString_FromString(ufunc->core_signature);
}
#undef _typecharfromnum
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 4bdc9cbb9..8a2041a24 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -122,8 +122,7 @@ ensure_dtype_nbo(PyArray_Descr *type)
/*UFUNC_API
*
* This function applies the default type resolution rules
- * for the provided ufunc, filling out_dtypes, out_innerloop,
- * and out_innerloopdata.
+ * for the provided ufunc.
*
* Returns 0 on success, -1 on error.
*/
@@ -132,9 +131,7 @@ PyUFunc_DefaultTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int i, nop = ufunc->nin + ufunc->nout;
int retval = 0, any_object = 0;
@@ -151,20 +148,20 @@ PyUFunc_DefaultTypeResolution(PyUFuncObject *ufunc,
/*
* Decide the casting rules for inputs and outputs. We want
* NPY_SAFE_CASTING or stricter, so that the loop selection code
- * doesn't choose an integer loop for float inputs, for example.
+ * doesn't choose an integer loop for float inputs, or a float32
+ * loop for float64 inputs.
*/
input_casting = (casting > NPY_SAFE_CASTING) ? NPY_SAFE_CASTING : casting;
if (type_tup == NULL) {
/* Find the best ufunc inner loop, and fill in the dtypes */
- retval = find_best_ufunc_inner_loop(ufunc, operands,
+ retval = linear_search_type_resolution(ufunc, operands,
input_casting, casting, any_object,
- out_dtypes, out_innerloop, out_innerloopdata);
+ out_dtypes);
} else {
/* Find the specified ufunc inner loop, and fill in the dtypes */
- retval = find_specified_ufunc_inner_loop(ufunc, type_tup,
- operands, casting, any_object, out_dtypes,
- out_innerloop, out_innerloopdata);
+ retval = type_tuple_type_resolution(ufunc, type_tup,
+ operands, casting, any_object, out_dtypes);
}
return retval;
@@ -187,11 +184,9 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
- int i, type_num, type_num1, type_num2;
+ int i, type_num1, type_num2;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -213,7 +208,7 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
if (type_num1 >= NPY_NTYPES || type_num2 >= NPY_NTYPES ||
type_num1 == NPY_OBJECT || type_num2 == NPY_OBJECT) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_tup == NULL) {
@@ -232,8 +227,7 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
*/
if (!PyTuple_Check(type_tup) || PyTuple_GET_SIZE(type_tup) != 1) {
return PyUFunc_DefaultTypeResolution(ufunc, casting,
- operands, type_tup, out_dtypes,
- out_innerloop, out_innerloopdata);
+ operands, type_tup, out_dtypes);
}
if (!PyArray_DescrCheck(PyTuple_GET_ITEM(type_tup, 0))) {
@@ -270,31 +264,7 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- type_num = out_dtypes[0]->type_num;
-
- /* If we have a built-in type, search in the functions list */
- if (type_num < NPY_NTYPES) {
- char *types = ufunc->types;
- int n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "ufunc '%s' not supported for the input types",
- ufunc_name);
- return -1;
- }
- else {
- PyErr_SetString(PyExc_RuntimeError,
- "user type shouldn't have resulted from type promotion");
- return -1;
- }
+ return 0;
}
/*
@@ -313,11 +283,9 @@ PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
- int i, type_num, type_num1;
+ int i, type_num1;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -337,7 +305,7 @@ PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
type_num1 = PyArray_DESCR(operands[0])->type_num;
if (type_num1 >= NPY_NTYPES || type_num1 == NPY_OBJECT) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_tup == NULL) {
@@ -356,8 +324,7 @@ PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
*/
if (!PyTuple_Check(type_tup) || PyTuple_GET_SIZE(type_tup) != 1) {
return PyUFunc_DefaultTypeResolution(ufunc, casting,
- operands, type_tup, out_dtypes,
- out_innerloop, out_innerloopdata);
+ operands, type_tup, out_dtypes);
}
if (!PyArray_DescrCheck(PyTuple_GET_ITEM(type_tup, 0))) {
@@ -384,31 +351,7 @@ PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- type_num = out_dtypes[0]->type_num;
-
- /* If we have a built-in type, search in the functions list */
- if (type_num < NPY_NTYPES) {
- char *types = ufunc->types;
- int n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[2*i] == type_num) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "ufunc '%s' not supported for the input types",
- ufunc_name);
- return -1;
- }
- else {
- PyErr_SetString(PyExc_RuntimeError,
- "user type shouldn't have resulted from type promotion");
- return -1;
- }
+ return 0;
}
/*
@@ -421,14 +364,11 @@ PyUFunc_OnesLikeTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING NPY_UNUSED(casting),
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
return PyUFunc_SimpleUnaryOperationTypeResolution(ufunc,
NPY_UNSAFE_CASTING,
- operands, type_tup, out_dtypes,
- out_innerloop, out_innerloopdata);
+ operands, type_tup, out_dtypes);
}
@@ -449,11 +389,9 @@ PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
- int i, type_num, type_num1, type_num2;
+ int i, type_num1, type_num2;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -475,7 +413,7 @@ PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
if (type_num1 >= NPY_NTYPES || type_num2 >= NPY_NTYPES ||
type_num1 == NPY_OBJECT || type_num2 == NPY_OBJECT) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_tup == NULL) {
@@ -496,8 +434,7 @@ PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
*/
if (!PyTuple_Check(type_tup) || PyTuple_GET_SIZE(type_tup) != 1) {
return PyUFunc_DefaultTypeResolution(ufunc, casting,
- operands, type_tup, out_dtypes,
- out_innerloop, out_innerloopdata);
+ operands, type_tup, out_dtypes);
}
if (!PyArray_DescrCheck(PyTuple_GET_ITEM(type_tup, 0))) {
@@ -526,31 +463,7 @@ PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- type_num = out_dtypes[0]->type_num;
-
- /* If we have a built-in type, search in the functions list */
- if (type_num < NPY_NTYPES) {
- char *types = ufunc->types;
- int n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "ufunc '%s' not supported for the input types",
- ufunc_name);
- return -1;
- }
- else {
- PyErr_SetString(PyExc_RuntimeError,
- "user type shouldn't have resulted from type promotion");
- return -1;
- }
+ return 0;
}
/*
@@ -565,19 +478,16 @@ PyUFunc_AbsoluteTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
/* Use the default for complex types, to find the loop producing float */
if (PyTypeNum_ISCOMPLEX(PyArray_DESCR(operands[0])->type_num)) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
else {
return PyUFunc_SimpleUnaryOperationTypeResolution(ufunc, casting,
- operands, type_tup, out_dtypes,
- out_innerloop, out_innerloopdata);
+ operands, type_tup, out_dtypes);
}
}
@@ -672,13 +582,10 @@ PyUFunc_AdditionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int type_num1, type_num2;
- char *types;
- int i, n;
+ int i;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -689,7 +596,7 @@ PyUFunc_AdditionTypeResolution(PyUFuncObject *ufunc,
/* Use the default when datetime and timedelta are not involved */
if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_num1 == NPY_TIMEDELTA) {
@@ -831,22 +738,7 @@ PyUFunc_AdditionTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- /* Search in the functions list */
- types = ufunc->types;
- n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num1 && types[3*i+1] == type_num2) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "internal error: could not find appropriate datetime "
- "inner loop in %s ufunc", ufunc_name);
- return -1;
+ return 0;
type_reso_error: {
PyObject *errmsg;
@@ -879,13 +771,10 @@ PyUFunc_SubtractionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int type_num1, type_num2;
- char *types;
- int i, n;
+ int i;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -896,7 +785,7 @@ PyUFunc_SubtractionTypeResolution(PyUFuncObject *ufunc,
/* Use the default when datetime and timedelta are not involved */
if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_num1 == NPY_TIMEDELTA) {
@@ -1019,22 +908,7 @@ PyUFunc_SubtractionTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- /* Search in the functions list */
- types = ufunc->types;
- n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num1 && types[3*i+1] == type_num2) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "internal error: could not find appropriate datetime "
- "inner loop in %s ufunc", ufunc_name);
- return -1;
+ return 0;
type_reso_error: {
PyObject *errmsg;
@@ -1064,13 +938,10 @@ PyUFunc_MultiplicationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int type_num1, type_num2;
- char *types;
- int i, n;
+ int i;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -1081,7 +952,7 @@ PyUFunc_MultiplicationTypeResolution(PyUFuncObject *ufunc,
/* Use the default when datetime and timedelta are not involved */
if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_num1 == NPY_TIMEDELTA) {
@@ -1180,22 +1051,7 @@ PyUFunc_MultiplicationTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- /* Search in the functions list */
- types = ufunc->types;
- n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num1 && types[3*i+1] == type_num2) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "internal error: could not find appropriate datetime "
- "inner loop in %s ufunc", ufunc_name);
- return -1;
+ return 0;
type_reso_error: {
PyObject *errmsg;
@@ -1224,13 +1080,10 @@ PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int type_num1, type_num2;
- char *types;
- int i, n;
+ int i;
char *ufunc_name;
ufunc_name = ufunc->name ? ufunc->name : "<unnamed ufunc>";
@@ -1241,7 +1094,7 @@ PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
/* Use the default when datetime and timedelta are not involved */
if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
- type_tup, out_dtypes, out_innerloop, out_innerloopdata);
+ type_tup, out_dtypes);
}
if (type_num1 == NPY_TIMEDELTA) {
@@ -1317,22 +1170,7 @@ PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
return -1;
}
- /* Search in the functions list */
- types = ufunc->types;
- n = ufunc->ntypes;
-
- for (i = 0; i < n; ++i) {
- if (types[3*i] == type_num1 && types[3*i+1] == type_num2) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- return 0;
- }
- }
-
- PyErr_Format(PyExc_TypeError,
- "internal error: could not find appropriate datetime "
- "inner loop in %s ufunc", ufunc_name);
- return -1;
+ return 0;
type_reso_error: {
PyObject *errmsg;
@@ -1349,6 +1187,121 @@ type_reso_error: {
}
}
+/*
+ * Looks for a user-registered inner loop whose argument type numbers
+ * are exactly those of 'dtypes' (all nin + nout entries).
+ *
+ * Only the input dtypes are scanned for user-defined type numbers; each
+ * one found is used as the key into ufunc->userloops, and every loop
+ * registered under that key is compared against 'dtypes'.
+ * ufunc->userloops is used without a NULL check here.
+ *
+ * Returns 1 and fills out_innerloop/out_innerloopdata when a loop
+ * matches, 0 when none does, and -1 if the dictionary key could not
+ * be created.
+ */
+static int
+find_userloop(PyUFuncObject *ufunc,
+                PyArray_Descr **dtypes,
+                PyUFuncGenericFunction *out_innerloop,
+                void **out_innerloopdata)
+{
+    npy_intp nin = ufunc->nin;
+    npy_intp nargs = nin + ufunc->nout;
+    npy_intp iop;
+    /* Most recently searched user-defined type number */
+    int prev_userdef = -1;
+
+    for (iop = 0; iop < nin; ++iop) {
+        int type_num = dtypes[iop]->type_num;
+        PyObject *key, *capsule;
+        PyUFunc_Loop1d *loop;
+
+        /* Skip built-in types and an immediate repeat of the last search */
+        if (type_num == prev_userdef || !PyTypeNum_ISUSERDEF(type_num)) {
+            continue;
+        }
+        prev_userdef = type_num;
+
+        key = PyInt_FromLong(type_num);
+        if (key == NULL) {
+            return -1;
+        }
+        /* The dictionary lookup result is never released in this function */
+        capsule = PyDict_GetItem(ufunc->userloops, key);
+        Py_DECREF(key);
+        if (capsule == NULL) {
+            continue;
+        }
+
+        /* Walk the chain of loops registered for this type number */
+        for (loop = (PyUFunc_Loop1d *)NpyCapsule_AsVoidPtr(capsule);
+                    loop != NULL;
+                    loop = loop->next) {
+            int *types = loop->arg_types;
+            npy_intp iarg = 0;
+
+            while (iarg < nargs && types[iarg] == dtypes[iarg]->type_num) {
+                ++iarg;
+            }
+            /* Every argument type agreed */
+            if (iarg == nargs) {
+                *out_innerloop = loop->func;
+                *out_innerloopdata = loop->data;
+                return 1;
+            }
+        }
+    }
+
+    /* No registered loop matched */
+    return 0;
+}
+
+/*
+ * Default legacy inner loop selector.
+ *
+ * Given the already-resolved 'dtypes' (ufunc->nargs entries), picks the
+ * inner loop whose type numbers match them exactly. User-registered
+ * loops are searched first (when ufunc->userloops is set), then the
+ * ufunc's built-in 'types' table.
+ *
+ * On success fills out_innerloop/out_innerloopdata and returns 0.
+ * When no loop matches, raises TypeError naming the ufunc and the
+ * requested dtypes and returns -1. Also returns -1 if the user loop
+ * search itself fails.
+ */
+NPY_NO_EXPORT int
+PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc,
+                                PyArray_Descr **dtypes,
+                                PyUFuncGenericFunction *out_innerloop,
+                                void **out_innerloopdata)
+{
+    int nargs = ufunc->nargs;
+    char *types;
+    const char *ufunc_name;
+    PyObject *errmsg;
+    int i, j;
+
+    ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
+
+    /*
+     * If there are user-loops search them first.
+     * TODO: There needs to be a loop selection acceleration structure,
+     *       like a hash table.
+     */
+    if (ufunc->userloops) {
+        switch (find_userloop(ufunc, dtypes,
+                    out_innerloop, out_innerloopdata)) {
+            /* Error */
+            case -1:
+                return -1;
+            /* Found a loop */
+            case 1:
+                return 0;
+        }
+    }
+
+    types = ufunc->types;
+    for (i = 0; i < ufunc->ntypes; ++i) {
+        /* Compare this loop's type numbers against the requested dtypes */
+        for (j = 0; j < nargs; ++j) {
+            if (types[j] != dtypes[j]->type_num) {
+                break;
+            }
+        }
+        if (j == nargs) {
+            *out_innerloop = ufunc->functions[i];
+            *out_innerloopdata = ufunc->data[i];
+            return 0;
+        }
+
+        /* Advance to the next loop's row of nargs type numbers */
+        types += nargs;
+    }
+
+    errmsg = PyUString_FromFormat("ufunc '%s' did not contain a loop "
+                    "with signature matching types ", ufunc_name);
+    for (i = 0; i < nargs; ++i) {
+        PyUString_ConcatAndDel(&errmsg,
+                PyObject_Repr((PyObject *)dtypes[i]));
+        if (i < nargs - 1) {
+            PyUString_ConcatAndDel(&errmsg, PyUString_FromString(" "));
+        }
+    }
+    /*
+     * PyErr_SetObject takes its own reference to the value, so the
+     * message must be released here or it leaks on every failed lookup.
+     * If building the message failed, errmsg is NULL and the error from
+     * that failure is left in place.
+     */
+    if (errmsg != NULL) {
+        PyErr_SetObject(PyExc_TypeError, errmsg);
+        Py_DECREF(errmsg);
+    }
+
+    return -1;
+}
+
typedef struct {
NpyAuxData base;
PyUFuncGenericFunction unmasked_innerloop;
@@ -1380,25 +1333,22 @@ ufunc_masker_data_clone(NpyAuxData *data)
*/
static void
unmasked_ufunc_loop_as_masked(
- char **args,
- char *mask,
- npy_intp *dimensions,
- npy_intp *steps,
- npy_intp mask_stride,
+ char **dataptrs, npy_intp *strides,
+ char *mask, npy_intp mask_stride,
+ npy_intp loopsize,
NpyAuxData *innerloopdata)
{
_ufunc_masker_data *data;
int iargs, nargs;
PyUFuncGenericFunction unmasked_innerloop;
void *unmasked_innerloopdata;
- npy_intp loopsize, subloopsize;
+ npy_intp subloopsize;
/* Put the aux data into local variables */
data = (_ufunc_masker_data *)innerloopdata;
unmasked_innerloop = data->unmasked_innerloop;
unmasked_innerloopdata = data->unmasked_innerloopdata;
nargs = data->nargs;
- loopsize = *dimensions;
/* Process the data as runs of unmasked values */
do {
@@ -1410,7 +1360,7 @@ unmasked_ufunc_loop_as_masked(
mask += mask_stride;
}
for (iargs = 0; iargs < nargs; ++iargs) {
- args[iargs] += subloopsize * steps[iargs];
+ dataptrs[iargs] += subloopsize * strides[iargs];
}
loopsize -= subloopsize;
/*
@@ -1423,35 +1373,40 @@ unmasked_ufunc_loop_as_masked(
++subloopsize;
mask += mask_stride;
}
- unmasked_innerloop(args, &subloopsize, steps, unmasked_innerloopdata);
+ unmasked_innerloop(dataptrs, &subloopsize, strides,
+ unmasked_innerloopdata);
for (iargs = 0; iargs < nargs; ++iargs) {
- args[iargs] += subloopsize * steps[iargs];
+ dataptrs[iargs] += subloopsize * strides[iargs];
}
loopsize -= subloopsize;
} while (loopsize > 0);
}
-/*UFUNC_API
- *
- * This function calls the unmasked type resolution function of the
- * ufunc, then wraps it with a function which only calls the inner
- * loop where the mask is True.
+/*
+ * This function wraps a legacy inner loop so it becomes masked.
*
* Returns 0 on success, -1 on error.
*/
NPY_NO_EXPORT int
-PyUFunc_DefaultTypeResolutionMasked(PyUFuncObject *ufunc,
- NPY_CASTING casting,
- PyArrayObject **operands,
- PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericMaskedFunction *out_innerloop,
- NpyAuxData **out_innerloopdata)
+PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ npy_intp *NPY_UNUSED(fixed_strides),
+ npy_intp NPY_UNUSED(fixed_mask_stride),
+ PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
+ NpyAuxData **out_innerloopdata)
{
int retcode;
_ufunc_masker_data *data;
+ if (ufunc->legacy_inner_loop_selector == NULL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "the ufunc default masked inner loop selector doesn't "
+ "yet support wrapping the new inner loop selector, it "
+ "still only wraps the legacy inner loop selector");
+ return -1;
+ }
+
/* Create a new NpyAuxData object for the masker data */
data = (_ufunc_masker_data *)PyArray_malloc(sizeof(_ufunc_masker_data));
if (data == NULL) {
@@ -1464,8 +1419,7 @@ PyUFunc_DefaultTypeResolutionMasked(PyUFuncObject *ufunc,
data->nargs = ufunc->nin + ufunc->nout;
/* Get the unmasked ufunc inner loop */
- retcode = ufunc->type_resolution_function(ufunc, casting,
- operands, type_tup, out_dtypes,
+ retcode = ufunc->legacy_inner_loop_selector(ufunc, dtypes,
&data->unmasked_innerloop, &data->unmasked_innerloopdata);
if (retcode < 0) {
PyArray_free(data);
@@ -1597,15 +1551,13 @@ set_ufunc_loop_data_types(PyUFuncObject *self, PyArrayObject **op,
* Does a search through the arguments and the loops
*/
static int
-find_ufunc_matching_userloop(PyUFuncObject *self,
+linear_search_userloop_type_resolution(PyUFuncObject *self,
PyArrayObject **op,
NPY_CASTING input_casting,
NPY_CASTING output_casting,
int any_object,
int use_min_scalar,
PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata,
int *out_no_castable_output,
char *out_err_src_typecode,
char *out_err_dst_typecode)
@@ -1647,11 +1599,6 @@ find_ufunc_matching_userloop(PyUFuncObject *self,
/* Found a match */
case 1:
set_ufunc_loop_data_types(self, op, out_dtype, types);
-
- /* Save the inner loop and its data */
- *out_innerloop = funcdata->func;
- *out_innerloopdata = funcdata->data;
-
return 0;
}
@@ -1668,16 +1615,14 @@ find_ufunc_matching_userloop(PyUFuncObject *self,
* Does a search through the arguments and the loops
*/
static int
-find_ufunc_specified_userloop(PyUFuncObject *self,
+type_tuple_userloop_type_resolution(PyUFuncObject *self,
int n_specified,
int *specified_types,
PyArrayObject **op,
NPY_CASTING casting,
int any_object,
int use_min_scalar,
- PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtype)
{
int i, j, nin = self->nin, nop = nin + self->nout;
PyUFunc_Loop1d *funcdata;
@@ -1734,11 +1679,6 @@ find_ufunc_specified_userloop(PyUFuncObject *self,
/* It works */
case 1:
set_ufunc_loop_data_types(self, op, out_dtype, types);
-
- /* Save the inner loop and its data */
- *out_innerloop = funcdata->func;
- *out_innerloopdata = funcdata->data;
-
return 0;
/* Didn't match */
case 0:
@@ -1838,14 +1778,12 @@ should_use_min_scalar(PyArrayObject **op, int nop)
* references in out_dtype. This function does not do its own clean-up.
*/
NPY_NO_EXPORT int
-find_best_ufunc_inner_loop(PyUFuncObject *self,
+linear_search_type_resolution(PyUFuncObject *self,
PyArrayObject **op,
NPY_CASTING input_casting,
NPY_CASTING output_casting,
int any_object,
- PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtype)
{
npy_intp i, j, nin = self->nin, nop = nin + self->nout;
int types[NPY_MAXARGS];
@@ -1861,10 +1799,9 @@ find_best_ufunc_inner_loop(PyUFuncObject *self,
/* If the ufunc has userloops, search for them. */
if (self->userloops) {
- switch (find_ufunc_matching_userloop(self, op,
+ switch (linear_search_userloop_type_resolution(self, op,
input_casting, output_casting,
- any_object, use_min_scalar,
- out_dtype, out_innerloop, out_innerloopdata,
+ any_object, use_min_scalar, out_dtype,
&no_castable_output, &err_src_typecode,
&err_dst_typecode)) {
/* Error */
@@ -1913,14 +1850,8 @@ find_best_ufunc_inner_loop(PyUFuncObject *self,
/* Found a match */
case 1:
set_ufunc_loop_data_types(self, op, out_dtype, types);
-
- /* Save the inner loop and its data */
- *out_innerloop = self->functions[i];
- *out_innerloopdata = self->data[i];
-
return 0;
}
-
}
/* If no function was found, throw an error */
@@ -1955,14 +1886,12 @@ find_best_ufunc_inner_loop(PyUFuncObject *self,
* references in out_dtype. This function does not do its own clean-up.
*/
NPY_NO_EXPORT int
-find_specified_ufunc_inner_loop(PyUFuncObject *self,
+type_tuple_type_resolution(PyUFuncObject *self,
PyObject *type_tup,
PyArrayObject **op,
NPY_CASTING casting,
int any_object,
- PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtype)
{
npy_intp i, j, n, nin = self->nin, nop = nin + self->nout;
int n_specified = 0;
@@ -2061,11 +1990,11 @@ find_specified_ufunc_inner_loop(PyUFuncObject *self,
/* If the ufunc has userloops, search for them. */
if (self->userloops) {
- switch (find_ufunc_specified_userloop(self,
+ switch (type_tuple_userloop_type_resolution(self,
n_specified, specified_types,
op, casting,
any_object, use_min_scalar,
- out_dtype, out_innerloop, out_innerloopdata)) {
+ out_dtype)) {
/* Error */
case -1:
return -1;
@@ -2112,11 +2041,6 @@ find_specified_ufunc_inner_loop(PyUFuncObject *self,
/* It worked */
case 1:
set_ufunc_loop_data_types(self, op, out_dtype, types);
-
- /* Save the inner loop and its data */
- *out_innerloop = self->functions[i];
- *out_innerloopdata = self->data[i];
-
return 0;
/* Didn't work */
case 0:
@@ -2128,7 +2052,6 @@ find_specified_ufunc_inner_loop(PyUFuncObject *self,
ufunc_name);
return -1;
}
-
}
/* If no function was found, throw an error */
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index f1ded2e9b..dad2b6c6c 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -6,80 +6,63 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_OnesLikeTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_AbsoluteTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_AdditionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_SubtractionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
NPY_NO_EXPORT int
PyUFunc_MultiplicationTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
+
NPY_NO_EXPORT int
PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtypes);
/*
* Does a linear search for the best inner loop of the ufunc.
@@ -88,14 +71,12 @@ PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
* references in out_dtype. This function does not do its own clean-up.
*/
NPY_NO_EXPORT int
-find_best_ufunc_inner_loop(PyUFuncObject *self,
+linear_search_type_resolution(PyUFuncObject *self,
PyArrayObject **op,
NPY_CASTING input_casting,
NPY_CASTING output_casting,
int any_object,
- PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtype);
/*
* Does a linear search for the inner loop of the ufunc specified by type_tup.
@@ -104,13 +85,26 @@ find_best_ufunc_inner_loop(PyUFuncObject *self,
* references in out_dtype. This function does not do its own clean-up.
*/
NPY_NO_EXPORT int
-find_specified_ufunc_inner_loop(PyUFuncObject *self,
+type_tuple_type_resolution(PyUFuncObject *self,
PyObject *type_tup,
PyArrayObject **op,
NPY_CASTING casting,
int any_object,
- PyArray_Descr **out_dtype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata);
+ PyArray_Descr **out_dtype);
+
+NPY_NO_EXPORT int
+PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ PyUFuncGenericFunction *out_innerloop,
+ void **out_innerloopdata);
+
+NPY_NO_EXPORT int
+PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
+ PyArray_Descr **dtypes,
+ npy_intp *NPY_UNUSED(fixed_strides),
+ npy_intp NPY_UNUSED(fixed_mask_stride),
+ PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
+ NpyAuxData **out_innerloopdata);
+
#endif
diff --git a/numpy/core/src/umath/umathmodule.c.src b/numpy/core/src/umath/umathmodule.c.src
index 52dcd4c1b..02098f458 100644
--- a/numpy/core/src/umath/umathmodule.c.src
+++ b/numpy/core/src/umath/umathmodule.c.src
@@ -50,22 +50,30 @@ object_ufunc_type_resolution(PyUFuncObject *ufunc,
NPY_CASTING casting,
PyArrayObject **operands,
PyObject *type_tup,
- PyArray_Descr **out_dtypes,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+ PyArray_Descr **out_dtypes)
{
int i, nop = ufunc->nin + ufunc->nout;
+ PyArray_Descr *obj_dtype;
- out_dtypes[0] = PyArray_DescrFromType(NPY_OBJECT);
- if (out_dtypes[0] == NULL) {
+ obj_dtype = PyArray_DescrFromType(NPY_OBJECT);
+ if (obj_dtype == NULL) {
return -1;
}
- for (i = 1; i < nop; ++i) {
- out_dtypes[i] = out_dtypes[0];
- Py_INCREF(out_dtypes[0]);
+ for (i = 0; i < nop; ++i) {
+ Py_INCREF(obj_dtype);
+ out_dtypes[i] = obj_dtype;
}
+ return 0;
+}
+
+static int
+object_ufunc_loop_selector(PyUFuncObject *ufunc,
+ PyArray_Descr **NPY_UNUSED(dtypes),
+ PyUFuncGenericFunction *out_innerloop,
+ void **out_innerloopdata)
+{
*out_innerloop = ufunc->functions[0];
*out_innerloopdata = ufunc->data[0];
@@ -115,6 +123,7 @@ ufunc_frompyfunc(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *NPY_UNUS
self->core_signature = NULL;
self->type_resolution_function = &object_ufunc_type_resolution;
+ self->legacy_inner_loop_selector = &object_ufunc_loop_selector;
pyname = PyObject_GetAttrString(function, "__name__");
if (pyname) {