summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Matti Picus <matti.picus@gmail.com> 2021-06-27 19:39:30 +0300
committer: GitHub <noreply@github.com> 2021-06-27 19:39:30 +0300
commit: f8a30474faaa135ea1b23ad0184f74a19d8b480e (patch)
tree: e766b34d8844e5c20a2fac199e233f6e30d3f223
parent: b970937c080854c9bf49933fbd39e0818663dd09 (diff)
parent: 061ac77a06edbc503bb552e4d4ca6729d524c861 (diff)
download: numpy-f8a30474faaa135ea1b23ad0184f74a19d8b480e.tar.gz
Merge pull request #19259 from seberg/maint-ufunc-refactor-masked-not-duplicate
MAINT: Align masked with normal ufunc loops
-rw-r--r-- doc/release/upcoming_changes/19259.c_api.rst 12
-rw-r--r-- numpy/core/include/numpy/ufuncobject.h 35
-rw-r--r-- numpy/core/src/umath/ufunc_object.c 276
-rw-r--r-- numpy/core/src/umath/ufunc_type_resolution.c 122
-rw-r--r-- numpy/core/src/umath/ufunc_type_resolution.h 6
-rw-r--r-- numpy/core/tests/test_umath.py 10
6 files changed, 156 insertions, 305 deletions
diff --git a/doc/release/upcoming_changes/19259.c_api.rst b/doc/release/upcoming_changes/19259.c_api.rst
new file mode 100644
index 000000000..dac9f520a
--- /dev/null
+++ b/doc/release/upcoming_changes/19259.c_api.rst
@@ -0,0 +1,12 @@
+Masked inner-loops cannot be customized anymore
+-----------------------------------------------
+The masked inner-loop selector is now never used. A warning
+will be given in the unlikely event that it was customized.
+
+We do not expect that any code uses this. If you do use it,
+you must unset the selector on newer NumPy versions.
+Please also contact the NumPy developers; we do anticipate
+providing a new, more specific mechanism.
+
+The customization was part of a never-implemented feature to allow
+for faster masked operations.
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index 333a326ee..0f3b8529a 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -66,27 +66,14 @@ typedef int (PyUFunc_TypeResolutionFunc)(
PyArray_Descr **out_dtypes);
/*
- * Given an array of DTypes as returned by the PyUFunc_TypeResolutionFunc,
- * and an array of fixed strides (the array will contain NPY_MAX_INTP for
- * strides which are not necessarily fixed), returns an inner loop
- * with associated auxiliary data.
- *
- * For backwards compatibility, there is a variant of the inner loop
- * selection which returns an inner loop irrespective of the strides,
- * and with a void* static auxiliary data instead of an NpyAuxData *
- * dynamically allocatable auxiliary data.
+ * Legacy loop selector. (This should NOT normally be used and we can expect
+ * that only the `PyUFunc_DefaultLegacyInnerLoopSelector` is ever set).
+ * However, unlike the masked version, it probably still works.
*
* ufunc: The ufunc object.
* dtypes: An array which has been populated with dtypes,
* in most cases by the type resolution function
* for the same ufunc.
- * fixed_strides: For each input/output, either the stride that
- * will be used every time the function is called
- * or NPY_MAX_INTP if the stride might change or
- * is not known ahead of time. The loop selection
- * function may use this stride to pick inner loops
- * which are optimized for contiguous or 0-stride
- * cases.
* out_innerloop: Should be populated with the correct ufunc inner
* loop for the given type.
* out_innerloopdata: Should be populated with the void* data to
@@ -101,15 +88,7 @@ typedef int (PyUFunc_LegacyInnerLoopSelectionFunc)(
PyUFuncGenericFunction *out_innerloop,
void **out_innerloopdata,
int *out_needs_api);
-typedef int (PyUFunc_MaskedInnerLoopSelectionFunc)(
- struct _tagPyUFuncObject *ufunc,
- PyArray_Descr **dtypes,
- PyArray_Descr *mask_dtype,
- npy_intp *fixed_strides,
- npy_intp fixed_mask_stride,
- PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
- NpyAuxData **out_innerloopdata,
- int *out_needs_api);
+
typedef struct _tagPyUFuncObject {
PyObject_HEAD
@@ -199,10 +178,8 @@ typedef struct _tagPyUFuncObject {
#else
void *reserved2;
#endif
- /*
- * A function which returns a masked inner loop for the ufunc.
- */
- PyUFunc_MaskedInnerLoopSelectionFunc *masked_inner_loop_selector;
+ /* Was previously the `PyUFunc_MaskedInnerLoopSelectionFunc` */
+ void *_always_null_previously_masked_innerloop_selector;
/*
* List of flags for each operand when ufunc is called by nditer object.
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index f7d758088..067de6990 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -1123,8 +1123,7 @@ prepare_ufunc_output(PyUFuncObject *ufunc,
static NPY_INLINE int
try_trivial_single_output_loop(PyUFuncObject *ufunc,
PyArrayObject *op[], PyArray_Descr *dtypes[],
- NPY_ORDER order, PyObject *arr_prep[], ufunc_full_args full_args,
- PyUFuncGenericFunction innerloop, void *innerloopdata)
+ NPY_ORDER order, PyObject *arr_prep[], ufunc_full_args full_args)
{
int nin = ufunc->nin;
int nop = nin + 1;
@@ -1235,6 +1234,13 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc,
int needs_api = 0;
NPY_BEGIN_THREADS_DEF;
+ PyUFuncGenericFunction innerloop;
+ void *innerloopdata = NULL;
+ if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
+ &innerloop, &innerloopdata, &needs_api) < 0) {
+ return -1;
+ }
+
for (int iop = 0; iop < nop; iop++) {
data[iop] = PyArray_BYTES(op[iop]);
needs_api |= PyDataType_REFCHK(dtypes[iop]);
@@ -1252,20 +1258,49 @@ try_trivial_single_output_loop(PyUFuncObject *ufunc,
static int
-iterator_loop(PyUFuncObject *ufunc,
+execute_ufunc_loop(PyUFuncObject *ufunc,
+ int masked,
PyArrayObject **op,
- PyArray_Descr **dtype,
+ PyArray_Descr **dtypes,
NPY_ORDER order,
npy_intp buffersize,
PyObject **arr_prep,
ufunc_full_args full_args,
- PyUFuncGenericFunction innerloop,
- void *innerloopdata,
npy_uint32 *op_flags)
{
int nin = ufunc->nin, nout = ufunc->nout;
int nop = nin + nout;
+ if (masked) {
+ assert(PyArray_TYPE(op[nop]) == NPY_BOOL);
+ if (ufunc->_always_null_previously_masked_innerloop_selector != NULL) {
+ if (PyErr_WarnFormat(PyExc_UserWarning, 1,
+ "The ufunc %s has a custom masked-inner-loop-selector."
+ "NumPy assumes that this is NEVER used. If you do make "
+ "use of this please notify the NumPy developers to discuss "
+ "future solutions. (See NEP 41 and 43)\n"
+ "NumPy will continue, but ignore the custom loop selector. "
+ "This should only affect performance.",
+ ufunc_get_name_cstr(ufunc)) < 0) {
+ return -1;
+ }
+ }
+
+ /*
+ * NOTE: In the masked version, we consider the output read-write,
+ * this gives a best-effort of preserving the input, but does
+ * not always work. It could allow the operand to be copied
+ * due to copy-if-overlap, but only if it was passed in.
+ * In that case `__array_prepare__` is called before it happens.
+ */
+ for (int i = nin; i < nop; ++i) {
+ op_flags[i] |= (op[i] != NULL ? NPY_ITER_READWRITE : NPY_ITER_WRITEONLY);
+ }
+ op_flags[nop] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK; /* mask */
+ }
+
+ NPY_UF_DBG_PRINT("Making iterator\n");
+
npy_uint32 iter_flags = ufunc->iter_flags |
NPY_ITER_EXTERNAL_LOOP |
NPY_ITER_REFS_OK |
@@ -1295,10 +1330,10 @@ iterator_loop(PyUFuncObject *ufunc,
* were already checked, we use the casting rule 'unsafe' which
* is faster to calculate.
*/
- NpyIter *iter = NpyIter_AdvancedNew(nop, op,
+ NpyIter *iter = NpyIter_AdvancedNew(nop + masked, op,
iter_flags,
order, NPY_UNSAFE_CASTING,
- op_flags, dtype,
+ op_flags, dtypes,
-1, NULL, NULL, buffersize);
if (iter == NULL) {
return -1;
@@ -1354,21 +1389,49 @@ iterator_loop(PyUFuncObject *ufunc,
for (int i = 0; i < nin; i++) {
baseptrs[i] = PyArray_BYTES(op_it[i]);
}
+ if (masked) {
+ baseptrs[nop] = PyArray_BYTES(op_it[nop]);
+ }
if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) {
NpyIter_Deallocate(iter);
return -1;
}
+ /*
+ * Get the inner loop.
+ */
+ int needs_api = 0;
+ PyUFuncGenericFunction innerloop;
+ void *innerloopdata = NULL;
+ if (masked) {
+ if (PyUFunc_DefaultMaskedInnerLoopSelector(ufunc,
+ dtypes, &innerloop, (NpyAuxData **)&innerloopdata,
+ &needs_api) < 0) {
+ NpyIter_Deallocate(iter);
+ return -1;
+ }
+ }
+ else {
+ if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
+ &innerloop, &innerloopdata, &needs_api) < 0) {
+ NpyIter_Deallocate(iter);
+ return -1;
+ }
+ }
+
/* Get the variables needed for the loop */
NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
NpyIter_Deallocate(iter);
+ if (masked) {
+ NPY_AUXDATA_FREE((NpyAuxData *)innerloopdata);
+ }
return -1;
}
char **dataptr = NpyIter_GetDataPtrArray(iter);
npy_intp *strides = NpyIter_GetInnerStrideArray(iter);
npy_intp *countptr = NpyIter_GetInnerLoopSizePtr(iter);
- int needs_api = NpyIter_IterationNeedsAPI(iter);
+ needs_api |= NpyIter_IterationNeedsAPI(iter);
NPY_BEGIN_THREADS_DEF;
@@ -1384,6 +1447,9 @@ iterator_loop(PyUFuncObject *ufunc,
} while (!(needs_api && PyErr_Occurred()) && iternext(iter));
NPY_END_THREADS;
+ if (masked) {
+ NPY_AUXDATA_FREE((NpyAuxData *)innerloopdata);
+ }
/*
* Currently `innerloop` may leave an error set, in this case
@@ -1417,20 +1483,10 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc,
ufunc_full_args full_args,
npy_uint32 *op_flags)
{
- PyUFuncGenericFunction innerloop;
- void *innerloopdata;
- int needs_api = 0;
-
- if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
- &innerloop, &innerloopdata, &needs_api) < 0) {
- return -1;
- }
-
/* First check for the trivial cases that don't need an iterator */
if (trivial_loop_ok && ufunc->nout == 1) {
int fast_path_result = try_trivial_single_output_loop(ufunc,
- op, dtypes, order, arr_prep, full_args,
- innerloop, innerloopdata);
+ op, dtypes, order, arr_prep, full_args);
if (fast_path_result != -2) {
return fast_path_result;
}
@@ -1441,186 +1497,14 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc,
* resolve broadcasting, etc
*/
NPY_UF_DBG_PRINT("iterator loop\n");
- if (iterator_loop(ufunc, op, dtypes, order,
- buffersize, arr_prep, full_args,
- innerloop, innerloopdata, op_flags) < 0) {
+ if (execute_ufunc_loop(ufunc, 0, op, dtypes, order,
+ buffersize, arr_prep, full_args, op_flags) < 0) {
return -1;
}
return 0;
}
-/*
- * nin - number of inputs
- * nout - number of outputs
- * wheremask - if not NULL, the 'where=' parameter to the ufunc.
- * op - the operands (nin + nout of them)
- * order - the loop execution order/output memory order
- * buffersize - how big of a buffer to use
- * arr_prep - the __array_prepare__ functions for the outputs
- * innerloop - the inner loop function
- * innerloopdata - data to pass to the inner loop
- */
-static int
-execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
- PyArrayObject *wheremask,
- PyArrayObject **op,
- PyArray_Descr **dtypes,
- NPY_ORDER order,
- npy_intp buffersize,
- PyObject **arr_prep,
- ufunc_full_args full_args,
- npy_uint32 *op_flags)
-{
- int i, nin = ufunc->nin, nout = ufunc->nout;
- int nop = nin + nout;
- NpyIter *iter;
- int needs_api;
-
- NpyIter_IterNextFunc *iternext;
- char **dataptr;
- npy_intp *strides;
- npy_intp *countptr;
-
- PyArrayObject **op_it;
- npy_uint32 iter_flags;
-
- for (i = nin; i < nop; ++i) {
- op_flags[i] |= (op[i] != NULL ? NPY_ITER_READWRITE : NPY_ITER_WRITEONLY);
- }
-
- if (wheremask != NULL) {
- op_flags[nop] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK;
- }
-
- NPY_UF_DBG_PRINT("Making iterator\n");
-
- iter_flags = ufunc->iter_flags |
- NPY_ITER_EXTERNAL_LOOP |
- NPY_ITER_REFS_OK |
- NPY_ITER_ZEROSIZE_OK |
- NPY_ITER_BUFFERED |
- NPY_ITER_GROWINNER |
- NPY_ITER_COPY_IF_OVERLAP;
-
- /*
- * Allocate the iterator. Because the types of the inputs
- * were already checked, we use the casting rule 'unsafe' which
- * is faster to calculate.
- */
- iter = NpyIter_AdvancedNew(nop + ((wheremask != NULL) ? 1 : 0), op,
- iter_flags,
- order, NPY_UNSAFE_CASTING,
- op_flags, dtypes,
- -1, NULL, NULL, buffersize);
- if (iter == NULL) {
- return -1;
- }
-
- NPY_UF_DBG_PRINT("Made iterator\n");
-
- needs_api = NpyIter_IterationNeedsAPI(iter);
-
- /* Call the __array_prepare__ functions where necessary */
- op_it = NpyIter_GetOperandArray(iter);
- for (i = 0; i < nout; ++i) {
- PyArrayObject *op_tmp;
-
- /*
- * The array can be allocated by the iterator -- it is placed in op[i]
- * and returned to the caller, and this needs an extra incref.
- */
- if (op[i+nin] == NULL) {
- op_tmp = op_it[i+nin];
- Py_INCREF(op_tmp);
- }
- else {
- op_tmp = op[i+nin];
- op[i+nin] = NULL;
- }
-
- /* prepare_ufunc_output may decref & replace the pointer */
- char *original_data = PyArray_BYTES(op_tmp);
-
- if (prepare_ufunc_output(ufunc, &op_tmp,
- arr_prep[i], full_args, i) < 0) {
- NpyIter_Deallocate(iter);
- return -1;
- }
-
- /* Validate that the prepare_ufunc_output didn't mess with pointers */
- if (PyArray_BYTES(op_tmp) != original_data) {
- PyErr_SetString(PyExc_ValueError,
- "The __array_prepare__ functions modified the data "
- "pointer addresses in an invalid fashion");
- Py_DECREF(op_tmp);
- NpyIter_Deallocate(iter);
- return -1;
- }
-
- /*
- * Put the updated operand back. If COPY_IF_OVERLAP made a temporary
- * copy, the output will be copied by WRITEBACKIFCOPY even if op[i]
- * was changed by prepare_ufunc_output.
- */
- op[i+nin] = op_tmp;
- }
-
- /* Only do the loop if the iteration size is non-zero */
- if (NpyIter_GetIterSize(iter) != 0) {
- PyUFunc_MaskedStridedInnerLoopFunc *innerloop;
- NpyAuxData *innerloopdata;
- npy_intp fixed_strides[2*NPY_MAXARGS];
- PyArray_Descr **iter_dtypes;
- NPY_BEGIN_THREADS_DEF;
-
- /*
- * Get the inner loop, with the possibility of specialization
- * based on the fixed strides.
- */
- NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
- iter_dtypes = NpyIter_GetDescrArray(iter);
- if (ufunc->masked_inner_loop_selector(ufunc, dtypes,
- wheremask != NULL ? iter_dtypes[nop]
- : iter_dtypes[nop + nin],
- fixed_strides,
- wheremask != NULL ? fixed_strides[nop]
- : fixed_strides[nop + nin],
- &innerloop, &innerloopdata, &needs_api) < 0) {
- NpyIter_Deallocate(iter);
- return -1;
- }
-
- /* Get the variables needed for the loop */
- iternext = NpyIter_GetIterNext(iter, NULL);
- if (iternext == NULL) {
- NpyIter_Deallocate(iter);
- return -1;
- }
- dataptr = NpyIter_GetDataPtrArray(iter);
- strides = NpyIter_GetInnerStrideArray(iter);
- countptr = NpyIter_GetInnerLoopSizePtr(iter);
- needs_api = NpyIter_IterationNeedsAPI(iter);
-
- NPY_BEGIN_THREADS_NDITER(iter);
-
- NPY_UF_DBG_PRINT("Actual inner loop:\n");
- /* Execute the loop */
- do {
- NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*countptr);
- innerloop(dataptr, strides,
- dataptr[nop], strides[nop],
- *countptr, innerloopdata);
- } while (!(needs_api && PyErr_Occurred()) && iternext(iter));
-
- NPY_END_THREADS;
-
- NPY_AUXDATA_FREE(innerloopdata);
- }
-
- return NpyIter_Deallocate(iter);
-}
-
/*
* Validate that operands have enough dimensions, accounting for
@@ -2634,7 +2518,7 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc,
/* Set up the flags */
npy_clear_floatstatus_barrier((char*)&ufunc);
- retval = execute_fancy_ufunc_loop(ufunc, wheremask,
+ retval = execute_ufunc_loop(ufunc, 1,
op, operation_descrs, order,
buffersize, output_array_prepare,
full_args, op_flags);
@@ -5188,7 +5072,7 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi
/* Type resolution and inner loop selection functions */
ufunc->type_resolver = &PyUFunc_DefaultTypeResolver;
ufunc->legacy_inner_loop_selector = &PyUFunc_DefaultLegacyInnerLoopSelector;
- ufunc->masked_inner_loop_selector = &PyUFunc_DefaultMaskedInnerLoopSelector;
+ ufunc->_always_null_previously_masked_innerloop_selector = NULL;
if (name == NULL) {
ufunc->name = "?";
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index e1a4948ff..e09ce3233 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -1495,30 +1495,30 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc,
return raise_no_loop_found_error(ufunc, dtypes);
}
+
+/*
+ * Support for masked inner-strided loops. These are currently ONLY used
+ * for normal ufuncs, and only a generic loop getter exists.
+ * It may make sense to generalize this in the future or allow specialization.
+ * Until then, the inner-loop signature is flexible.
+ */
typedef struct {
NpyAuxData base;
- PyUFuncGenericFunction unmasked_innerloop;
- void *unmasked_innerloopdata;
+ PyUFuncGenericFunction unmasked_stridedloop;
+ void *innerloopdata;
int nargs;
-} _ufunc_masker_data;
-
-static NpyAuxData *
-ufunc_masker_data_clone(NpyAuxData *data)
-{
- _ufunc_masker_data *n;
+ char *dataptrs[];
+} _masked_stridedloop_data;
- /* Allocate a new one */
- n = (_ufunc_masker_data *)PyArray_malloc(sizeof(_ufunc_masker_data));
- if (n == NULL) {
- return NULL;
- }
-
- /* Copy the data (unmasked data doesn't have object semantics) */
- memcpy(n, data, sizeof(_ufunc_masker_data));
- return (NpyAuxData *)n;
+static void
+_masked_stridedloop_data_free(NpyAuxData *auxdata)
+{
+ _masked_stridedloop_data *data = (_masked_stridedloop_data *)auxdata;
+ PyMem_Free(data);
}
+
/*
* This function wraps a regular unmasked ufunc inner loop as a
* masked ufunc inner loop, only calling the function for
@@ -1526,43 +1526,39 @@ ufunc_masker_data_clone(NpyAuxData *data)
*/
static void
unmasked_ufunc_loop_as_masked(
- char **dataptrs, npy_intp *strides,
- char *mask, npy_intp mask_stride,
- npy_intp loopsize,
- NpyAuxData *innerloopdata)
+ char **data, const npy_intp *dimensions,
+ const npy_intp *strides, void *_auxdata)
{
- _ufunc_masker_data *data;
- int iargs, nargs;
- PyUFuncGenericFunction unmasked_innerloop;
- void *unmasked_innerloopdata;
- npy_intp subloopsize;
-
- /* Put the aux data into local variables */
- data = (_ufunc_masker_data *)innerloopdata;
- unmasked_innerloop = data->unmasked_innerloop;
- unmasked_innerloopdata = data->unmasked_innerloopdata;
- nargs = data->nargs;
+ _masked_stridedloop_data *auxdata = (_masked_stridedloop_data *)_auxdata;
+ int nargs = auxdata->nargs;
+ PyUFuncGenericFunction strided_loop = auxdata->unmasked_stridedloop;
+ void *innerloopdata = auxdata->innerloopdata;
+
+ char **dataptrs = auxdata->dataptrs;
+ memcpy(dataptrs, data, nargs * sizeof(char *));
+ char *mask = data[nargs];
+ npy_intp mask_stride = strides[nargs];
+ npy_intp N = dimensions[0];
/* Process the data as runs of unmasked values */
do {
+ ssize_t subloopsize;
+
/* Skip masked values */
- mask = npy_memchr(mask, 0, mask_stride, loopsize, &subloopsize, 1);
- for (iargs = 0; iargs < nargs; ++iargs) {
- dataptrs[iargs] += subloopsize * strides[iargs];
+ mask = npy_memchr(mask, 0, mask_stride, N, &subloopsize, 1);
+ for (int i = 0; i < nargs; i++) {
+ dataptrs[i] += subloopsize * strides[i];
}
- loopsize -= subloopsize;
- /*
- * Process unmasked values (assumes unmasked loop doesn't
- * mess with the 'args' pointer values)
- */
- mask = npy_memchr(mask, 0, mask_stride, loopsize, &subloopsize, 0);
- unmasked_innerloop(dataptrs, &subloopsize, strides,
- unmasked_innerloopdata);
- for (iargs = 0; iargs < nargs; ++iargs) {
- dataptrs[iargs] += subloopsize * strides[iargs];
- }
- loopsize -= subloopsize;
- } while (loopsize > 0);
+ N -= subloopsize;
+
+ /* Process unmasked values */
+ mask = npy_memchr(mask, 0, mask_stride, N, &subloopsize, 0);
+ strided_loop(dataptrs, &subloopsize, strides, innerloopdata);
+ for (int i = 0; i < nargs; i++) {
+ dataptrs[i] += subloopsize * strides[i];
+ }
+ N -= subloopsize;
+ } while (N > 0);
}
@@ -1574,15 +1570,13 @@ unmasked_ufunc_loop_as_masked(
NPY_NO_EXPORT int
PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
PyArray_Descr **dtypes,
- PyArray_Descr *mask_dtype,
- npy_intp *NPY_UNUSED(fixed_strides),
- npy_intp NPY_UNUSED(fixed_mask_stride),
- PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
+ PyUFuncGenericFunction *out_innerloop,
NpyAuxData **out_innerloopdata,
int *out_needs_api)
{
int retcode;
- _ufunc_masker_data *data;
+ _masked_stridedloop_data *data;
+ int nargs = ufunc->nin + ufunc->nout;
if (ufunc->legacy_inner_loop_selector == NULL) {
PyErr_SetString(PyExc_RuntimeError,
@@ -1592,27 +1586,21 @@ PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
return -1;
}
- if (mask_dtype->type_num != NPY_BOOL) {
- PyErr_SetString(PyExc_ValueError,
- "only boolean masks are supported in ufunc inner loops "
- "presently");
- return -1;
- }
-
- /* Create a new NpyAuxData object for the masker data */
- data = (_ufunc_masker_data *)PyArray_malloc(sizeof(_ufunc_masker_data));
+ /* Add working memory for the data pointers, to modify them in-place */
+ data = PyMem_Malloc(sizeof(_masked_stridedloop_data) +
+ sizeof(char *) * nargs);
if (data == NULL) {
PyErr_NoMemory();
return -1;
}
- memset(data, 0, sizeof(_ufunc_masker_data));
- data->base.free = (NpyAuxData_FreeFunc *)&PyArray_free;
- data->base.clone = &ufunc_masker_data_clone;
- data->nargs = ufunc->nin + ufunc->nout;
+ data->base.free = _masked_stridedloop_data_free;
+ data->base.clone = NULL; /* not currently used */
+ data->unmasked_stridedloop = NULL;
+ data->nargs = nargs;
/* Get the unmasked ufunc inner loop */
retcode = ufunc->legacy_inner_loop_selector(ufunc, dtypes,
- &data->unmasked_innerloop, &data->unmasked_innerloopdata,
+ &data->unmasked_stridedloop, &data->innerloopdata,
out_needs_api);
if (retcode < 0) {
PyArray_free(data);
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index b11c69852..fdad19b3d 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -138,11 +138,7 @@ PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc,
NPY_NO_EXPORT int
PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
PyArray_Descr **dtypes,
- PyArray_Descr *mask_dtypes,
- npy_intp *NPY_UNUSED(fixed_strides),
- npy_intp NPY_UNUSED(fixed_mask_stride),
- PyUFunc_MaskedStridedInnerLoopFunc
- **out_innerloop,
+ PyUFuncGenericFunction *out_innerloop,
NpyAuxData **out_innerloopdata,
int *out_needs_api);
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index e6fcf5bd4..6fd4b4659 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -2265,10 +2265,7 @@ class TestSpecialMethods:
a = np.array(1).view(type=with_prepare)
if use_where:
- # Currently raises, due to the array being replaced during prepare
- with pytest.raises(ValueError):
- x = np.add(a, a, where=np.array(True))
- return
+ x = np.add(a, a, where=np.array(True))
else:
x = np.add(a, a)
assert_equal(x, np.array(2))
@@ -2285,10 +2282,7 @@ class TestSpecialMethods:
a = np.array([1]).view(type=with_prepare)
if use_where:
- # Currently raises, due to the array being replaced during prepare
- with pytest.raises(ValueError):
- x = np.add(a, a, a, where=[True])
- return
+ x = np.add(a, a, a, where=[True])
else:
x = np.add(a, a, a)
# Returned array is new, because of the strange