10 files changed, 480 insertions, 621 deletions
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 536b64a9a..7159d9896 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -238,7 +238,7 @@ defdict = {
 'add' :
     Ufunc(2, 1, Zero,
           docstrings.get('numpy.core.umath.add'),
-          'PyUFunc_AdditionTypeResolution',
+          'PyUFunc_AdditionTypeResolver',
           TD(notimes_or_obj),
           [TypeDescription('M', FullTypeDescr, 'Mm', 'M'),
            TypeDescription('m', FullTypeDescr, 'mm', 'm'),
@@ -249,7 +249,7 @@ defdict = {
 'subtract' :
     Ufunc(2, 1, None, # Zero is only a unit to the right, not the left
           docstrings.get('numpy.core.umath.subtract'),
-          'PyUFunc_SubtractionTypeResolution',
+          'PyUFunc_SubtractionTypeResolver',
           TD(notimes_or_obj),
           [TypeDescription('M', FullTypeDescr, 'Mm', 'M'),
            TypeDescription('m', FullTypeDescr, 'mm', 'm'),
@@ -260,7 +260,7 @@ defdict = {
 'multiply' :
     Ufunc(2, 1, One,
           docstrings.get('numpy.core.umath.multiply'),
-          'PyUFunc_MultiplicationTypeResolution',
+          'PyUFunc_MultiplicationTypeResolver',
           TD(notimes_or_obj),
           [TypeDescription('m', FullTypeDescr, 'mq', 'm'),
            TypeDescription('m', FullTypeDescr, 'qm', 'm'),
@@ -272,7 +272,7 @@ defdict = {
 'divide' :
     Ufunc(2, 1, None, # One is only a unit to the right, not the left
           docstrings.get('numpy.core.umath.divide'),
-          'PyUFunc_DivisionTypeResolution',
+          'PyUFunc_DivisionTypeResolver',
           TD(intfltcmplx),
           [TypeDescription('m', FullTypeDescr, 'mq', 'm'),
            TypeDescription('m', FullTypeDescr, 'md', 'm'),
@@ -283,7 +283,7 @@ defdict = {
 'floor_divide' :
     Ufunc(2, 1, None, # One is only a unit to the right, not the left
           docstrings.get('numpy.core.umath.floor_divide'),
-          'PyUFunc_DivisionTypeResolution',
+          'PyUFunc_DivisionTypeResolver',
           TD(intfltcmplx),
           [TypeDescription('m', FullTypeDescr, 'mq', 'm'),
            TypeDescription('m', FullTypeDescr, 'md', 'm'),
@@ -293,7 +293,7 @@ defdict = {
 'true_divide' :
     Ufunc(2, 1, None, # One is only a unit to the right, not the left
           docstrings.get('numpy.core.umath.true_divide'),
-          'PyUFunc_DivisionTypeResolution',
+          'PyUFunc_DivisionTypeResolver',
           TD('bBhH', out='d'),
           TD('iIlLqQ', out='d'),
           TD(flts+cmplx),
@@ -336,7 +336,7 @@ defdict = {
 '_ones_like' :
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath._ones_like'),
-          'PyUFunc_OnesLikeTypeResolution',
+          'PyUFunc_OnesLikeTypeResolver',
           TD(noobj),
           TD(O, f='Py_get_one'),
           ),
@@ -351,7 +351,7 @@ defdict = {
 'absolute' :
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.absolute'),
-          'PyUFunc_AbsoluteTypeResolution',
+          'PyUFunc_AbsoluteTypeResolver',
           TD(bints+flts+timedeltaonly),
           TD(cmplx, out=('f', 'd', 'g')),
           TD(O, f='PyNumber_Absolute'),
@@ -365,7 +365,7 @@ defdict = {
 'negative' :
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.negative'),
-          'PyUFunc_SimpleUnaryOperationTypeResolution',
+          'PyUFunc_SimpleUnaryOperationTypeResolver',
           TD(bints+flts+timedeltaonly),
           TD(cmplx, f='neg'),
           TD(O, f='PyNumber_Negative'),
@@ -373,13 +373,13 @@ defdict = {
 'sign' :
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.sign'),
-          'PyUFunc_SimpleUnaryOperationTypeResolution',
+          'PyUFunc_SimpleUnaryOperationTypeResolver',
           TD(nobool_or_datetime),
           ),
 'greater' :
     Ufunc(2, 1, None,
           docstrings.get('numpy.core.umath.greater'),
-          'PyUFunc_SimpleBinaryComparisonTypeResolution',
+          'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(all, out='?'),
           ),
 'greater_equal' :
@@ -391,31 +391,31 @@ defdict = {
 'less' :
     Ufunc(2, 1, None,
           docstrings.get('numpy.core.umath.less'),
-          'PyUFunc_SimpleBinaryComparisonTypeResolution',
+          'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(all, out='?'),
           ),
 'less_equal' :
     Ufunc(2, 1, None,
           docstrings.get('numpy.core.umath.less_equal'),
-          'PyUFunc_SimpleBinaryComparisonTypeResolution',
+          'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(all, out='?'),
           ),
 'equal' :
     Ufunc(2, 1, None,
           docstrings.get('numpy.core.umath.equal'),
-          'PyUFunc_SimpleBinaryComparisonTypeResolution',
+          'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(all, out='?'),
           ),
 'not_equal' :
     Ufunc(2, 1, None,
           docstrings.get('numpy.core.umath.not_equal'),
-          'PyUFunc_SimpleBinaryComparisonTypeResolution',
+          'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(all, out='?'),
           ),
 'logical_and' :
     Ufunc(2, 1, One,
           docstrings.get('numpy.core.umath.logical_and'),
-          'PyUFunc_SimpleBinaryComparisonTypeResolution',
+          'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(nodatetime_or_obj, out='?'),
           TD(O, f='npy_ObjectLogicalAnd'),
           ),
@@ -429,42 +429,42 @@ defdict = {
 'logical_or' :
     Ufunc(2, 1, Zero,
           docstrings.get('numpy.core.umath.logical_or'),
-          'PyUFunc_SimpleBinaryComparisonTypeResolution',
+          'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(nodatetime_or_obj, out='?'),
           TD(O, f='npy_ObjectLogicalOr'),
           ),
 'logical_xor' :
     Ufunc(2, 1, None,
           docstrings.get('numpy.core.umath.logical_xor'),
-          'PyUFunc_SimpleBinaryComparisonTypeResolution',
+          'PyUFunc_SimpleBinaryComparisonTypeResolver',
           TD(nodatetime_or_obj, out='?'),
           TD(P, f='logical_xor'),
           ),
 'maximum' :
     Ufunc(2, 1, ReorderableNone,
           docstrings.get('numpy.core.umath.maximum'),
-          'PyUFunc_SimpleBinaryOperationTypeResolution',
+          'PyUFunc_SimpleBinaryOperationTypeResolver',
           TD(noobj),
           TD(O, f='npy_ObjectMax')
           ),
 'minimum' :
     Ufunc(2, 1, ReorderableNone,
           docstrings.get('numpy.core.umath.minimum'),
-          'PyUFunc_SimpleBinaryOperationTypeResolution',
+          'PyUFunc_SimpleBinaryOperationTypeResolver',
           TD(noobj),
           TD(O, f='npy_ObjectMin')
           ),
 'fmax' :
     Ufunc(2, 1, ReorderableNone,
           docstrings.get('numpy.core.umath.fmax'),
-          'PyUFunc_SimpleBinaryOperationTypeResolution',
+          'PyUFunc_SimpleBinaryOperationTypeResolver',
           TD(noobj),
           TD(O, f='npy_ObjectMax')
           ),
 'fmin' :
     Ufunc(2, 1, ReorderableNone,
           docstrings.get('numpy.core.umath.fmin'),
-          'PyUFunc_SimpleBinaryOperationTypeResolution',
+          'PyUFunc_SimpleBinaryOperationTypeResolver',
           TD(noobj),
           TD(O, f='npy_ObjectMin')
           ),
@@ -928,7 +928,7 @@ r"""f = PyUFunc_FromFuncAndData(%s_functions, %s_data, %s_signatures, %d,
                                                 name, docstring))
         if uf.typereso != None:
             mlist.append(
-                r"((PyUFuncObject *)f)->type_resolution_function = &%s;" %
+                r"((PyUFuncObject *)f)->type_resolver = &%s;" %
                                                                 uf.typereso)
         mlist.append(r"""PyDict_SetItemString(dictionary, "%s", f);""" % name)
         mlist.append(r"""Py_DECREF(f);""")
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index 3288a5749..08266d6ca 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -385,7 +385,7 @@ ufunc_funcs_api = {
     'PyUFunc_ee_e_As_ff_f':                     37,
     'PyUFunc_ee_e_As_dd_d':                     38,
     # End 1.6 API
-    'PyUFunc_DefaultTypeResolution':            39,
+    'PyUFunc_DefaultTypeResolver':              39,
     'PyUFunc_ValidateCasting':                  40,
 }
 
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index f8d44f99a..7bdcdff81 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1787,7 +1787,7 @@ typedef struct {
  ************************************************************/
 
 /*
- * This is a function for assigning a reduction unit to the result,
+ * This is a function for assigning a reduction identity to the result,
  * before doing the reduction computation. If 'preservena' is True,
  * any masked NA values in 'result' should not be overwritten. The
  * value in 'data' is passed through from PyArray_ReduceWrapper.
@@ -1797,30 +1797,30 @@ typedef struct {
  *
  * It should return -1 on failure, or 0 on success.
  */
-typedef int (PyArray_AssignReduceUnitFunc)(PyArrayObject *result,
+typedef int (PyArray_AssignReduceIdentityFunc)(PyArrayObject *result,
                                             int preservena, void *data);
 
 /*
- * This is a function for the inner reduce loop. Both the unmasked and
+ * This is a function for the reduce loop. Both the unmasked and
  * masked variants have the same prototype, but should behave differently.
  *
  * The needs_api parameter indicates whether it's ok to release the GIL during
- * the inner loop, such as when the iternext() function never calls
+ * the loop, such as when the iternext() function never calls
  * a function which could raise a Python exception.
  *
  * Ths skip_first_count parameter indicates how many elements need to be
  * skipped based on NpyIter_IsFirstVisit checks. This can only be positive
- * when the 'assign_unit' parameter was NULL when calling
+ * when the 'assign_identity' parameter was NULL when calling
  * PyArray_ReduceWrapper.
  *
- * The unmasked inner loop gets two data pointers and two strides, and should
+ * The unmasked loop gets two data pointers and two strides, and should
  * look roughly like this:
  *  {
  *      NPY_BEGIN_THREADS_DEF;
  *      if (!needs_api) {
  *          NPY_BEGIN_THREADS;
  *      }
- *      // This first-visit loop can be skipped if 'assign_unit' was non-NULL
+ *      // This first-visit loop can be skipped if 'assign_identity' was non-NULL
  *      if (skip_first_count > 0) {
  *          do {
  *              char *data0 = dataptr[0], *data1 = dataptr[1];
@@ -1878,8 +1878,8 @@ typedef int (PyArray_AssignReduceUnitFunc)(PyArrayObject *result,
  *      return (needs_api && PyErr_Occurred()) ? -1 : 0;
  *  }
  *
- * The masked inner loop gets three data pointers and three strides, and
- * looks identical except for the iteration inner loops which should be
+ * The masked loop gets three data pointers and three strides, and
+ * looks identical except for the iteration loops which should be
  * like this:
  *      do {
  *          char *data0 = dataptr[0], *data1 = dataptr[1], *data2 = dataptr[2];
@@ -1907,7 +1907,7 @@ typedef int (PyArray_AssignReduceUnitFunc)(PyArrayObject *result,
  * to check if an error occurred during processing, and return -1 for
  * error, 0 for success.
  */
-typedef int (PyArray_ReduceInnerLoopFunc)(NpyIter *iter,
+typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter,
                                             char **dataptr,
                                             npy_intp *strideptr,
                                             npy_intp *countptr,
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index efcfa9a1c..88198a449 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -38,7 +38,7 @@ typedef void (PyUFunc_MaskedStridedInnerLoopFunc)(
                 npy_intp count,
                 NpyAuxData *innerloopdata);
 
-/* Forward declaration for the type resolution function */
+/* Forward declaration for the type resolver and loop selector typedefs */
 struct _tagPyUFuncObject;
 
 /*
@@ -99,25 +99,31 @@ typedef int (PyUFunc_TypeResolutionFunc)(
  *                    loop for the given type.
  * out_innerloopdata: Should be populated with the void* data to
  *                    be passed into the out_innerloop function.
+ * out_needs_api:     If the inner loop needs to use the Python API,
+ *                    should set this to 1, otherwise should leave
+ *                    this untouched.
  */
 typedef int (PyUFunc_LegacyInnerLoopSelectionFunc)(
                             struct _tagPyUFuncObject *ufunc,
                             PyArray_Descr **dtypes,
                             PyUFuncGenericFunction *out_innerloop,
-                            void **out_innerloopdata);
+                            void **out_innerloopdata,
+                            int *out_needs_api);
 typedef int (PyUFunc_InnerLoopSelectionFunc)(
                             struct _tagPyUFuncObject *ufunc,
                             PyArray_Descr **dtypes,
                             npy_intp *fixed_strides,
                             PyUFunc_StridedInnerLoopFunc **out_innerloop,
-                            NpyAuxData **out_innerloopdata);
+                            NpyAuxData **out_innerloopdata,
+                            int *out_needs_api);
 typedef int (PyUFunc_MaskedInnerLoopSelectionFunc)(
                             struct _tagPyUFuncObject *ufunc,
                             PyArray_Descr **dtypes,
                             npy_intp *fixed_strides,
                             npy_intp fixed_mask_stride,
                             PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
-                            NpyAuxData **out_innerloopdata);
+                            NpyAuxData **out_innerloopdata,
+                            int *out_needs_api);
 
 typedef struct _tagPyUFuncObject {
         PyObject_HEAD
@@ -185,7 +191,7 @@ typedef struct _tagPyUFuncObject {
          * A function which resolves the types and fills an array
          * with the dtypes for the inputs and outputs.
          */
-        PyUFunc_TypeResolutionFunc *type_resolution_function;
+        PyUFunc_TypeResolutionFunc *type_resolver;
         /*
          * A function which returns an inner loop written for
          * NumPy 1.6 and earlier ufuncs. This is for backwards
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 09431e9dc..32f190a4d 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -1893,15 +1893,15 @@ count_boolean_trues(int ndim, char *data, npy_intp *ashape, npy_intp *astrides)
 }
 
 static int
-assign_reduce_unit_zero(PyArrayObject *result, int preservena, void *data)
+assign_reduce_identity_zero(PyArrayObject *result, int preservena, void *data)
 {
     return PyArray_AssignZero(result, NULL, preservena, NULL);
 }
 
 static int
-reduce_count_nonzero_inner_loop(NpyIter *iter,
+reduce_count_nonzero_loop(NpyIter *iter,
                                             char **dataptr,
-                                            npy_intp *strideptr,
+                                            npy_intp *strides,
                                             npy_intp *countptr,
                                             NpyIter_IterNextFunc *iternext,
                                             int needs_api,
@@ -1919,7 +1919,7 @@ reduce_count_nonzero_inner_loop(NpyIter *iter,
 
     do {
         char *data0 = dataptr[0], *data1 = dataptr[1];
-        npy_intp stride0 = strideptr[0], stride1 = strideptr[1];
+        npy_intp stride0 = strides[0], stride1 = strides[1];
         npy_intp count = *countptr;
 
         while (count--) {
@@ -1939,9 +1939,9 @@ reduce_count_nonzero_inner_loop(NpyIter *iter,
 }
 
 static int
-reduce_count_nonzero_masked_inner_loop(NpyIter *iter,
+reduce_count_nonzero_masked_loop(NpyIter *iter,
                                             char **dataptr,
-                                            npy_intp *strideptr,
+                                            npy_intp *strides,
                                             npy_intp *countptr,
                                             NpyIter_IterNextFunc *iternext,
                                             int needs_api,
@@ -1959,8 +1959,8 @@ reduce_count_nonzero_masked_inner_loop(NpyIter *iter,
 
     do {
         char *data0 = dataptr[0], *data1 = dataptr[1], *data2 = dataptr[2];
-        npy_intp stride0 = strideptr[0], stride1 = strideptr[1],
-                    stride2 = strideptr[2];
+        npy_intp stride0 = strides[0], stride1 = strides[1],
+                    stride2 = strides[2];
         npy_intp count = *countptr;
 
         while (count--) {
@@ -2011,10 +2011,11 @@ PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out,
 
     result = PyArray_ReduceWrapper(arr, out,
                             PyArray_DESCR(arr), dtype,
+                            NPY_SAME_KIND_CASTING,
                             axis_flags, 1, skipna, keepdims,
-                            &assign_reduce_unit_zero,
-                            &reduce_count_nonzero_inner_loop,
-                            &reduce_count_nonzero_masked_inner_loop,
+                            &assign_reduce_identity_zero,
+                            &reduce_count_nonzero_loop,
+                            &reduce_count_nonzero_masked_loop,
                             nonzero, 0, "count_nonzero");
     Py_DECREF(dtype);
     if (out == NULL && result != NULL) {
diff --git a/numpy/core/src/multiarray/reduction.c b/numpy/core/src/multiarray/reduction.c
index 03c15b420..0cc513a04 100644
--- a/numpy/core/src/multiarray/reduction.c
+++ b/numpy/core/src/multiarray/reduction.c
@@ -506,13 +506,14 @@ PyArray_InitializeReduceResult(
 /*NUMPY_API
  *
  * This function executes all the standard NumPy reduction function
- * boilerplate code, just calling assign_unit and the appropriate
+ * boilerplate code, just calling assign_identity and the appropriate
  * inner loop function where necessary.
  *
  * operand     : The array to be reduced.
  * out         : NULL, or the array into which to place the result.
  * operand_dtype : The dtype the inner loop expects for the operand.
  * result_dtype : The dtype the inner loop expects for the result.
+ * casting     : The casting rule to apply to the operands.
  * axis_flags  : Flags indicating the reduction axes of 'operand'.
  * reorderable : If True, the reduction being done is reorderable, which
  *               means specifying multiple axes of reduction at once is ok,
@@ -522,12 +523,12 @@ PyArray_InitializeReduceResult(
  * skipna      : If true, NAs are skipped instead of propagating.
  * keepdims    : If true, leaves the reduction dimensions in the result
  *               with size one.
- * assign_unit : If NULL, PyArray_InitializeReduceResult is used, otherwise
+ * assign_identity : If NULL, PyArray_InitializeReduceResult is used, otherwise
  *               this function is called to initialize the result to
  *               the reduction's unit.
- * inner_loop  : The inner loop which does the reduction.
- * masked_inner_loop: The inner loop which does the reduction with a mask.
- * data        : Data which is passed to assign_unit and the inner loop.
+ * loop        : The loop which does the reduction.
+ * masked_loop : The loop which does the reduction with a mask.
+ * data        : Data which is passed to assign_identity and the loop functions.
  * buffersize  : Buffer size for the iterator. For the default, pass in 0.
  * funcname    : The name of the reduction function, for error messages.
  */
@@ -535,11 +536,12 @@ NPY_NO_EXPORT PyArrayObject *
 PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
                         PyArray_Descr *operand_dtype,
                         PyArray_Descr *result_dtype,
+                        NPY_CASTING casting,
                         npy_bool *axis_flags, int reorderable,
                         int skipna, int keepdims,
-                        PyArray_AssignReduceUnitFunc *assign_unit,
-                        PyArray_ReduceInnerLoopFunc *inner_loop,
-                        PyArray_ReduceInnerLoopFunc *masked_inner_loop,
+                        PyArray_AssignReduceIdentityFunc *assign_identity,
+                        PyArray_ReduceLoopFunc *loop,
+                        PyArray_ReduceLoopFunc *masked_loop,
                         void *data, npy_intp buffersize, const char *funcname)
 {
     int use_maskna;
@@ -604,7 +606,7 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
      * Initialize the result to the reduction unit if possible,
      * otherwise copy the initial values and get a view to the rest.
      */
-    if (assign_unit != NULL) {
+    if (assign_identity != NULL) {
         /*
          * If this reduction is non-reorderable, make sure there are
          * only 0 or 1 axes in axis_flags.
@@ -614,7 +616,7 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
             return NULL;
         }
 
-        if (assign_unit(result, !skipna, data) < 0) {
+        if (assign_identity(result, !skipna, data) < 0) {
             goto fail;
         }
         op_view = operand;
@@ -679,7 +681,7 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
     }
 
     iter = NpyIter_AdvancedNew(2, op, flags,
-                               NPY_KEEPORDER, NPY_SAME_KIND_CASTING,
+                               NPY_KEEPORDER, casting,
                                op_flags,
                                op_dtypes,
                                0, NULL, NULL, buffersize);
@@ -711,14 +713,14 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
 
         /* Straightforward reduction */
         if (!use_maskna) {
-            if (inner_loop == NULL) {
+            if (loop == NULL) {
                 PyErr_Format(PyExc_RuntimeError,
                         "reduction operation %s did not supply an "
                         "unmasked inner loop function", funcname);
                 goto fail;
             }
 
-            if (inner_loop(iter, dataptr, strideptr, countptr,
+            if (loop(iter, dataptr, strideptr, countptr,
                             iternext, needs_api, skip_first_count, data) < 0) {
 
                 goto fail;
@@ -726,14 +728,14 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
         }
         /* Masked reduction */
         else {
-            if (masked_inner_loop == NULL) {
+            if (masked_loop == NULL) {
                 PyErr_Format(PyExc_RuntimeError,
                         "reduction operation %s did not supply a "
                         "masked inner loop function", funcname);
                 goto fail;
             }
 
-            if (masked_inner_loop(iter, dataptr, strideptr, countptr,
+            if (masked_loop(iter, dataptr, strideptr, countptr,
                             iternext, needs_api, skip_first_count, data) < 0) {
                 goto fail;
             }
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index b5635979c..b0ebbf9b0 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -1354,9 +1354,10 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc,
     npy_intp nin = ufunc->nin, nout = ufunc->nout;
     PyUFuncGenericFunction innerloop;
     void *innerloopdata;
+    int needs_api = 0;
 
     if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
-                    &innerloop, &innerloopdata) < 0) {
+                    &innerloop, &innerloopdata, &needs_api) < 0) {
         return -1;
     }
     /* If the loop wants the arrays, provide them. */
@@ -1660,7 +1661,7 @@ execute_ufunc_masked_loop(PyUFuncObject *ufunc,
                         fixed_strides,
                         wheremask != NULL ? fixed_strides[nop]
                                           : fixed_strides[nop + nin],
-                        &innerloop, &innerloopdata) < 0) {
+                        &innerloop, &innerloopdata, &needs_api) < 0) {
             NpyIter_Deallocate(iter);
             return -1;
         }
@@ -1760,6 +1761,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     int i, idim, nop;
     char *ufunc_name;
     int retval = -1, subok = 1;
+    int needs_api = 0;
 
     PyArray_Descr *dtypes[NPY_MAXARGS];
 
@@ -1917,14 +1919,14 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     NPY_UF_DBG_PRINT("Finding inner loop\n");
 
 
-    retval = ufunc->type_resolution_function(ufunc, casting,
+    retval = ufunc->type_resolver(ufunc, casting,
                             op, type_tup, dtypes);
     if (retval < 0) {
         goto fail;
     }
     /* For the generalized ufunc, we get the loop right away too */
     retval = ufunc->legacy_inner_loop_selector(ufunc, dtypes,
-                                    &innerloop, &innerloopdata);
+                                    &innerloop, &innerloopdata, &needs_api);
     if (retval < 0) {
         goto fail;
     }
@@ -2269,7 +2271,7 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc,
 
     NPY_UF_DBG_PRINT("Finding inner loop\n");
 
-    retval = ufunc->type_resolution_function(ufunc, casting,
+    retval = ufunc->type_resolver(ufunc, casting,
                             op, type_tup, dtypes);
     if (retval < 0) {
         goto fail;
@@ -2499,122 +2501,270 @@ get_binary_op_function(PyUFuncObject *ufunc, int *otype,
     return -1;
 }
 
-/*
- * Given the output type, finds the specified binary op, and
- * returns a masked inner loop.  The ufunc must have nin==2
- * and nout==1.  The function may modify otype if the given
- * type isn't found.
- *
- * Returns 0 on success, -1 on failure.
- */
 static int
-get_masked_binary_op_function(PyUFuncObject *ufunc, PyArrayObject *arr,
-                        int otype,
-                        PyArray_Descr **out_dtype,
-                        PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
-                        NpyAuxData **out_innerloopdata)
+reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr,
+                        PyArray_Descr *odtype, PyArray_Descr **out_dtype)
 {
     int i, retcode;
     PyArrayObject *op[3] = {arr, arr, NULL};
     PyArray_Descr *dtypes[3] = {NULL, NULL, NULL};
     char *ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
-    npy_intp fixed_strides[3] = {NPY_MAX_INTP, NPY_MAX_INTP, NPY_MAX_INTP};
-
-    NPY_UF_DBG_PRINT1("Getting masked binary op function for type number %d\n",
-                                otype);
+    PyObject *type_tup = NULL;
 
     *out_dtype = NULL;
 
-    /* Build a type tuple if otype is specified */
-    if (otype == NPY_NOTYPE) {
-        /* Use the type resolution function to find our loop */
-        retcode = ufunc->type_resolution_function(
-                            ufunc, NPY_SAME_KIND_CASTING,
-                            op, NULL, dtypes);
-        if (retcode == -1) {
-            return -1;
-        }
-        else if (retcode == -2) {
-            PyErr_SetString(PyExc_RuntimeError,
-                    "type resolution returned NotImplemented");
+    /*
+     * If odtype is specified, make a type tuple for the type
+     * resolution.
+     */
+    if (odtype != NULL) {
+        type_tup = Py_BuildValue("OOO", odtype, odtype, Py_None);
+        if (type_tup == NULL) {
             return -1;
         }
+    }
 
-        /* The selected dtypes should all be equivalent */
-        if (!PyArray_EquivTypes(dtypes[0], dtypes[1]) ||
-                    !PyArray_EquivTypes(dtypes[1], dtypes[2])) {
-            for (i = 0; i < 3; ++i) {
-                Py_DECREF(dtypes[i]);
-            }
-            PyErr_Format(PyExc_RuntimeError,
-                    "could not find a type resolution appropriate for "
-                    "reduce ufunc %s", ufunc_name);
-            return -1;
-        }
+    /* Use the type resolution function to find our loop */
+    retcode = ufunc->type_resolver(
+                        ufunc, NPY_UNSAFE_CASTING,
+                        op, type_tup, dtypes);
+    if (retcode == -1) {
+        return -1;
     }
-    else {
-        PyArray_Descr *otype_dtype = PyArray_DescrFromType(otype);
-        if (otype_dtype == NULL) {
-            return -1;
-        }
-        dtypes[0] = otype_dtype;
-        Py_INCREF(otype_dtype);
-        dtypes[1] = otype_dtype;
-        Py_INCREF(otype_dtype);
-        dtypes[2] = otype_dtype;
+    else if (retcode == -2) {
+        PyErr_Format(PyExc_RuntimeError,
+                "type resolution returned NotImplemented to "
+                "reduce ufunc %s", ufunc_name);
+        return -1;
     }
 
-    /* Get the inner loop for the resolved dtypes */
-    if (ufunc->masked_inner_loop_selector(ufunc, dtypes,
-                            fixed_strides, NPY_MAX_INTP,
-                            out_innerloop, out_innerloopdata) < 0) {
-        Py_DECREF(dtypes[0]);
-        Py_DECREF(dtypes[1]);
-        Py_DECREF(dtypes[2]);
-
+    /*
+     * The first two types should be equivalent. Because of how
+     * reduce has historically behaved in NumPy, the return type
+     * could be different, and it is the return type on which the
+     * reduction occurs.
+     */
+    if (!PyArray_EquivTypes(dtypes[0], dtypes[1])) {
+        for (i = 0; i < 3; ++i) {
+            Py_DECREF(dtypes[i]);
+        }
+        PyErr_Format(PyExc_RuntimeError,
+                "could not find a type resolution appropriate for "
+                "reduce ufunc %s", ufunc_name);
         return -1;
     }
 
-    *out_dtype = dtypes[0];
+    Py_DECREF(dtypes[0]);
     Py_DECREF(dtypes[1]);
-    Py_DECREF(dtypes[2]);
+    *out_dtype = dtypes[2];
 
     return 0;
 }
 
-/*
- * Either:
- *   1) Fills 'result' with the identity, and returns a reference to 'arr'.
- *   2) Copies the first values along each reduction axis into 'result',
- *      returns a view to the rest of the elements of 'arr'.
- */
-static PyArrayObject *
-initialize_reduce_result(int identity, PyArrayObject *result,
-                        npy_bool *axis_flags, PyArrayObject *arr,
-                        int skipna, npy_intp *out_skip_first_count,
-                        char *ufunc_name)
+static int
+assign_reduce_identity_zero(PyArrayObject *result, int preservena, void *data)
 {
-    if (identity == PyUFunc_One) {
-        *out_skip_first_count = 0;
-        if (PyArray_AssignOne(result, NULL, !skipna, NULL) < 0) {
-            return NULL;
-        }
-        Py_INCREF(arr);
-        return arr;
+    return PyArray_AssignZero(result, NULL, preservena, NULL);
+}
+
+static int
+assign_reduce_identity_one(PyArrayObject *result, int preservena, void *data)
+{
+    return PyArray_AssignOne(result, NULL, preservena, NULL);
+}
+
+static int
+reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides,
+            npy_intp *countptr, NpyIter_IterNextFunc *iternext,
+            int needs_api, npy_intp skip_first_count, void *data)
+{
+    PyArray_Descr *dtypes[3], **iter_dtypes;
+    PyUFuncObject *ufunc = (PyUFuncObject *)data;
+    char *dataptrs_copy[3];
+    npy_intp strides_copy[3];
+
+    /* The normal selected inner loop */
+    PyUFuncGenericFunction innerloop = NULL;
+    void *innerloopdata = NULL;
+
+    NPY_BEGIN_THREADS_DEF;
+
+    /* Get the inner loop */
+    iter_dtypes = NpyIter_GetDescrArray(iter);
+    dtypes[0] = iter_dtypes[0];
+    dtypes[1] = iter_dtypes[1];
+    dtypes[2] = iter_dtypes[0];
+    if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
+                            &innerloop, &innerloopdata, &needs_api) < 0) {
+        return -1;
     }
-    else if (identity == PyUFunc_Zero) {
-        *out_skip_first_count = 0;
-        if (PyArray_AssignZero(result, NULL, !skipna, NULL) < 0) {
-            return NULL;
-        }
-        Py_INCREF(arr);
-        return arr;
+
+    if (!needs_api) {
+        NPY_BEGIN_THREADS;
     }
-    else {
-        int reorderable = (identity == PyUFunc_ReorderableNone);
-        return PyArray_InitializeReduceResult(result, arr, axis_flags,
-                        reorderable, skipna, out_skip_first_count, ufunc_name);
+
+    if (skip_first_count > 0) {
+        do {
+            npy_intp count = *countptr;
+
+            /* Skip any first-visit elements */
+            if (NpyIter_IsFirstVisit(iter, 0)) {
+                if (strides[0] == 0) {
+                    --count;
+                    --skip_first_count;
+                    dataptrs[1] += strides[1];
+                }
+                else {
+                    skip_first_count -= count;
+                    count = 0;
+                }
+            }
+
+            /* Operand 0 is both first input and output of the inner loop */
+            dataptrs_copy[0] = dataptrs[0];
+            dataptrs_copy[1] = dataptrs[1];
+            dataptrs_copy[2] = dataptrs[0];
+            strides_copy[0] = strides[0];
+            strides_copy[1] = strides[1];
+            strides_copy[2] = strides[0];
+            innerloop(dataptrs_copy, &count,
+                        strides_copy, innerloopdata);
+
+            /* Jump to the faster loop when skipping is done */
+            if (skip_first_count == 0) {
+                if (iternext(iter)) {
+                    break;
+                }
+                else {
+                    goto finish_loop;
+                }
+            }
+        } while (iternext(iter));
+    }
+    do {
+        /* Operand 0 is both first input and output of the inner loop */
+        dataptrs_copy[0] = dataptrs[0];
+        dataptrs_copy[1] = dataptrs[1];
+        dataptrs_copy[2] = dataptrs[0];
+        strides_copy[0] = strides[0];
+        strides_copy[1] = strides[1];
+        strides_copy[2] = strides[0];
+        innerloop(dataptrs_copy, countptr,
+                    strides_copy, innerloopdata);
+    } while (iternext(iter));
+
+finish_loop:
+    if (!needs_api) {
+        NPY_END_THREADS;
     }
+
+    return (needs_api && PyErr_Occurred()) ? -1 : 0;
+}
+
+static int
+masked_reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides,
+            npy_intp *countptr, NpyIter_IterNextFunc *iternext,
+            int needs_api, npy_intp skip_first_count, void *data)
+{
+    PyArray_Descr *dtypes[3], **iter_dtypes;
+    npy_intp fixed_strides[3], fixed_mask_stride;
+    PyUFuncObject *ufunc = (PyUFuncObject *)data;
+    char *dataptrs_copy[3];
+    npy_intp strides_copy[3];
+
+    /* The masked selected inner loop */
+    PyUFunc_MaskedStridedInnerLoopFunc *innerloop = NULL;
+    NpyAuxData *innerloopdata = NULL;
+
+    NPY_BEGIN_THREADS_DEF;
+
+    /* Get the inner loop */
+    NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
+    fixed_mask_stride = fixed_strides[2];
+    fixed_strides[2] = fixed_strides[0];
+    iter_dtypes = NpyIter_GetDescrArray(iter);
+    dtypes[0] = iter_dtypes[0];
+    dtypes[1] = iter_dtypes[1];
+    dtypes[2] = iter_dtypes[0];
+    if (ufunc->masked_inner_loop_selector(ufunc, dtypes,
+                            fixed_strides, fixed_mask_stride,
+                            &innerloop, &innerloopdata, &needs_api) < 0) {
+        return -1;
+    }
+
+    if (!needs_api) {
+        NPY_BEGIN_THREADS;
+    }
+
+    if (skip_first_count > 0) {
+        do {
+            npy_intp count = *countptr;
+
+            /* Skip any first-visit elements */
+            if (NpyIter_IsFirstVisit(iter, 0)) {
+                if (strides[0] == 0) {
+                    --count;
+                    --skip_first_count;
+                    dataptrs[1] += strides[1];
+                    dataptrs[2] += strides[2];
+                }
+                else {
+                    skip_first_count -= count;
+                    count = 0;
+                }
+            }
+
+            /* Operand 0 is both first input and output of the inner loop */
+            dataptrs_copy[0] = dataptrs[0];
+            dataptrs_copy[1] = dataptrs[1];
+            dataptrs_copy[2] = dataptrs[0];
+            strides_copy[0] = strides[0];
+            strides_copy[1] = strides[1];
+            strides_copy[2] = strides[0];
+            /*
+             * If skipna=True, this masks based on the mask in 'arr',
+             * otherwise it masks based on the mask in 'result'
+             */
+            innerloop(dataptrs_copy, strides_copy,
+                        dataptrs[2], strides[2],
+                        count, innerloopdata);
+
+            /* Jump to the faster loop when skipping is done */
+            if (skip_first_count == 0) {
+                if (iternext(iter)) {
+                    break;
+                }
+                else {
+                    goto finish_loop;
+                }
+            }
+        } while (iternext(iter));
+    }
+    do {
+        /* Operand 0 is both first input and output of the inner loop */
+        dataptrs_copy[0] = dataptrs[0];
+        dataptrs_copy[1] = dataptrs[1];
+        dataptrs_copy[2] = dataptrs[0];
+        strides_copy[0] = strides[0];
+        strides_copy[1] = strides[1];
+        strides_copy[2] = strides[0];
+        /*
+         * If skipna=True, this masks based on the mask in 'arr',
+         * otherwise it masks based on the mask in 'result'
+         */
+        innerloop(dataptrs_copy, strides_copy,
+                    dataptrs[2], strides[2],
+                    *countptr, innerloopdata);
+    } while (iternext(iter));
+
+finish_loop:
+    if (!needs_api) {
+        NPY_END_THREADS;
+    }
+
+    NPY_AUXDATA_FREE(innerloopdata);
+
+    return (needs_api && PyErr_Occurred()) ? -1 : 0;
 }
 
 /*
@@ -2636,43 +2786,20 @@ initialize_reduce_result(int identity, PyArrayObject *result,
  */
 static PyArrayObject *
 PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
-        int naxes, int *axes, int otype, int skipna, int keepdims)
+        int naxes, int *axes, PyArray_Descr *odtype, int skipna, int keepdims)
 {
-    int iaxes, ndim, retcode;
-    PyArray_Descr *otype_dtype = NULL;
+    int iaxes, reorderable, ndim;
     npy_bool axis_flags[NPY_MAXDIMS];
-    PyArrayObject *arr_view = NULL, *result = NULL;
-    npy_intp skip_first_count = 0;
-
-    /* The normal selected inner loop */
-    PyUFuncGenericFunction innerloop = NULL;
-    void *innerloopdata = NULL;
-
-    /* The masked selected inner loop */
-    int use_maskna = 0;
-    PyUFunc_MaskedStridedInnerLoopFunc *maskedinnerloop = NULL;
-    NpyAuxData *maskedinnerloopdata = NULL;
-
+    PyArray_Descr *dtype;
+    PyArrayObject *result;
+    PyArray_AssignReduceIdentityFunc *assign_identity = NULL;
     char *ufunc_name = ufunc->name ? ufunc->name : "(unknown)";
-
     /* These parameters come from a TLS global */
     int buffersize = 0, errormask = 0;
     PyObject *errobj = NULL;
 
-    /* Iterator parameters */
-    NpyIter *iter = NULL;
-    PyArrayObject *op[3];
-    PyArray_Descr *op_dtypes[2] = {NULL, NULL};
-    npy_uint32 flags, op_flags[2];
-
-    NPY_BEGIN_THREADS_DEF;
-
     ndim = PyArray_NDIM(arr);
 
-    if (PyUFunc_GetPyValues("reduce", &buffersize, &errormask, &errobj) < 0) {
-        return NULL;
-    }
-
     /* Create an array of flags for reduction */
     memset(axis_flags, 0, ndim);
     for (iaxes = 0; iaxes < naxes; ++iaxes) {
@@ -2685,379 +2812,50 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
         axis_flags[axis] = 1;
     }
 
-    use_maskna = PyArray_HASMASKNA(arr);
-
-    /* Detect whether to ignore the MASKNA */
-    if (use_maskna && !skipna && out != NULL && !PyArray_HASMASKNA(out)) {
-        if (PyArray_ContainsNA(arr)) {
-            PyErr_SetString(PyExc_ValueError,
-                    "Cannot assign NA value to an array which "
-                    "does not support NAs");
+    switch (ufunc->identity) {
+        case PyUFunc_Zero:
+            assign_identity = &assign_reduce_identity_zero;
+            reorderable = 1;
+            break;
+        case PyUFunc_One:
+            assign_identity = &assign_reduce_identity_one;
+            reorderable = 1;
+            break;
+        case PyUFunc_None:
+            reorderable = 0;
+            break;
+        case PyUFunc_ReorderableNone:
+            reorderable = 1;
+            break;
+        default:
+            PyErr_Format(PyExc_ValueError,
+                    "ufunc %s has an invalid identity for reduction",
+                    ufunc_name);
             return NULL;
-        }
-        else {
-            use_maskna = 0;
-        }
     }
 
-    /* Get the appropriate ufunc inner loop */
-    if (use_maskna) {
-        retcode = get_masked_binary_op_function(ufunc, arr, otype,
-                        &otype_dtype, &maskedinnerloop, &maskedinnerloopdata);
-    }
-    else {
-        int otype_final = otype;
-        retcode = get_binary_op_function(ufunc, &otype_final,
-                                &innerloop, &innerloopdata);
-
-        NPY_UF_DBG_PRINT2("Loop retcode %d, otype final %d\n",
-                                                retcode, otype_final);
-        /*
-         * Set up the output data type, using the input's exact
-         * data type if the type number didn't change to preserve
-         * metadata
-         */
-        if (PyArray_DESCR(arr)->type_num == otype_final) {
-            if (PyArray_ISNBO(PyArray_DESCR(arr)->byteorder)) {
-                otype_dtype = PyArray_DESCR(arr);
-                Py_INCREF(otype_dtype);
-            }
-            else {
-                otype_dtype = PyArray_DescrNewByteorder(PyArray_DESCR(arr),
-                                                        NPY_NATIVE);
-            }
-        }
-        else {
-            otype_dtype = PyArray_DescrFromType(otype_final);
-        }
-        if (otype_dtype == NULL) {
-            return NULL;
-        }
-    }
-    if (retcode < 0) {
-        //PyArray_Descr *dtype = PyArray_DescrFromType(otype);
-        //PyErr_Format(PyExc_ValueError,
-        //             "could not find a matching type for %s.reduce, "
-        //             "requested type has type code '%c'",
-        //                    ufunc_name, dtype ? dtype->type : '-');
-        //Py_XDECREF(dtype);
+    if (PyUFunc_GetPyValues("reduce", &buffersize, &errormask, &errobj) < 0) {
         return NULL;
     }
 
-    /* If the loop wants the arrays, provide them */
-    if (_does_loop_use_arrays(innerloopdata)) {
-        innerloopdata = (void*)op;
-    }
-
-    /* Allocate an output or conform 'out' to 'ufunc' */
-    Py_XINCREF(otype_dtype);
-    result = PyArray_CreateReduceResult(arr, out,
-                            otype_dtype, axis_flags, !skipna && use_maskna,
-                            keepdims, ufunc_name);
-    if (result == NULL) {
+    /* Get the reduction dtype */
+    if (reduce_type_resolver(ufunc, arr, odtype, &dtype) < 0) {
+        Py_XDECREF(errobj);
         return NULL;
     }
 
-    /* Prepare the NA mask if there is one */
-    if (use_maskna) {
-        /*
-         * Do the reduction on the NA mask before the data. This way
-         * we can avoid modifying the outputs which end up masked, obeying
-         * the required NA masking semantics.
-         */
-        if (!skipna) {
-            if (PyArray_ReduceMaskNAArray(result, arr) < 0) {
-                goto fail;
-            }
+    result = PyArray_ReduceWrapper(arr, out, dtype, dtype,
+                                NPY_UNSAFE_CASTING,
+                                axis_flags, reorderable,
+                                skipna, keepdims,
+                                assign_identity,
+                                reduce_loop,
+                                masked_reduce_loop,
+                                ufunc, buffersize, ufunc_name);
 
-            /* Short circuit any calculation if the result is 0-dim NA */
-            if (PyArray_SIZE(result) == 1 &&
-                    !NpyMaskValue_IsExposed(
-                                (npy_mask)*PyArray_MASKNA_DATA(result))) {
-                goto finish;
-            }
-        }
-        else {
-            /* Special case a one-value input */
-            if (PyArray_SIZE(arr) == 1) {
-                if (NpyMaskValue_IsExposed(
-                                (npy_mask)*PyArray_MASKNA_DATA(arr))) {
-                    /* Copy the element into the result */
-                    if (PyArray_CopyInto(result, arr) < 0) {
-                        goto finish;
-                    }
-                }
-                else {
-                    PyErr_Format(PyExc_ValueError,
-                            "fully NA array with skipna=True to "
-                            "%s.reduce which has no identity", ufunc_name);
-                    goto fail;
-                }
-            }
-
-            /*
-             * If the result has a mask (i.e. from the out= parameter),
-             * Set it to all exposed.
-             */
-            if (PyArray_HASMASKNA(result)) {
-                if (PyArray_AssignMaskNA(result, NULL, 1) < 0) {
-                    goto fail;
-                }
-            }
-        }
-    }
-
-    /*
-     * Initialize 'result' to the identity or initial elements
-     * copied from 'arr', and create a view of 'arr' containing
-     * all the elements to reduce into 'result'.
-     */
-    arr_view = initialize_reduce_result(ufunc->identity, result,
-                                        axis_flags, arr, skipna,
-                                        &skip_first_count, ufunc_name);
-    if (arr_view == NULL) {
-        goto fail;
-    }
-    if (PyArray_SIZE(arr_view) == 0) {
-        Py_DECREF(arr_view);
-        arr_view = NULL;
-        goto finish;
-    }
-
-    /* Now we can do a loop applying the ufunc in a straightforward manner */
-    op[0] = result;
-    op[1] = arr_view;
-    /* op is length 3 in case the inner loop wanted these as its data */
-    op[2] = result;
-    op_dtypes[0] = otype_dtype;
-    op_dtypes[1] = otype_dtype;
-
-    flags = NPY_ITER_BUFFERED |
-            NPY_ITER_EXTERNAL_LOOP |
-            NPY_ITER_GROWINNER |
-            NPY_ITER_DONT_NEGATE_STRIDES |
-            NPY_ITER_ZEROSIZE_OK |
-            NPY_ITER_REDUCE_OK |
-            NPY_ITER_REFS_OK;
-    op_flags[0] = NPY_ITER_READWRITE |
-                  NPY_ITER_ALIGNED |
-                  NPY_ITER_NO_SUBTYPE;
-    op_flags[1] = NPY_ITER_READONLY |
-                  NPY_ITER_ALIGNED;
-
-    /* Add mask-related flags */
-    if (use_maskna) {
-        if (skipna) {
-            /* The output's mask has been set to all exposed already */
-            op_flags[0] |= NPY_ITER_IGNORE_MASKNA;
-            /* Need the input's mask to determine what to skip */
-            op_flags[1] |= NPY_ITER_USE_MASKNA;
-        }
-        else {
-            /* Iterate over the output's mask */
-            op_flags[0] |= NPY_ITER_USE_MASKNA;
-            /* The input's mask is already incorporated in the output's mask */
-            op_flags[1] |= NPY_ITER_IGNORE_MASKNA;
-        }
-    }
-    else {
-        /*
-         * If 'out' had no mask, and 'arr' did, we checked that 'arr'
-         * contains no NA values and can ignore the masks.
-         */
-        op_flags[0] |= NPY_ITER_IGNORE_MASKNA;
-        op_flags[1] |= NPY_ITER_IGNORE_MASKNA;
-    }
-
-    iter = NpyIter_MultiNew(2, op, flags,
-                               NPY_KEEPORDER, NPY_UNSAFE_CASTING,
-                               op_flags,
-                               op_dtypes);
-    if (iter == NULL) {
-        goto fail;
-    }
-
-    if (NpyIter_GetIterSize(iter) != 0) {
-        int needs_api;
-        NpyIter_IterNextFunc *iternext;
-        char **dataptr;
-        npy_intp *strides;
-        npy_intp *countptr;
-
-        char *dataptr_copy[3];
-        npy_intp strides_copy[3];
-
-        iternext = NpyIter_GetIterNext(iter, NULL);
-        if (iternext == NULL) {
-            goto fail;
-        }
-        dataptr = NpyIter_GetDataPtrArray(iter);
-        strides = NpyIter_GetInnerStrideArray(iter);
-        countptr = NpyIter_GetInnerLoopSizePtr(iter);
-
-        needs_api = NpyIter_IterationNeedsAPI(iter) ||
-                    PyDataType_REFCHK(otype_dtype);
-
-        if (!needs_api) {
-            NPY_BEGIN_THREADS;
-        }
-
-        /* Straightforward reduction */
-        if (!use_maskna) {
-            if (skip_first_count > 0) {
-                do {
-                    npy_intp count = *countptr;
-
-                    /* Skip any first-visit elements */
-                    if (NpyIter_IsFirstVisit(iter, 0)) {
-                        if (strides[0] == 0) {
-                            --count;
-                            --skip_first_count;
-                            dataptr[1] += strides[1];
-                        }
-                        else {
-                            skip_first_count -= count;
-                            count = 0;
-                        }
-                    }
-
-                    /* Turn the two items into three for the inner loop */
-                    dataptr_copy[0] = dataptr[0];
-                    dataptr_copy[1] = dataptr[1];
-                    dataptr_copy[2] = dataptr[0];
-                    strides_copy[0] = strides[0];
-                    strides_copy[1] = strides[1];
-                    strides_copy[2] = strides[0];
-                    innerloop(dataptr_copy, &count,
-                                strides_copy, innerloopdata);
-
-                    /* Jump to the faster loop when skipping is done */
-                    if (skip_first_count == 0) {
-                        if (iternext(iter)) {
-                            break;
-                        }
-                        else {
-                            goto finish_loop;
-                        }
-                    }
-                } while (iternext(iter));
-            }
-            do {
-                /* Turn the two items into three for the inner loop */
-                dataptr_copy[0] = dataptr[0];
-                dataptr_copy[1] = dataptr[1];
-                dataptr_copy[2] = dataptr[0];
-                strides_copy[0] = strides[0];
-                strides_copy[1] = strides[1];
-                strides_copy[2] = strides[0];
-                innerloop(dataptr_copy, countptr,
-                            strides_copy, innerloopdata);
-            } while (iternext(iter));
-        }
-        /* Masked reduction */
-        else {
-            if (skip_first_count > 0) {
-                do {
-                    npy_intp count = *countptr;
-
-                    /* Skip any first-visit elements */
-                    if (NpyIter_IsFirstVisit(iter, 0)) {
-                        if (strides[0] == 0) {
-                            --count;
-                            --skip_first_count;
-                            dataptr[1] += strides[1];
-                            dataptr[2] += strides[2];
-                        }
-                        else {
-                            skip_first_count -= count;
-                            count = 0;
-                        }
-                    }
-
-                    /* Turn the two items into three for the inner loop */
-                    dataptr_copy[0] = dataptr[0];
-                    dataptr_copy[1] = dataptr[1];
-                    dataptr_copy[2] = dataptr[0];
-                    strides_copy[0] = strides[0];
-                    strides_copy[1] = strides[1];
-                    strides_copy[2] = strides[0];
-                    /*
-                     * If skipna=True, this masks based on the mask in 'arr',
-                     * otherwise it masks based on the mask in 'result'
-                     */
-                    maskedinnerloop(dataptr_copy, strides_copy,
-                                dataptr[2], strides[2],
-                                count, maskedinnerloopdata);
-
-                    /* Jump to the faster loop when skipping is done */
-                    if (skip_first_count == 0) {
-                        if (iternext(iter)) {
-                            break;
-                        }
-                        else {
-                            goto finish_loop;
-                        }
-                    }
-                } while (iternext(iter));
-            }
-            do {
-                /* Turn the two items into three for the inner loop */
-                dataptr_copy[0] = dataptr[0];
-                dataptr_copy[1] = dataptr[1];
-                dataptr_copy[2] = dataptr[0];
-                strides_copy[0] = strides[0];
-                strides_copy[1] = strides[1];
-                strides_copy[2] = strides[0];
-                /*
-                 * If skipna=True, this masks based on the mask in 'arr',
-                 * otherwise it masks based on the mask in 'result'
-                 */
-                maskedinnerloop(dataptr_copy, strides_copy,
-                            dataptr[2], strides[2],
-                            *countptr, maskedinnerloopdata);
-            } while (iternext(iter));
-        }
-finish_loop:
-        if (!needs_api) {
-            NPY_END_THREADS;
-        }
-
-        if (needs_api && PyErr_Occurred()) {
-            goto fail;
-        }
-    }
-
-finish:
-
-    /* Strip out the extra 'one' dimensions in the result */
-    if (out == NULL) {
-        if (!keepdims) {
-            PyArray_RemoveAxesInPlace(result, axis_flags);
-        }
-    }
-    else {
-        Py_DECREF(result);
-        result = out;
-        Py_INCREF(result);
-    }
-
-    if (iter != NULL) {
-        NpyIter_Deallocate(iter);
-    }
-    Py_XDECREF(arr_view);
-    Py_XDECREF(otype_dtype);
-    NPY_AUXDATA_FREE(maskedinnerloopdata);
+    Py_DECREF(dtype);
+    Py_XDECREF(errobj);
     return result;
-
-fail:
-    if (iter != NULL) {
-        NpyIter_Deallocate(iter);
-    }
-    Py_XDECREF(result);
-    Py_XDECREF(arr_view);
-    Py_XDECREF(otype_dtype);
-    NPY_AUXDATA_FREE(maskedinnerloopdata);
-    return NULL;
 }
 
 
@@ -4055,7 +3853,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args,
     switch(operation) {
     case UFUNC_REDUCE:
         ret = PyUFunc_Reduce(ufunc, mp, out, naxes, axes,
-                                          otype->type_num, skipna, keepdims);
+                                          otype, skipna, keepdims);
         break;
     case UFUNC_ACCUMULATE:
         if (naxes != 1) {
@@ -4537,7 +4335,7 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
     ufunc->userloops=NULL;
 
     /* Type resolution and inner loop selection functions */
-    ufunc->type_resolution_function = &PyUFunc_DefaultTypeResolution;
+    ufunc->type_resolver = &PyUFunc_DefaultTypeResolver;
     ufunc->legacy_inner_loop_selector = &PyUFunc_DefaultLegacyInnerLoopSelector;
     ufunc->inner_loop_selector = NULL;
     ufunc->masked_inner_loop_selector = &PyUFunc_DefaultMaskedInnerLoopSelector;
@@ -4586,7 +4384,7 @@ PyUFunc_SetUsesArraysAsData(void **data, size_t i)
  * Return 1 if the given data pointer for the loop specifies that it needs the
  * arrays as the data pointer.
  *
- * NOTE: This is easier to specify with the type_resolution_function
+ * NOTE: This is easier to specify with the type_resolver
  *       in the ufunc object.
  *
  * TODO: Remove this, since this is already basically broken
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 8a2041a24..cfb12fb00 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -127,7 +127,7 @@ ensure_dtype_nbo(PyArray_Descr *type)
  * Returns 0 on success, -1 on error.
  */
 NPY_NO_EXPORT int
-PyUFunc_DefaultTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_DefaultTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
@@ -155,12 +155,12 @@ PyUFunc_DefaultTypeResolution(PyUFuncObject *ufunc,
 
     if (type_tup == NULL) {
         /* Find the best ufunc inner loop, and fill in the dtypes */
-        retval = linear_search_type_resolution(ufunc, operands,
+        retval = linear_search_type_resolver(ufunc, operands,
                         input_casting, casting, any_object,
                         out_dtypes);
     } else {
         /* Find the specified ufunc inner loop, and fill in the dtypes */
-        retval = type_tuple_type_resolution(ufunc, type_tup,
+        retval = type_tuple_type_resolver(ufunc, type_tup,
                         operands, casting, any_object, out_dtypes);
     }
 
@@ -180,7 +180,7 @@ PyUFunc_DefaultTypeResolution(PyUFuncObject *ufunc,
  * Returns 0 on success, -1 on error.
  */
 NPY_NO_EXPORT int
-PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_SimpleBinaryComparisonTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
@@ -207,7 +207,7 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
     type_num2 = PyArray_DESCR(operands[1])->type_num;
     if (type_num1 >= NPY_NTYPES || type_num2 >= NPY_NTYPES ||
             type_num1 == NPY_OBJECT || type_num2 == NPY_OBJECT) {
-        return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
+        return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
                 type_tup, out_dtypes);
     }
 
@@ -226,7 +226,7 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
          * default type resolution handle this one.
          */
         if (!PyTuple_Check(type_tup) || PyTuple_GET_SIZE(type_tup) != 1) {
-            return PyUFunc_DefaultTypeResolution(ufunc, casting,
+            return PyUFunc_DefaultTypeResolver(ufunc, casting,
                     operands, type_tup, out_dtypes);
         }
 
@@ -279,7 +279,7 @@ PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
  * Returns 0 on success, -1 on error.
  */
 NPY_NO_EXPORT int
-PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_SimpleUnaryOperationTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
@@ -304,7 +304,7 @@ PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
      */
     type_num1 = PyArray_DESCR(operands[0])->type_num;
     if (type_num1 >= NPY_NTYPES || type_num1 == NPY_OBJECT) {
-        return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
+        return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
                 type_tup, out_dtypes);
     }
 
@@ -323,7 +323,7 @@ PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
          * default type resolution handle this one.
          */
         if (!PyTuple_Check(type_tup) || PyTuple_GET_SIZE(type_tup) != 1) {
-            return PyUFunc_DefaultTypeResolution(ufunc, casting,
+            return PyUFunc_DefaultTypeResolver(ufunc, casting,
                     operands, type_tup, out_dtypes);
         }
 
@@ -360,13 +360,13 @@ PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
  * casting.
  */
 NPY_NO_EXPORT int
-PyUFunc_OnesLikeTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_OnesLikeTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING NPY_UNUSED(casting),
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
                                 PyArray_Descr **out_dtypes)
 {
-    return PyUFunc_SimpleUnaryOperationTypeResolution(ufunc,
+    return PyUFunc_SimpleUnaryOperationTypeResolver(ufunc,
                         NPY_UNSAFE_CASTING,
                         operands, type_tup, out_dtypes);
 }
@@ -385,7 +385,7 @@ PyUFunc_OnesLikeTypeResolution(PyUFuncObject *ufunc,
  * Returns 0 on success, -1 on error.
  */
 NPY_NO_EXPORT int
-PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_SimpleBinaryOperationTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
@@ -412,7 +412,7 @@ PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
     type_num2 = PyArray_DESCR(operands[1])->type_num;
     if (type_num1 >= NPY_NTYPES || type_num2 >= NPY_NTYPES ||
             type_num1 == NPY_OBJECT || type_num2 == NPY_OBJECT) {
-        return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
+        return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
                 type_tup, out_dtypes);
     }
 
@@ -433,7 +433,7 @@ PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
          * default type resolution handle this one.
          */
         if (!PyTuple_Check(type_tup) || PyTuple_GET_SIZE(type_tup) != 1) {
-            return PyUFunc_DefaultTypeResolution(ufunc, casting,
+            return PyUFunc_DefaultTypeResolver(ufunc, casting,
                     operands, type_tup, out_dtypes);
         }
 
@@ -474,7 +474,7 @@ PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
  * Returns 0 on success, -1 on error.
  */
 NPY_NO_EXPORT int
-PyUFunc_AbsoluteTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_AbsoluteTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
@@ -482,11 +482,11 @@ PyUFunc_AbsoluteTypeResolution(PyUFuncObject *ufunc,
 {
     /* Use the default for complex types, to find the loop producing float */
     if (PyTypeNum_ISCOMPLEX(PyArray_DESCR(operands[0])->type_num)) {
-        return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
+        return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
                     type_tup, out_dtypes);
     }
     else {
-        return PyUFunc_SimpleUnaryOperationTypeResolution(ufunc, casting,
+        return PyUFunc_SimpleUnaryOperationTypeResolver(ufunc, casting,
                     operands, type_tup, out_dtypes);
     }
 }
@@ -578,7 +578,7 @@ timedelta_dtype_with_copied_meta(PyArray_Descr *dtype)
  *    m8[Y|M|B] + M8[<A>]
  */
 NPY_NO_EXPORT int
-PyUFunc_AdditionTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
@@ -595,7 +595,7 @@ PyUFunc_AdditionTypeResolution(PyUFuncObject *ufunc,
 
     /* Use the default when datetime and timedelta are not involved */
     if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
-        return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
+        return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
                     type_tup, out_dtypes);
     }
 
@@ -767,7 +767,7 @@ type_reso_error: {
  *    M8[<A>] - m8[Y|M|B]
  */
 NPY_NO_EXPORT int
-PyUFunc_SubtractionTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
@@ -784,7 +784,7 @@ PyUFunc_SubtractionTypeResolution(PyUFuncObject *ufunc,
 
     /* Use the default when datetime and timedelta are not involved */
     if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
-        return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
+        return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
                     type_tup, out_dtypes);
     }
 
@@ -934,7 +934,7 @@ type_reso_error: {
  *    m8[<A>] * float## => m8[<A>] * float64
  */
 NPY_NO_EXPORT int
-PyUFunc_MultiplicationTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
@@ -951,7 +951,7 @@ PyUFunc_MultiplicationTypeResolution(PyUFuncObject *ufunc,
 
     /* Use the default when datetime and timedelta are not involved */
     if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
-        return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
+        return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
                     type_tup, out_dtypes);
     }
 
@@ -1076,7 +1076,7 @@ type_reso_error: {
  *    m8[<A>] / float## to m8[<A>] / float64 -> m8[<A>]
  */
 NPY_NO_EXPORT int
-PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_DivisionTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
@@ -1093,7 +1093,7 @@ PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
 
     /* Use the default when datetime and timedelta are not involved */
     if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
-        return PyUFunc_DefaultTypeResolution(ufunc, casting, operands,
+        return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
                     type_tup, out_dtypes);
     }
 
@@ -1244,7 +1244,8 @@ NPY_NO_EXPORT int
 PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc,
                                 PyArray_Descr **dtypes,
                                 PyUFuncGenericFunction *out_innerloop,
-                                void **out_innerloopdata)
+                                void **out_innerloopdata,
+                                int *out_needs_api)
 {
     int nargs = ufunc->nargs;
     char *types;
@@ -1394,7 +1395,8 @@ PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
                             npy_intp *NPY_UNUSED(fixed_strides),
                             npy_intp NPY_UNUSED(fixed_mask_stride),
                             PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
-                            NpyAuxData **out_innerloopdata)
+                            NpyAuxData **out_innerloopdata,
+                            int *out_needs_api)
 {
     int retcode;
     _ufunc_masker_data *data;
@@ -1420,7 +1422,8 @@ PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
 
     /* Get the unmasked ufunc inner loop */
     retcode = ufunc->legacy_inner_loop_selector(ufunc, dtypes,
-                    &data->unmasked_innerloop, &data->unmasked_innerloopdata);
+                    &data->unmasked_innerloop, &data->unmasked_innerloopdata,
+                    out_needs_api);
     if (retcode < 0) {
         PyArray_free(data);
         return retcode;
@@ -1527,31 +1530,60 @@ ufunc_loop_matches(PyUFuncObject *self,
 
 static int
 set_ufunc_loop_data_types(PyUFuncObject *self, PyArrayObject **op,
-                    PyArray_Descr **out_dtype,
-                    int *types)
+                    PyArray_Descr **out_dtypes,
+                    int *type_nums)
 {
     int i, nin = self->nin, nop = nin + self->nout;
 
-    /* Fill the dtypes array */
+    /*
+     * Fill the dtypes array, reusing an operand's dtype where the type_num
+     * matches so that its metadata is preserved.
+     * NOTE(review): confirm ensure_dtype_nbo returns a borrowed reference;
+     * if it returns a new one, the Py_XINCREF calls below leak -- TODO.
+     */
     for (i = 0; i < nop; ++i) {
-        out_dtype[i] = PyArray_DescrFromType(types[i]);
-        if (out_dtype[i] == NULL) {
-            while (--i >= 0) {
-                Py_DECREF(out_dtype[i]);
-                out_dtype[i] = NULL;
-            }
-            return -1;
+        /*
+         * Copy the dtype from op[i] itself if its type_num matches,
+         * to preserve metadata.
+         */
+        if (op[i] != NULL && PyArray_DESCR(op[i])->type_num == type_nums[i]) {
+            out_dtypes[i] = ensure_dtype_nbo(PyArray_DESCR(op[i]));
+            Py_XINCREF(out_dtypes[i]);
+        }
+        /*
+         * For outputs, fall back to copying the dtype from op[0] if its
+         * type_num matches, similarly to preserve metadata.
+         */
+        else if (i >= nin && op[0] != NULL &&
+                            PyArray_DESCR(op[0])->type_num == type_nums[i]) {
+            out_dtypes[i] = ensure_dtype_nbo(PyArray_DESCR(op[0]));
+            Py_XINCREF(out_dtypes[i]);
+        }
+        /* Otherwise create a plain descr from the type number */
+        else {
+            out_dtypes[i] = PyArray_DescrFromType(type_nums[i]);
+        }
+
+        if (out_dtypes[i] == NULL) {
+            goto fail;
         }
     }
 
     return 0;
+
+fail:
+    while (--i >= 0) {
+        Py_DECREF(out_dtypes[i]);
+        out_dtypes[i] = NULL;
+    }
+    return -1;
 }
 
 /*
  * Does a search through the arguments and the loops
  */
 static int
-linear_search_userloop_type_resolution(PyUFuncObject *self,
+linear_search_userloop_type_resolver(PyUFuncObject *self,
                         PyArrayObject **op,
                         NPY_CASTING input_casting,
                         NPY_CASTING output_casting,
@@ -1615,7 +1647,7 @@ linear_search_userloop_type_resolution(PyUFuncObject *self,
  * Does a search through the arguments and the loops
  */
 static int
-type_tuple_userloop_type_resolution(PyUFuncObject *self,
+type_tuple_userloop_type_resolver(PyUFuncObject *self,
                         int n_specified,
                         int *specified_types,
                         PyArrayObject **op,
@@ -1656,7 +1688,8 @@ type_tuple_userloop_type_resolution(PyUFuncObject *self,
 
                 if (n_specified == nop) {
                     for (j = 0; j < nop; ++j) {
-                        if (types[j] != specified_types[j]) {
+                        if (types[j] != specified_types[j] &&
+                                    specified_types[j] != NPY_NOTYPE) {
                             matched = 0;
                             break;
                         }
@@ -1778,7 +1811,7 @@ should_use_min_scalar(PyArrayObject **op, int nop)
  * references in out_dtype.  This function does not do its own clean-up.
  */
 NPY_NO_EXPORT int
-linear_search_type_resolution(PyUFuncObject *self,
+linear_search_type_resolver(PyUFuncObject *self,
                         PyArrayObject **op,
                         NPY_CASTING input_casting,
                         NPY_CASTING output_casting,
@@ -1799,7 +1832,7 @@ linear_search_type_resolution(PyUFuncObject *self,
 
     /* If the ufunc has userloops, search for them. */
     if (self->userloops) {
-        switch (linear_search_userloop_type_resolution(self, op,
+        switch (linear_search_userloop_type_resolver(self, op,
                                 input_casting, output_casting,
                                 any_object, use_min_scalar, out_dtype,
                                 &no_castable_output, &err_src_typecode,
@@ -1886,7 +1919,7 @@ linear_search_type_resolution(PyUFuncObject *self,
  * references in out_dtype.  This function does not do its own clean-up.
  */
 NPY_NO_EXPORT int
-type_tuple_type_resolution(PyUFuncObject *self,
+type_tuple_type_resolver(PyUFuncObject *self,
                         PyObject *type_tup,
                         PyArrayObject **op,
                         NPY_CASTING casting,
@@ -1908,23 +1941,37 @@ type_tuple_type_resolution(PyUFuncObject *self,
 
     /* Fill in specified_types from the tuple or string */
     if (PyTuple_Check(type_tup)) {
+        int nonecount = 0;
         n = PyTuple_GET_SIZE(type_tup);
         if (n != 1 && n != nop) {
             PyErr_Format(PyExc_ValueError,
-                         "a type-tuple must be specified " \
+                         "a type-tuple must be specified "
                          "of length 1 or %d for ufunc '%s'", (int)nop,
                          self->name ? self->name : "(unknown)");
             return -1;
         }
 
         for (i = 0; i < n; ++i) {
-            PyArray_Descr *dtype = NULL;
-            if (!PyArray_DescrConverter(PyTuple_GET_ITEM(type_tup, i),
-                                                                &dtype)) {
-                return -1;
+            PyObject *item = PyTuple_GET_ITEM(type_tup, i);
+            if (item == Py_None) {
+                specified_types[i] = NPY_NOTYPE;
+                ++nonecount;
             }
-            specified_types[i] = dtype->type_num;
-            Py_DECREF(dtype);
+            else {
+                PyArray_Descr *dtype = NULL;
+                if (!PyArray_DescrConverter(item, &dtype)) {
+                    return -1;
+                }
+                specified_types[i] = dtype->type_num;
+                Py_DECREF(dtype);
+            }
+        }
+
+        if (nonecount == n) {
+            PyErr_SetString(PyExc_ValueError,
+                    "the type-tuple provided to the ufunc "
+                    "must specify at least one non-None dtype");
+            return -1;
         }
 
         n_specified = n;
@@ -1990,7 +2037,7 @@ type_tuple_type_resolution(PyUFuncObject *self,
 
     /* If the ufunc has userloops, search for them. */
     if (self->userloops) {
-        switch (type_tuple_userloop_type_resolution(self,
+        switch (type_tuple_userloop_type_resolver(self,
                         n_specified, specified_types,
                         op, casting,
                         any_object, use_min_scalar,
@@ -2015,7 +2062,8 @@ type_tuple_type_resolution(PyUFuncObject *self,
 
         if (n_specified == nop) {
             for (j = 0; j < nop; ++j) {
-                if (types[j] != specified_types[j]) {
+                if (types[j] != specified_types[j] &&
+                                specified_types[j] != NPY_NOTYPE) {
                     matched = 0;
                     break;
                 }
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index dad2b6c6c..8effa33a4 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -2,63 +2,63 @@
 #define _NPY_PRIVATE__UFUNC_TYPE_RESOLUTION_H_
 
 NPY_NO_EXPORT int
-PyUFunc_SimpleBinaryComparisonTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_SimpleBinaryComparisonTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
                                 PyArray_Descr **out_dtypes);
 
 NPY_NO_EXPORT int
-PyUFunc_SimpleUnaryOperationTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_SimpleUnaryOperationTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
                                 PyArray_Descr **out_dtypes);
 
 NPY_NO_EXPORT int
-PyUFunc_OnesLikeTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_OnesLikeTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
                                 PyArray_Descr **out_dtypes);
 
 NPY_NO_EXPORT int
-PyUFunc_SimpleBinaryOperationTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_SimpleBinaryOperationTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
                                 PyArray_Descr **out_dtypes);
 
 NPY_NO_EXPORT int
-PyUFunc_AbsoluteTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_AbsoluteTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
                                 PyArray_Descr **out_dtypes);
 
 NPY_NO_EXPORT int
-PyUFunc_AdditionTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
                                 PyArray_Descr **out_dtypes);
 
 NPY_NO_EXPORT int
-PyUFunc_SubtractionTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_SubtractionTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
                                 PyArray_Descr **out_dtypes);
 
 NPY_NO_EXPORT int
-PyUFunc_MultiplicationTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_MultiplicationTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
                                 PyArray_Descr **out_dtypes);
 
 NPY_NO_EXPORT int
-PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
+PyUFunc_DivisionTypeResolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
@@ -71,7 +71,7 @@ PyUFunc_DivisionTypeResolution(PyUFuncObject *ufunc,
  * references in out_dtype.  This function does not do its own clean-up.
  */
 NPY_NO_EXPORT int
-linear_search_type_resolution(PyUFuncObject *self,
+linear_search_type_resolver(PyUFuncObject *self,
                         PyArrayObject **op,
                         NPY_CASTING input_casting,
                         NPY_CASTING output_casting,
@@ -85,7 +85,7 @@ linear_search_type_resolution(PyUFuncObject *self,
  * references in out_dtype.  This function does not do its own clean-up.
  */
 NPY_NO_EXPORT int
-type_tuple_type_resolution(PyUFuncObject *self,
+type_tuple_type_resolver(PyUFuncObject *self,
                         PyObject *type_tup,
                         PyArrayObject **op,
                         NPY_CASTING casting,
@@ -96,7 +96,8 @@ NPY_NO_EXPORT int
 PyUFunc_DefaultLegacyInnerLoopSelector(PyUFuncObject *ufunc,
                                 PyArray_Descr **dtypes,
                                 PyUFuncGenericFunction *out_innerloop,
-                                void **out_innerloopdata);
+                                void **out_innerloopdata,
+                                int *out_needs_api);
 
 NPY_NO_EXPORT int
 PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
@@ -104,7 +105,8 @@ PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc,
                             npy_intp *NPY_UNUSED(fixed_strides),
                             npy_intp NPY_UNUSED(fixed_mask_stride),
                             PyUFunc_MaskedStridedInnerLoopFunc **out_innerloop,
-                            NpyAuxData **out_innerloopdata);
+                            NpyAuxData **out_innerloopdata,
+                            int *out_needs_api);
 
 
 #endif
diff --git a/numpy/core/src/umath/umathmodule.c.src b/numpy/core/src/umath/umathmodule.c.src
index 02098f458..9843b0eba 100644
--- a/numpy/core/src/umath/umathmodule.c.src
+++ b/numpy/core/src/umath/umathmodule.c.src
@@ -46,7 +46,7 @@
 static PyUFuncGenericFunction pyfunc_functions[] = {PyUFunc_On_Om};
 
 static int
-object_ufunc_type_resolution(PyUFuncObject *ufunc,
+object_ufunc_type_resolver(PyUFuncObject *ufunc,
                                 NPY_CASTING casting,
                                 PyArrayObject **operands,
                                 PyObject *type_tup,
@@ -72,10 +72,12 @@ static int
 object_ufunc_loop_selector(PyUFuncObject *ufunc,
                             PyArray_Descr **NPY_UNUSED(dtypes),
                             PyUFuncGenericFunction *out_innerloop,
-                            void **out_innerloopdata)
+                            void **out_innerloopdata,
+                            int *out_needs_api)
 {
     *out_innerloop = ufunc->functions[0];
     *out_innerloopdata = ufunc->data[0];
+    *out_needs_api = 1;
 
     return 0;
 }
@@ -122,7 +124,7 @@ ufunc_frompyfunc(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *NPY_UNUS
     self->core_offsets = NULL;
     self->core_signature = NULL;
 
-    self->type_resolution_function = &object_ufunc_type_resolution;
+    self->type_resolver = &object_ufunc_type_resolver;
     self->legacy_inner_loop_selector = &object_ufunc_loop_selector;
 
     pyname = PyObject_GetAttrString(function, "__name__");