3 files changed, 160 insertions, 387 deletions
diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c
index 79adb0051..bcfbdd954 100644
--- a/numpy/core/src/umath/reduction.c
+++ b/numpy/core/src/umath/reduction.c
@@ -25,235 +25,6 @@
 #include "reduction.h"
 #include "extobj.h"  /* for _check_ufunc_fperr */
 
-/*
- * Allocates a result array for a reduction operation, with
- * dimensions matching 'arr' except set to 1 with 0 stride
- * wherever axis_flags is True. Dropping the reduction axes
- * from the result must be done later by the caller once the
- * computation is complete.
- *
- * This function always allocates a base class ndarray.
- *
- * If 'dtype' isn't NULL, this function steals its reference.
- */
-static PyArrayObject *
-allocate_reduce_result(PyArrayObject *arr, const npy_bool *axis_flags,
-                        PyArray_Descr *dtype, int subok)
-{
-    npy_intp strides[NPY_MAXDIMS], stride;
-    npy_intp shape[NPY_MAXDIMS], *arr_shape = PyArray_DIMS(arr);
-    npy_stride_sort_item strideperm[NPY_MAXDIMS];
-    int idim, ndim = PyArray_NDIM(arr);
-
-    if (dtype == NULL) {
-        dtype = PyArray_DTYPE(arr);
-        Py_INCREF(dtype);
-    }
-
-    PyArray_CreateSortedStridePerm(PyArray_NDIM(arr),
-                                    PyArray_STRIDES(arr), strideperm);
-
-    /* Build the new strides and shape */
-    stride = dtype->elsize;
-    if (ndim) {
-        memcpy(shape, arr_shape, ndim * sizeof(shape[0]));
-    }
-    for (idim = ndim-1; idim >= 0; --idim) {
-        npy_intp i_perm = strideperm[idim].perm;
-        if (axis_flags[i_perm]) {
-            strides[i_perm] = 0;
-            shape[i_perm] = 1;
-        }
-        else {
-            strides[i_perm] = stride;
-            stride *= shape[i_perm];
-        }
-    }
-
-    /* Finally, allocate the array */
-    return (PyArrayObject *)PyArray_NewFromDescr(
-                                    subok ? Py_TYPE(arr) : &PyArray_Type,
-                                    dtype, ndim, shape, strides,
-                                    NULL, 0, subok ? (PyObject *)arr : NULL);
-}
-
-/*
- * Conforms an output parameter 'out' to have 'ndim' dimensions
- * with dimensions of size one added in the appropriate places
- * indicated by 'axis_flags'.
- *
- * The return value is a view into 'out'.
- */
-static PyArrayObject *
-conform_reduce_result(PyArrayObject *in, const npy_bool *axis_flags,
-                      PyArrayObject *out, int keepdims, const char *funcname,
-                      int need_copy)
-{
-    /* unpack shape information */
-    int const ndim = PyArray_NDIM(in);
-    npy_intp const *shape_in = PyArray_DIMS(in);
-
-    int const ndim_out = PyArray_NDIM(out);
-    npy_intp const *strides_out = PyArray_STRIDES(out);
-    npy_intp const *shape_out = PyArray_DIMS(out);
-
-    /*
-     * If the 'keepdims' parameter is true, do a simpler validation and
-     * return a new reference to 'out'.
-     */
-    if (keepdims) {
-        if (PyArray_NDIM(out) != ndim) {
-            PyErr_Format(PyExc_ValueError,
-                    "output parameter for reduction operation %s "
-                    "has the wrong number of dimensions (must match "
-                    "the operand's when keepdims=True)", funcname);
-            return NULL;
-        }
-
-        for (int idim = 0; idim < ndim; ++idim) {
-            if (axis_flags[idim]) {
-                if (shape_out[idim] != 1) {
-                    PyErr_Format(PyExc_ValueError,
-                            "output parameter for reduction operation %s "
-                            "has a reduction dimension not equal to one "
-                            "(required when keepdims=True)", funcname);
-                    return NULL;
-                }
-            }
-            else {
-                if (shape_out[idim] != shape_in[idim]) {
-                    PyErr_Format(PyExc_ValueError,
-                            "output parameter for reduction operation %s "
-                            "has a non-reduction dimension not equal to "
-                            "the input one.", funcname);
-                    return NULL;
-                }
-            }
-
-        }
-
-        Py_INCREF(out);
-        return out;
-    }
-
-    /* Construct the strides and shape */
-    npy_intp strides[NPY_MAXDIMS], shape[NPY_MAXDIMS];
-    int idim_out = 0;
-    for (int idim = 0; idim < ndim; ++idim) {
-        if (axis_flags[idim]) {
-            strides[idim] = 0;
-            shape[idim] = 1;
-        }
-        else {
-            if (idim_out >= ndim_out) {
-                PyErr_Format(PyExc_ValueError,
-                        "output parameter for reduction operation %s "
-                        "does not have enough dimensions", funcname);
-                return NULL;
-            }
-            if (shape_out[idim_out] != shape_in[idim]) {
-                PyErr_Format(PyExc_ValueError,
-                        "output parameter for reduction operation %s "
-                        "has a non-reduction dimension not equal to "
-                        "the input one.", funcname);
-                return NULL;
-            }
-            strides[idim] = strides_out[idim_out];
-            shape[idim] = shape_out[idim_out];
-            ++idim_out;
-        }
-    }
-
-    if (idim_out != ndim_out) {
-        PyErr_Format(PyExc_ValueError,
-                "output parameter for reduction operation %s "
-                "has too many dimensions", funcname);
-        return NULL;
-    }
-
-    /* Allocate the view */
-    PyArray_Descr *dtype = PyArray_DESCR(out);
-    Py_INCREF(dtype);
-
-    PyArrayObject_fields *ret = (PyArrayObject_fields *)PyArray_NewFromDescrAndBase(
-            &PyArray_Type, dtype,
-            ndim, shape, strides, PyArray_DATA(out),
-            PyArray_FLAGS(out), NULL, (PyObject *)out);
-    if (ret == NULL) {
-        return NULL;
-    }
-
-    if (need_copy) {
-        PyArrayObject *ret_copy = (PyArrayObject *)PyArray_NewLikeArray(
-                (PyArrayObject *)ret, NPY_ANYORDER, NULL, 0);
-        if (ret_copy == NULL) {
-            Py_DECREF(ret);
-            return NULL;
-        }
-
-        if (PyArray_CopyInto(ret_copy, (PyArrayObject *)ret) != 0) {
-            Py_DECREF(ret);
-            Py_DECREF(ret_copy);
-            return NULL;
-        }
-
-        if (PyArray_SetWritebackIfCopyBase(ret_copy, (PyArrayObject *)ret) < 0) {
-            Py_DECREF(ret_copy);
-            return NULL;
-        }
-
-        return ret_copy;
-    }
-    else {
-        return (PyArrayObject *)ret;
-    }
-}
-
-/*
- * Creates a result for reducing 'operand' along the axes specified
- * in 'axis_flags'. If 'dtype' isn't NULL, this function steals a
- * reference to 'dtype'.
- *
- * If 'out' isn't NULL, this function creates a view conforming
- * to the number of dimensions of 'operand', adding a singleton dimension
- * for each reduction axis specified. In this case, 'dtype' is ignored
- * (but its reference is still stolen), and the caller must handle any
- * type conversion/validity check for 'out'
- *
- * If 'subok' is true, creates a result with the subtype of 'operand',
- * otherwise creates on with the base ndarray class.
- *
- * If 'out' is NULL, it allocates a new array whose shape matches that of
- * 'operand', except for at the reduction axes. If 'dtype' is NULL, the dtype
- * of 'operand' is used for the result.
- */
-NPY_NO_EXPORT PyArrayObject *
-PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out,
-                           PyArray_Descr *dtype, npy_bool *axis_flags,
-                           int keepdims, int subok,
-                           const char *funcname)
-{
-    PyArrayObject *result;
-
-    if (out == NULL) {
-        /* This function steals the reference to 'dtype' */
-        result = allocate_reduce_result(operand, axis_flags, dtype, subok);
-    }
-    else {
-        int need_copy = 0;
-
-        if (solve_may_share_memory(operand, out, 1) != 0) {
-            need_copy = 1;
-        }
-
-        /* Steal the dtype reference */
-        Py_XDECREF(dtype);
-        result = conform_reduce_result(operand, axis_flags,
-                                       out, keepdims, funcname, need_copy);
-    }
-
-    return result;
-}
 
 /*
  * Count the number of dimensions selected in 'axis_flags'
@@ -275,18 +46,15 @@ count_axes(int ndim, const npy_bool *axis_flags)
 /*
  * This function initializes a result array for a reduction operation
  * which has no identity. This means it needs to copy the first element
- * it sees along the reduction axes to result, then return a view of
- * the operand which excludes that element.
+ * it sees along the reduction axes to result.
  *
  * If a reduction has an identity, such as 0 or 1, the result should be
- * initialized by calling PyArray_AssignZero(result, NULL, NULL) or
- * PyArray_AssignOne(result, NULL, NULL), because this function raises an
- * exception when there are no elements to reduce (which appropriate iff the
- * reduction operation has no identity).
+ * fully initialized to the identity, because this function raises an
+ * exception when there are no elements to reduce (which is appropriate if,
+ * and only if, the reduction operation has no identity).
  *
  * This means it copies the subarray indexed at zero along each reduction axis
- * into 'result', then returns a view into 'operand' excluding those copied
- * elements.
+ * into 'result'.
  *
  * result  : The array into which the result is computed. This must have
  *           the same number of dimensions as 'operand', but for each
@@ -294,105 +62,83 @@ count_axes(int ndim, const npy_bool *axis_flags)
  * operand : The array being reduced.
  * axis_flags : An array of boolean flags, one for each axis of 'operand'.
  *              When a flag is True, it indicates to reduce along that axis.
- * out_skip_first_count : This gets populated with the number of first-visit
- *                        elements that should be skipped during the
- *                        iteration loop.
  * funcname : The name of the reduction operation, for the purpose of
  *            better quality error messages. For example, "numpy.max"
  *            would be a good name for NumPy's max function.
  *
- * Returns a view which contains the remaining elements on which to do
- * the reduction.
+ * Returns -1 if an error occurred, and otherwise the reduce arrays size,
+ * which is the number of elements already initialized.
  */
-NPY_NO_EXPORT PyArrayObject *
-PyArray_InitializeReduceResult(
+NPY_NO_EXPORT int
+PyArray_CopyInitialReduceValues(
                     PyArrayObject *result, PyArrayObject *operand,
-                    const npy_bool *axis_flags,
-                    npy_intp *out_skip_first_count, const char *funcname)
+                    const npy_bool *axis_flags, const char *funcname,
+                    int keepdims)
 {
-    npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS];
+    npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
+    npy_intp *shape_orig = PyArray_SHAPE(operand);
+    npy_intp *strides_orig = PyArray_STRIDES(operand);
     PyArrayObject *op_view = NULL;
-    int idim, ndim, nreduce_axes;
 
-    ndim = PyArray_NDIM(operand);
-
-    /* Default to no skipping first-visit elements in the iteration */
-    *out_skip_first_count = 0;
-
-    /* Take a view into 'operand' which we can modify. */
-    op_view = (PyArrayObject *)PyArray_View(operand, NULL, &PyArray_Type);
-    if (op_view == NULL) {
-        return NULL;
-    }
+    int ndim = PyArray_NDIM(operand);
 
     /*
-     * Now copy the subarray of the first element along each reduction axis,
-     * then return a view to the rest.
+     * Copy the subarray of the first element along each reduction axis.
      *
      * Adjust the shape to only look at the first element along
-     * any of the reduction axes. We count the number of reduction axes
-     * at the same time.
+     * any of the reduction axes. If keepdims is False remove the axes
+     * entirely.
      */
-    shape = PyArray_SHAPE(op_view);
-    nreduce_axes = 0;
-    if (ndim) {
-        memcpy(shape_orig, shape, ndim * sizeof(npy_intp));
-    }
-    for (idim = 0; idim < ndim; ++idim) {
+    int idim_out = 0;
+    npy_intp size = 1;
+    for (int idim = 0; idim < ndim; idim++) {
         if (axis_flags[idim]) {
-            if (shape[idim] == 0) {
+            if (NPY_UNLIKELY(shape_orig[idim] == 0)) {
                 PyErr_Format(PyExc_ValueError,
-                             "zero-size array to reduction operation %s "
-                             "which has no identity",
-                             funcname);
-                Py_DECREF(op_view);
-                return NULL;
+                        "zero-size array to reduction operation %s "
+                        "which has no identity", funcname);
+                return -1;
             }
-            shape[idim] = 1;
-            ++nreduce_axes;
+            if (keepdims) {
+                shape[idim_out] = 1;
+                strides[idim_out] = 0;
+                idim_out++;
+            }
+        }
+        else {
+            size *= shape_orig[idim];
+            shape[idim_out] = shape_orig[idim];
+            strides[idim_out] = strides_orig[idim];
+            idim_out++;
         }
     }
 
-    /*
-     * Copy the elements into the result to start.
-     */
-    if (PyArray_CopyInto(result, op_view) < 0) {
-        Py_DECREF(op_view);
-        return NULL;
+    PyArray_Descr *descr = PyArray_DESCR(operand);
+    Py_INCREF(descr);
+    op_view = (PyArrayObject *)PyArray_NewFromDescr(
+            &PyArray_Type, descr, idim_out, shape, strides,
+            PyArray_DATA(operand), 0, NULL);
+    if (op_view == NULL) {
+        return -1;
     }
 
     /*
-     * If there is one reduction axis, adjust the view's
-     * shape to only look at the remaining elements
+     * Copy the elements into the result to start.
      */
-    if (nreduce_axes == 1) {
-        strides = PyArray_STRIDES(op_view);
-        for (idim = 0; idim < ndim; ++idim) {
-            if (axis_flags[idim]) {
-                shape[idim] = shape_orig[idim] - 1;
-                ((PyArrayObject_fields *)op_view)->data += strides[idim];
-            }
-        }
-    }
-    /* If there are zero reduction axes, make the view empty */
-    else if (nreduce_axes == 0) {
-        for (idim = 0; idim < ndim; ++idim) {
-            shape[idim] = 0;
-        }
+    int res = PyArray_CopyInto(result, op_view);
+    Py_DECREF(op_view);
+    if (res < 0) {
+        return -1;
     }
+
     /*
-     * Otherwise iterate over the whole operand, but tell the inner loop
-     * to skip the elements we already copied by setting the skip_first_count.
+     * If there were no reduction axes, we would already be done here.
+     * Note that if there is only a single reduction axis, in principle the
+     * iteration could be set up more efficiently here by removing that
+     * axis before setting up the iterator (simplifying the iteration since
+     * `skip_first_count` (the returned size) can be set to 0).
      */
-    else {
-        *out_skip_first_count = PyArray_SIZE(result);
-
-        Py_DECREF(op_view);
-        Py_INCREF(operand);
-        op_view = operand;
-    }
-
-    return op_view;
+    return size;
 }
 
 /*
@@ -418,7 +164,7 @@ PyArray_InitializeReduceResult(
  *               with size one.
  * subok       : If true, the result uses the subclass of operand, otherwise
  *               it is always a base class ndarray.
- * identity    : If Py_None, PyArray_InitializeReduceResult is used, otherwise
+ * identity    : If Py_None, PyArray_CopyInitialReduceValues is used, otherwise
  *               this value is used to initialize the result to
  *               the reduction's unit.
  * loop        : The loop which does the reduction.
@@ -444,13 +190,12 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
                       NPY_CASTING casting,
                       npy_bool *axis_flags, int reorderable,
                       int keepdims,
-                      int subok,
                       PyObject *identity,
                       PyArray_ReduceLoopFunc *loop,
                       void *data, npy_intp buffersize, const char *funcname,
                       int errormask)
 {
-    PyArrayObject *result = NULL, *op_view = NULL;
+    PyArrayObject *result = NULL;
     npy_intp skip_first_count = 0;
 
     /* Iterator parameters */
@@ -476,49 +221,10 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
         return NULL;
     }
 
-    /*
-     * This either conforms 'out' to the ndim of 'operand', or allocates
-     * a new array appropriate for this reduction.
-     *
-     * A new array with WRITEBACKIFCOPY is allocated if operand and out have memory
-     * overlap.
-     */
-    Py_INCREF(result_dtype);
-    result = PyArray_CreateReduceResult(operand, out,
-                            result_dtype, axis_flags,
-                            keepdims, subok, funcname);
-    if (result == NULL) {
-        goto fail;
-    }
-
-    /*
-     * Initialize the result to the reduction unit if possible,
-     * otherwise copy the initial values and get a view to the rest.
-     */
-    if (identity != Py_None) {
-        if (PyArray_FillWithScalar(result, identity) < 0) {
-            goto fail;
-        }
-        op_view = operand;
-        Py_INCREF(op_view);
-    }
-    else {
-        op_view = PyArray_InitializeReduceResult(
-            result, operand, axis_flags, &skip_first_count, funcname);
-        if (op_view == NULL) {
-            goto fail;
-        }
-        /* empty op_view signals no reduction; but 0-d arrays cannot be empty */
-        if ((PyArray_SIZE(op_view) == 0) || (PyArray_NDIM(operand) == 0)) {
-            Py_DECREF(op_view);
-            op_view = NULL;
-            goto finish;
-        }
-    }
 
     /* Set up the iterator */
-    op[0] = result;
-    op[1] = op_view;
+    op[0] = out;
+    op[1] = operand;
     op_dtypes[0] = result_dtype;
     op_dtypes[1] = operand_dtype;
 
@@ -527,13 +233,16 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
             NPY_ITER_GROWINNER |
             NPY_ITER_DONT_NEGATE_STRIDES |
             NPY_ITER_ZEROSIZE_OK |
-            NPY_ITER_REDUCE_OK |
-            NPY_ITER_REFS_OK;
+            NPY_ITER_REFS_OK |
+            NPY_ITER_DELAY_BUFALLOC |
+            NPY_ITER_COPY_IF_OVERLAP;
     op_flags[0] = NPY_ITER_READWRITE |
                   NPY_ITER_ALIGNED |
+                  NPY_ITER_ALLOCATE |
                   NPY_ITER_NO_SUBTYPE;
     op_flags[1] = NPY_ITER_READONLY |
                   NPY_ITER_ALIGNED;
+
     if (wheremask != NULL) {
         op[2] = wheremask;
         /* wheremask is guaranteed to be NPY_BOOL, so borrow its reference */
@@ -545,15 +254,84 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
         op_flags[2] = NPY_ITER_READONLY;
     }
 
+    /* Set up result array axes mapping, operand and wheremask use default */
+    int result_axes[NPY_MAXDIMS];
+    int *op_axes[3] = {result_axes, NULL, NULL};
+
+    int curr_axis = 0;
+    for (int i = 0; i < PyArray_NDIM(operand); i++) {
+        if (axis_flags[i]) {
+            if (keepdims) {
+                result_axes[i] = NPY_ITER_REDUCTION_AXIS(curr_axis);
+                curr_axis++;
+            }
+            else {
+                result_axes[i] = NPY_ITER_REDUCTION_AXIS(-1);
+            }
+        }
+        else {
+            result_axes[i] = curr_axis;
+            curr_axis++;
+        }
+    }
+    if (out != NULL) {
+        /* NpyIter does not raise a good error message in this common case. */
+        if (NPY_UNLIKELY(curr_axis != PyArray_NDIM(out))) {
+            if (keepdims) {
+                PyErr_Format(PyExc_ValueError,
+                        "output parameter for reduction operation %s has the "
+                        "wrong number of dimensions: Found %d but expected %d "
+                        "(must match the operand's when keepdims=True)",
+                        funcname, PyArray_NDIM(out), curr_axis);
+            }
+            else {
+                PyErr_Format(PyExc_ValueError,
+                        "output parameter for reduction operation %s has the "
+                        "wrong number of dimensions: Found %d but expected %d",
+                        funcname, PyArray_NDIM(out), curr_axis);
+            }
+            goto fail;
+        }
+    }
+
     iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, flags,
                                NPY_KEEPORDER, casting,
                                op_flags,
                                op_dtypes,
-                               -1, NULL, NULL, buffersize);
+                               PyArray_NDIM(operand), op_axes, NULL, buffersize);
     if (iter == NULL) {
         goto fail;
     }
 
+    result = NpyIter_GetOperandArray(iter)[0];
+
+    /*
+     * Initialize the result to the reduction unit if possible,
+     * otherwise copy the initial values and get a view to the rest.
+     */
+
+    if (identity != Py_None) {
+        if (PyArray_FillWithScalar(result, identity) < 0) {
+            goto fail;
+        }
+    }
+    else {
+        /*
+         * For 1-D skip_first_count could be optimized to 0, but no-identity
+         * reductions are not super common.
+         * (see also comment in CopyInitialReduceValues)
+         */
+        skip_first_count = PyArray_CopyInitialReduceValues(
+                result, operand, axis_flags, funcname, keepdims);
+        if (skip_first_count < 0) {
+            goto fail;
+        }
+    }
+
+    if (!NpyIter_Reset(iter, NULL)) {
+        goto fail;
+    }
+
     /* Start with the floating-point exception flags cleared */
     npy_clear_floatstatus_barrier((char*)&iter);
 
@@ -595,29 +373,15 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
         goto fail;
     }
 
-    NpyIter_Deallocate(iter);
-    Py_DECREF(op_view);
-
-finish:
-    /* Strip out the extra 'one' dimensions in the result */
-    if (out == NULL) {
-        if (!keepdims) {
-            PyArray_RemoveAxesInPlace(result, axis_flags);
-        }
-    }
-    else {
-        PyArray_ResolveWritebackIfCopy(result); /* prevent spurious warnings */
-        Py_DECREF(result);
+    if (out != NULL) {
         result = out;
-        Py_INCREF(result);
     }
+    Py_INCREF(result);
 
+    NpyIter_Deallocate(iter);
     return result;
 
 fail:
-    PyArray_ResolveWritebackIfCopy(result); /* prevent spurious warnings */
-    Py_XDECREF(result);
-    Py_XDECREF(op_view);
     if (iter != NULL) {
         NpyIter_Deallocate(iter);
     }
diff --git a/numpy/core/src/umath/reduction.h b/numpy/core/src/umath/reduction.h
index 0c2183ed6..372605dba 100644
--- a/numpy/core/src/umath/reduction.h
+++ b/numpy/core/src/umath/reduction.h
@@ -128,9 +128,7 @@ typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter,
  *               of cache behavior or multithreading requirements.
  * keepdims    : If true, leaves the reduction dimensions in the result
  *               with size one.
- * subok       : If true, the result uses the subclass of operand, otherwise
- *               it is always a base class ndarray.
- * identity    : If Py_None, PyArray_InitializeReduceResult is used, otherwise
+ * identity    : If Py_None, PyArray_CopyInitialReduceValues is used, otherwise
  *               this value is used to initialize the result to
  *               the reduction's unit.
  * loop        : The loop which does the reduction.
@@ -147,7 +145,6 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
                       NPY_CASTING casting,
                       npy_bool *axis_flags, int reorderable,
                       int keepdims,
-                      int subok,
                       PyObject *identity,
                       PyArray_ReduceLoopFunc *loop,
                       void *data, npy_intp buffersize, const char *funcname,
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index e47365293..1dbfa87e3 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -1903,22 +1903,34 @@ class TestUfunc:
         assert_equal(y_base[1,:], y_base_copy[1,:])
         assert_equal(y_base[3,:], y_base_copy[3,:])
 
-    @pytest.mark.parametrize('output_shape',
-                             [(), (1,), (1, 1), (1, 3), (4, 3)])
+    @pytest.mark.parametrize('out_shape',
+                             [(), (1,), (3,), (1, 1), (1, 3), (4, 3)])
+    @pytest.mark.parametrize('keepdims', [True, False])
     @pytest.mark.parametrize('f_reduce', [np.add.reduce, np.minimum.reduce])
-    def test_reduce_wrong_dimension_output(self, f_reduce, output_shape):
+    def test_reduce_wrong_dimension_output(self, f_reduce, keepdims, out_shape):
         # Test that we're not incorrectly broadcasting dimensions.
         # See gh-15144 (failed for np.add.reduce previously).
         a = np.arange(12.).reshape(4, 3)
-        out = np.empty(output_shape, a.dtype)
-        assert_raises(ValueError, f_reduce, a, axis=0, out=out)
-        if output_shape != (1, 3):
-            assert_raises(ValueError, f_reduce, a, axis=0, out=out,
-                          keepdims=True)
+        out = np.empty(out_shape, a.dtype)
+
+        correct_out = f_reduce(a, axis=0, keepdims=keepdims)
+        if out_shape != correct_out.shape:
+            with assert_raises(ValueError):
+                f_reduce(a, axis=0, out=out, keepdims=keepdims)
         else:
-            check = f_reduce(a, axis=0, out=out, keepdims=True)
+            check = f_reduce(a, axis=0, out=out, keepdims=keepdims)
             assert_(check is out)
-            assert_array_equal(check, f_reduce(a, axis=0, keepdims=True))
+            assert_array_equal(check, correct_out)
+
+    def test_reduce_output_no_subclass(self):
+        class MyArr(np.ndarray):
+            pass
+
+        out = np.empty(())
+        np.add.reduce(np.ones(5), out=out)  # no subclass, all fine
+        out = out.view(MyArr)
+        assert np.add.reduce(np.ones(5), out=out) is out
+        assert type(np.add.reduce(out)) is MyArr
 
     def test_no_doc_string(self):
         # gh-9337