ENH: Add keepdims argument for generalized ufuncs.

The argument can only be used if all inputs share the same number of core dimension, and no output has any core dimensions.
author: Marten van Kerkwijk <mhvk@astro.utoronto.ca> 2018-04-30 10:58:13 -0400
committer: Marten van Kerkwijk <mhvk@astro.utoronto.ca> 2018-05-18 09:46:44 -0400
commit: c29e267e6dadcaead6af9a4b1ddc97072e8e48bc (patch)
tree: ce3ee6f91924d78c906320defc25d912a5a159d7
parent: d520546e0f8364da7074ccbbb2cb52454af97b4d (diff)
download: numpy-c29e267e6dadcaead6af9a4b1ddc97072e8e48bc.tar.gz
2 files changed, 186 insertions, 39 deletions
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 9c9837a3e..af415362b 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -568,7 +568,8 @@ get_ufunc_arguments(PyUFuncObject *ufunc,
                     PyObject **out_typetup,
                     int *out_subok,
                     PyArrayObject **out_wheremask,
-                    PyObject **out_axes)
+                    PyObject **out_axes,
+                    int *out_keepdims)
 {
     int i, nargs;
     int nin = ufunc->nin;
@@ -823,9 +824,10 @@ get_ufunc_arguments(PyUFuncObject *ufunc,
 
             switch (str[0]) {
                 case 'a':
-                    /* possible axis argument for generalized ufunc */
+                    /* possible axes argument for generalized ufunc */
                     if (out_axes != NULL && strcmp(str, "axes") == 0) {
                         *out_axes = value;
+
                         bad_arg = 0;
                     }
                     break;
@@ -867,6 +869,17 @@ get_ufunc_arguments(PyUFuncObject *ufunc,
                         bad_arg = 0;
                     }
                     break;
+                case 'k':
+                    if (out_keepdims != NULL && strcmp(str, "keepdims") == 0) {
+                        if (!PyBool_Check(value)) {
+                            PyErr_SetString(PyExc_TypeError,
+                                        "'keepdims' must be a boolean");
+                            goto fail;
+                        }
+                        *out_keepdims = (value == Py_True);
+                        bad_arg = 0;
+                    }
+                    break;
                 case 'o':
                     /*
                      * Output arrays may be specified as a keyword argument,
@@ -1868,6 +1881,35 @@ _has_output_coredims(PyUFuncObject *ufunc) {
 }
 
 /*
+ * Check whether the gufunc can be used with keepdims, i.e., that all its
+ * input arguments have the same number of core dimension, and all output
+ * arguments have no core dimensions. Returns 0 if all is fine, and sets
+ * an error and returns -1 if not.
+ */
+static int
+_check_keepdims_support(PyUFuncObject *ufunc) {
+    int i;
+    int nin = ufunc->nin, nout = ufunc->nout;
+    int input_core_dims = ufunc->core_num_dims[0];
+    for (i = 1; i < nin + nout; i++) {
+        if (ufunc->core_num_dims[i] != (i < nin ? input_core_dims : 0)) {
+            PyErr_Format(PyExc_TypeError,
+                "%s does not support keepdims: its signature %s requires "
+                "that %s %d has %d core dimensions, but keepdims can only "
+                "be used when all inputs have the same number of core "
+                "dimensions and all outputs have no core dimensions.",
+                ufunc_get_name_cstr(ufunc),
+                ufunc->core_signature,
+                i < nin ? "input" : "output",
+                i < nin ? i : i - nin,
+                ufunc->core_num_dims[i]);
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/*
  * Interpret a possible axes keyword argument, using it to fill the remap_axis
  * array which maps default to actual axes for each operand, indexed as
  * as remap_axis[iop][iaxis]. The default axis order has first all broadcast
@@ -1876,8 +1918,8 @@ _has_output_coredims(PyUFuncObject *ufunc) {
  * Returns 0 on success, and -1 on failure
  */
 static int
-_parse_axes_arg(PyUFuncObject *ufunc, PyObject *axes, PyArrayObject **op,
-                int broadcast_ndim, int **remap_axis) {
+_parse_axes_arg(PyUFuncObject *ufunc, int core_num_dims[], PyObject *axes,
+                PyArrayObject **op, int broadcast_ndim, int **remap_axis) {
     int nin = ufunc->nin;
     int nout = ufunc->nout;
     int nop = nin + nout;
@@ -1907,7 +1949,7 @@ _parse_axes_arg(PyUFuncObject *ufunc, PyObject *axes, PyArrayObject **op,
         PyObject *op_axes_tuple, *axis_item;
         int axis, op_axis;
 
-        op_ncore = ufunc->core_num_dims[iop];
+        op_ncore = core_num_dims[iop];
         if (op[iop] != NULL) {
             op_ndim = PyArray_NDIM(op[iop]);
             op_nbroadcast = op_ndim - op_ncore;
@@ -2157,6 +2199,8 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
 
     /* Use remapped axes for generalized ufunc */
     int broadcast_ndim, iter_ndim;
+    int core_num_dims_array[NPY_MAXARGS];
+    int *core_num_dims;
     int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS];
     int *op_axes[NPY_MAXARGS];
 
@@ -2193,6 +2237,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     NPY_CASTING casting = NPY_DEFAULT_ASSIGN_CASTING;
     /* When provided, extobj, typetup, and axes contain borrowed references */
     PyObject *extobj = NULL, *type_tup = NULL, *axes = NULL;
+    int keepdims = -1;
 
     if (ufunc == NULL) {
         PyErr_SetString(PyExc_ValueError, "function not supported");
@@ -2219,25 +2264,53 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     /* Get all the arguments */
     retval = get_ufunc_arguments(ufunc, args, kwds,
                 op, &order, &casting, &extobj,
-                &type_tup, &subok, NULL, &axes);
+                &type_tup, &subok, NULL, &axes, &keepdims);
     if (retval < 0) {
         goto fail;
     }
-
+    /*
+     * If keepdims was passed in (and thus changed from the initial value
+     * on top), check the gufunc is suitable, i.e., that its inputs share
+     * the same number of core dimensions, and its outputs have none.
+     */
+    if (keepdims != -1) {
+        retval = _check_keepdims_support(ufunc);
+        if (retval < 0) {
+            goto fail;
+        }
+    }
+    /*
+     * If keepdims is set and true, signal all dimensions will be the same.
+     */
+    if (keepdims == 1) {
+        int num_dims = ufunc->core_num_dims[0];
+        for (i = 0; i < nop; ++i) {
+            core_num_dims_array[i] = num_dims;
+        }
+        core_num_dims = core_num_dims_array;
+    }
+    else {
+        /* keepdims was not set or was false; no adjustment necessary */
+        core_num_dims = ufunc->core_num_dims;
+        keepdims = 0;
+    }
     /*
      * Check that operands have the minimum dimensions required.
      * (Just checks core; broadcast dimensions are tested by the iterator.)
      */
     for (i = 0; i < nop; i++) {
-        if (op[i] != NULL && PyArray_NDIM(op[i]) < ufunc->core_num_dims[i]) {
+        if (op[i] != NULL && PyArray_NDIM(op[i]) < core_num_dims[i]) {
             PyErr_Format(PyExc_ValueError,
                          "%s: %s operand %d does not have enough "
                          "dimensions (has %d, gufunc core with "
                          "signature %s requires %d)",
-                         ufunc_get_name_cstr(ufunc),
+                         ufunc_name,
                          i < nin ? "Input" : "Output",
-                         i < nin ? i : i - nin, PyArray_NDIM(op[i]),
-                         ufunc->core_signature, ufunc->core_num_dims[i]);
+                         i < nin ? i : i - nin,
+                         PyArray_NDIM(op[i]),
+                         ufunc->core_signature,
+                         core_num_dims[i]);
+            retval = -1;
             goto fail;
         }
     }
@@ -2249,7 +2322,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
      */
     broadcast_ndim = 0;
     for (i = 0; i < nin; ++i) {
-        int n = PyArray_NDIM(op[i]) - ufunc->core_num_dims[i];
+        int n = PyArray_NDIM(op[i]) - core_num_dims[i];
         if (n > broadcast_ndim) {
             broadcast_ndim = n;
         }
@@ -2263,7 +2336,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
      */
     iter_ndim = broadcast_ndim;
     for (i = nin; i < nop; ++i) {
-        iter_ndim += ufunc->core_num_dims[i];
+        iter_ndim += core_num_dims[i];
     }
     if (iter_ndim > NPY_MAXDIMS) {
         PyErr_Format(PyExc_ValueError,
@@ -2285,7 +2358,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         for (i=0; i < nop; i++) {
             remap_axis[i] = remap_axis_memory + i * NPY_MAXDIMS;
         }
-        retval = _parse_axes_arg(ufunc, axes, op, broadcast_ndim,
+        retval = _parse_axes_arg(ufunc, core_num_dims, axes, op, broadcast_ndim,
                                  remap_axis);
         if(retval < 0) {
             goto fail;
@@ -2307,12 +2380,13 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     j = broadcast_ndim;
     for (i = 0; i < nop; ++i) {
         int n;
+
         if (op[i]) {
             /*
              * Note that n may be negative if broadcasting
              * extends into the core dimensions.
              */
-            n = PyArray_NDIM(op[i]) - ufunc->core_num_dims[i];
+            n = PyArray_NDIM(op[i]) - core_num_dims[i];
         }
         else {
             n = broadcast_ndim;
@@ -2336,10 +2410,15 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         /* Except for when it belongs to this output */
         if (i >= nin) {
             int dim_offset = ufunc->core_offsets[i];
-            int num_dims = ufunc->core_num_dims[i];
-            /* Fill in 'iter_shape' and 'op_axes' for this output */
+            int num_dims = core_num_dims[i];
+            /*
+             * Fill in 'iter_shape' and 'op_axes' for the core dimensions
+             * of this output. Here, we have to be careful: if keepdims
+             * was used, then this axis is not a real core dimension,
+             * but is being added back for broadcasting, so its size is 1.
+             */
             for (idim = 0; idim < num_dims; ++idim) {
-                iter_shape[j] = core_dim_sizes[
+                iter_shape[j] = keepdims ? 1 : core_dim_sizes[
                                         ufunc->core_dim_ixs[dim_offset + idim]];
                 op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim);
                 ++j;
@@ -2459,7 +2538,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
      */
     core_dim_ixs_size = 0;
     for (i = 0; i < nop; ++i) {
-        core_dim_ixs_size += ufunc->core_num_dims[i];
+        core_dim_ixs_size += core_num_dims[i];
     }
     inner_strides = (npy_intp *)PyArray_malloc(
                         NPY_SIZEOF_INTP * (nop+core_dim_ixs_size));
@@ -2471,7 +2550,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     /* Copy the strides after the first nop */
     idim = nop;
     for (i = 0; i < nop; ++i) {
-        int num_dims = ufunc->core_num_dims[i];
+        int num_dims = core_num_dims[i];
         int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
         /*
          * Need to use the arrays in the iterator, not op, because
@@ -2721,7 +2800,7 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc,
     /* Get all the arguments */
     retval = get_ufunc_arguments(ufunc, args, kwds,
                 op, &order, &casting, &extobj,
-                &type_tup, &subok, &wheremask, NULL);
+                &type_tup, &subok, &wheremask, NULL, NULL);
     if (retval < 0) {
         goto fail;
     }
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 8479260a3..a5b9ce76f 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -5,6 +5,7 @@ import itertools
 
 import numpy as np
 import numpy.core._umath_tests as umt
+import numpy.linalg._umath_linalg as uml
 import numpy.core._operand_flag_tests as opflag_tests
 import numpy.core._rational_tests as _rational_tests
 from numpy.testing import (
@@ -611,49 +612,49 @@ class TestUfunc(object):
 
     def test_axes_argument(self):
         # inner1d signature: '(i),(i)->()'
-        in1d = umt.inner1d
+        inner1d = umt.inner1d
         a = np.arange(27.).reshape((3, 3, 3))
         b = np.arange(10., 19.).reshape((3, 1, 3))
         # basic tests on inputs (outputs tested below with matrix_multiply).
-        c = in1d(a, b)
+        c = inner1d(a, b)
         assert_array_equal(c, (a * b).sum(-1))
         # default
-        c = in1d(a, b, axes=[(-1,), (-1,), ()])
+        c = inner1d(a, b, axes=[(-1,), (-1,), ()])
         assert_array_equal(c, (a * b).sum(-1))
         # integers ok for single axis.
-        c = in1d(a, b, axes=[-1, -1, ()])
+        c = inner1d(a, b, axes=[-1, -1, ()])
         assert_array_equal(c, (a * b).sum(-1))
         # mix fine
-        c = in1d(a, b, axes=[(-1,), -1, ()])
+        c = inner1d(a, b, axes=[(-1,), -1, ()])
         assert_array_equal(c, (a * b).sum(-1))
         # can omit last axis.
-        c = in1d(a, b, axes=[-1, -1])
+        c = inner1d(a, b, axes=[-1, -1])
         assert_array_equal(c, (a * b).sum(-1))
         # can pass in other types of integer (with __index__ protocol)
-        c = in1d(a, b, axes=[np.int8(-1), np.array(-1, dtype=np.int32)])
+        c = inner1d(a, b, axes=[np.int8(-1), np.array(-1, dtype=np.int32)])
         assert_array_equal(c, (a * b).sum(-1))
         # swap some axes
-        c = in1d(a, b, axes=[0, 0])
+        c = inner1d(a, b, axes=[0, 0])
         assert_array_equal(c, (a * b).sum(0))
-        c = in1d(a, b, axes=[0, 2])
+        c = inner1d(a, b, axes=[0, 2])
         assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1))
         # Check errors for improperly constructed axes arguments.
         # should have list.
-        assert_raises(TypeError, in1d, a, b, axes=-1)
+        assert_raises(TypeError, inner1d, a, b, axes=-1)
         # needs enough elements
-        assert_raises(ValueError, in1d, a, b, axes=[-1])
+        assert_raises(ValueError, inner1d, a, b, axes=[-1])
         # should pass in indices.
-        assert_raises(TypeError, in1d, a, b, axes=[-1.0, -1.0])
-        assert_raises(TypeError, in1d, a, b, axes=[(-1.0,), -1])
-        assert_raises(TypeError, in1d, a, b, axes=[None, 1])
+        assert_raises(TypeError, inner1d, a, b, axes=[-1.0, -1.0])
+        assert_raises(TypeError, inner1d, a, b, axes=[(-1.0,), -1])
+        assert_raises(TypeError, inner1d, a, b, axes=[None, 1])
         # cannot pass an index unless there is only one dimension
         # (output is wrong in this case)
-        assert_raises(TypeError, in1d, a, b, axes=[-1, -1, -1])
+        assert_raises(TypeError, inner1d, a, b, axes=[-1, -1, -1])
         # or pass in generally the wrong number of axes
-        assert_raises(ValueError, in1d, a, b, axes=[-1, -1, (-1,)])
-        assert_raises(ValueError, in1d, a, b, axes=[-1, (-2, -1), ()])
+        assert_raises(ValueError, inner1d, a, b, axes=[-1, -1, (-1,)])
+        assert_raises(ValueError, inner1d, a, b, axes=[-1, (-2, -1), ()])
         # axes need to have same length.
-        assert_raises(ValueError, in1d, a, b, axes=[0, 1])
+        assert_raises(ValueError, inner1d, a, b, axes=[0, 1])
 
         # matrix_multiply signature: '(m,n),(n,p)->(m,p)'
         mm = umt.matrix_multiply
@@ -708,6 +709,73 @@ class TestUfunc(object):
         assert_raises(ValueError, mm, z[1], z, axes=[0, 1])
         assert_raises(ValueError, mm, z, z, out=z[0], axes=[0, 1])
 
+    def test_keepdims_argument(self):
+        # inner1d signature: '(i),(i)->()'
+        inner1d = umt.inner1d
+        a = np.arange(27.).reshape((3, 3, 3))
+        b = np.arange(10., 19.).reshape((3, 1, 3))
+        c = inner1d(a, b)
+        assert_array_equal(c, (a * b).sum(-1))
+        c = inner1d(a, b, keepdims=False)
+        assert_array_equal(c, (a * b).sum(-1))
+        c = inner1d(a, b, keepdims=True)
+        assert_array_equal(c, (a * b).sum(-1, keepdims=True))
+        out = np.zeros_like(c)
+        d = inner1d(a, b, keepdims=True, out=out)
+        assert_(d is out)
+        assert_array_equal(d, c)
+        # Now combined with axes.
+        c = inner1d(a, b, axes=[(-1,), (-1,), ()], keepdims=False)
+        assert_array_equal(c, (a * b).sum(-1))
+        c = inner1d(a, b, axes=[(-1,), (-1,), (-1,)], keepdims=True)
+        assert_array_equal(c, (a * b).sum(-1, keepdims=True))
+        c = inner1d(a, b, axes=[0, 0], keepdims=False)
+        assert_array_equal(c, (a * b).sum(0))
+        c = inner1d(a, b, axes=[0, 0, 0], keepdims=True)
+        assert_array_equal(c, (a * b).sum(0, keepdims=True))
+        c = inner1d(a, b, axes=[0, 2], keepdims=False)
+        assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1))
+        c = inner1d(a, b, axes=[0, 2], keepdims=True)
+        assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1,
+                                                             keepdims=True))
+        c = inner1d(a, b, axes=[0, 2, 2], keepdims=True)
+        assert_array_equal(c, (a.transpose(1, 2, 0) * b).sum(-1,
+                                                             keepdims=True))
+        c = inner1d(a, b, axes=[0, 2, 0], keepdims=True)
+        assert_array_equal(c, (a * b.transpose(2, 0, 1)).sum(0, keepdims=True))
+        # Hardly useful, but should work.
+        c = inner1d(a, b, axes=[0, 2, 1], keepdims=True)
+        assert_array_equal(c, (a.transpose(1, 0, 2) * b.transpose(0, 2, 1))
+                           .sum(1, keepdims=True))
+        # Check with two core dimensions.
+        a = np.eye(3) * np.arange(4.)[:, np.newaxis, np.newaxis]
+        expected = uml.det(a)
+        c = uml.det(a, keepdims=False)
+        assert_array_equal(c, expected)
+        c = uml.det(a, keepdims=True)
+        assert_array_equal(c, expected[:, np.newaxis, np.newaxis])
+        a = np.eye(3) * np.arange(4.)[:, np.newaxis, np.newaxis]
+        expected_s, expected_l = uml.slogdet(a)
+        cs, cl = uml.slogdet(a, keepdims=False)
+        assert_array_equal(cs, expected_s)
+        assert_array_equal(cl, expected_l)
+        cs, cl = uml.slogdet(a, keepdims=True)
+        assert_array_equal(cs, expected_s[:, np.newaxis, np.newaxis])
+        assert_array_equal(cl, expected_l[:, np.newaxis, np.newaxis])
+        # Sanity check on innerwt.
+        a = np.arange(6).reshape((2, 3))
+        b = np.arange(10, 16).reshape((2, 3))
+        w = np.arange(20, 26).reshape((2, 3))
+        assert_array_equal(umt.innerwt(a, b, w, keepdims=True),
+                           np.sum(a * b * w, axis=-1, keepdims=True))
+        # Check errors.
+        # Not a boolean
+        assert_raises(TypeError, inner1d, a, b, keepdims='true')
+        # 1 core dimension only.
+        mm = umt.matrix_multiply
+        assert_raises(TypeError, mm, a, b, keepdims=True)
+        assert_raises(TypeError, mm, a, b, keepdims=False)
+
     def test_innerwt(self):
         a = np.arange(6).reshape((2, 3))
         b = np.arange(10, 16).reshape((2, 3))
author	Marten van Kerkwijk <mhvk@astro.utoronto.ca>	2018-04-30 10:58:13 -0400
committer	Marten van Kerkwijk <mhvk@astro.utoronto.ca>	2018-05-18 09:46:44 -0400
commit	c29e267e6dadcaead6af9a4b1ddc97072e8e48bc (patch)
tree	ce3ee6f91924d78c906320defc25d912a5a159d7
parent	d520546e0f8364da7074ccbbb2cb52454af97b4d (diff)
download	numpy-c29e267e6dadcaead6af9a4b1ddc97072e8e48bc.tar.gz