Merge pull request #5077 from jaimefrio/gufuncs_core_dim_no_broadcast

WIP: gufunc core dimensions should not broadcast
author: Charles Harris <charlesr.harris@gmail.com> 2014-10-27 12:07:48 -0700
committer: Charles Harris <charlesr.harris@gmail.com> 2014-10-27 12:07:48 -0700
commit: 16575443239fa84615fc795692a79ef27f25c216 (patch)
tree: ed9c9e0fb1f74d45eebc69317a4680ce5160cb11 /numpy/core
parent: 23ee379e86434518bc33ccd9e711a86188914de0 (diff)
parent: 528bac1380c782772b9de207bb8466b03117b96d (diff)
download: numpy-16575443239fa84615fc795692a79ef27f25c216.tar.gz
3 files changed, 192 insertions, 88 deletions
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index b12fbf57f..466cc2b22 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -1901,60 +1901,67 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     }
 
     /*
-     * Validate the core dimensions of all the operands,
-     * and collect all of the labeled core dimension sizes
-     * into the array 'core_dim_sizes'. Initialize them to
-     * 1, for example in the case where the operand broadcasts
-     * to a core dimension, it won't be visited.
+     * Validate the core dimensions of all the operands, and collect all of
+     * the labelled core dimensions into 'core_dim_sizes'.
+     *
+     * The behavior has been changed in Numpy 1.10.0, and the following
+     * requirements must be fulfilled or an error will be raised:
+     *  * Arguments, both input and output, must have at least as many
+     *    dimensions as the corresponding number of core dimensions. In
+     *    previous versions, 1's were prepended to the shape as needed.
+     *  * Core dimensions with same labels must have exactly matching sizes.
+     *    In previous versions, core dimensions of size 1 would broadcast
+     *    against other core dimensions with the same label.
+     *  * All core dimensions must have their size specified by a passed in
+     *    input or output argument. In previous versions, core dimensions in
+     *    an output argument that were not specified in an input argument,
+     *    and whose size could not be inferred from a passed in output
+     *    argument, would have their size set to 1.
      */
     for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
-        core_dim_sizes[i] = 1;
+        core_dim_sizes[i] = -1;
     }
     for (i = 0; i < nop; ++i) {
         if (op[i] != NULL) {
             int dim_offset = ufunc->core_offsets[i];
             int num_dims = ufunc->core_num_dims[i];
             int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
-            /* Make sure any output operand has enough dimensions */
-            if (i >= nin && core_start_dim < 0) {
+
+            /* Check if operands have enough dimensions */
+            if (core_start_dim < 0) {
                 PyErr_Format(PyExc_ValueError,
-                        "%s: Output operand %d does not have enough dimensions "
-                        "(has %d, gufunc core with signature %s "
-                        "requires %d)",
-                        ufunc_name, i - nin, PyArray_NDIM(op[i]),
+                        "%s: %s operand %d does not have enough "
+                        "dimensions (has %d, gufunc core with "
+                        "signature %s requires %d)",
+                        ufunc_name, i < nin ? "Input" : "Output",
+                        i < nin ? i : i - nin, PyArray_NDIM(op[i]),
                         ufunc->core_signature, num_dims);
                 retval = -1;
                 goto fail;
             }
 
             /*
-             * Make sure each core dimension matches all other core
-             * dimensions with the same label
-             *
-             * NOTE: For input operands, core_start_dim may be negative.
-             *       In that case, the operand is being broadcast onto
-             *       core dimensions. For example, a scalar will broadcast
-             *       to fit any core signature.
+             * Make sure every core dimension exactly matches all other core
+             * dimensions with the same label.
              */
-            if (core_start_dim >= 0) {
-                idim = 0;
-            } else {
-                idim = -core_start_dim;
-            }
-            for (; idim < num_dims; ++idim) {
-                int core_dim_index = ufunc->core_dim_ixs[dim_offset + idim];
+            for (idim = 0; idim < num_dims; ++idim) {
+                int core_dim_index = ufunc->core_dim_ixs[dim_offset+idim];
                 npy_intp op_dim_size =
-                        PyArray_SHAPE(op[i])[core_start_dim + idim];
-                if (core_dim_sizes[core_dim_index] == 1) {
+                            PyArray_DIM(op[i], core_start_dim+idim);
+
+                if (core_dim_sizes[core_dim_index] == -1) {
                     core_dim_sizes[core_dim_index] = op_dim_size;
-                } else if ((i >= nin || op_dim_size != 1) &&
-                            core_dim_sizes[core_dim_index] != op_dim_size) {
+                }
+                else if (op_dim_size != core_dim_sizes[core_dim_index]) {
                     PyErr_Format(PyExc_ValueError,
-                            "%s: Operand %d has a mismatch in its core "
-                            "dimension %d, with gufunc signature %s "
-                            "(size %zd is different from %zd)",
-                            ufunc_name, i, idim, ufunc->core_signature,
-                            op_dim_size, core_dim_sizes[core_dim_index]);
+                            "%s: %s operand %d has a mismatch in its "
+                            "core dimension %d, with gufunc "
+                            "signature %s (size %zd is different "
+                            "from %zd)",
+                            ufunc_name, i < nin ? "Input" : "Output",
+                            i < nin ? i : i - nin, idim,
+                            ufunc->core_signature, op_dim_size,
+                            core_dim_sizes[core_dim_index]);
                     retval = -1;
                     goto fail;
                 }
@@ -1962,6 +1969,44 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         }
     }
 
+    /*
+     * Make sure no core dimension is unspecified.
+     */
+    for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
+        if (core_dim_sizes[i] == -1) {
+            break;
+        }
+    }
+    if (i != ufunc->core_num_dim_ix) {
+        /*
+         * There is at least one core dimension missing, find in which
+         * operand it comes up first (it has to be an output operand).
+         */
+        const int missing_core_dim = i;
+        int out_op;
+        for (out_op = nin; out_op < nop; ++out_op) {
+            int first_idx = ufunc->core_offsets[out_op];
+            int last_idx = first_idx + ufunc->core_num_dims[out_op];
+            for (i = first_idx; i < last_idx; ++i) {
+                if (ufunc->core_dim_ixs[i] == missing_core_dim) {
+                    break;
+                }
+            }
+            if (i < last_idx) {
+                /* Change index offsets for error message */
+                out_op -= nin;
+                i -= first_idx;
+                break;
+            }
+        }
+        PyErr_Format(PyExc_ValueError,
+                     "%s: Output operand %d has core dimension %d "
+                     "unspecified, with gufunc signature %s",
+                     ufunc_name, out_op, i, ufunc->core_signature);
+        retval = -1;
+        goto fail;
+    }
+
     /* Fill in the initial part of 'iter_shape' */
     for (idim = 0; idim < broadcast_ndim; ++idim) {
         iter_shape[idim] = -1;
diff --git a/numpy/core/src/umath/umath_tests.c.src b/numpy/core/src/umath/umath_tests.c.src
index 46d06288d..33d9846bd 100644
--- a/numpy/core/src/umath/umath_tests.c.src
+++ b/numpy/core/src/umath/umath_tests.c.src
@@ -38,6 +38,9 @@
     INIT_OUTER_LOOP_3           \
     npy_intp s3 = *steps++;
 
+#define BEGIN_OUTER_LOOP_2      \
+    for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1) {
+
 #define BEGIN_OUTER_LOOP_3      \
     for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1, args[2] += s2) {
 
@@ -58,13 +61,14 @@ char *inner1d_signature = "(i),(i)->()";
 /**begin repeat
 
    #TYPE=LONG,DOUBLE#
-   #typ=npy_long, npy_double#
+   #typ=npy_long,npy_double#
 */
 
 /*
  *  This implements the function
  *        out[n] = sum_i { in1[n, i] * in2[n, i] }.
  */
+
 static void
 @TYPE@_inner1d(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
 {
@@ -91,7 +95,7 @@ char *innerwt_signature = "(i),(i),(i)->()";
 /**begin repeat
 
    #TYPE=LONG,DOUBLE#
-   #typ=npy_long, npy_double#
+   #typ=npy_long,npy_double#
 */
 
 
@@ -127,7 +131,7 @@ char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)";
 /**begin repeat
 
    #TYPE=FLOAT,DOUBLE,LONG#
-   #typ=npy_float, npy_double,npy_long#
+   #typ=npy_float,npy_double,npy_long#
 */
 
 /*
@@ -135,7 +139,6 @@ char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)";
  *        out[k, m, p] = sum_n { in1[k, m, n] * in2[k, n, p] }.
  */
 
-
 static void
 @TYPE@_matrix_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
 {
@@ -177,27 +180,66 @@ static void
 
 /**end repeat**/
 
-/*  The following lines were generated using a slightly modified
-    version of code_generators/generate_umath.py and adding these
-    lines to defdict:
-
-defdict = {
-'inner1d' :
-    Ufunc(2, 1, None_,
-        r'''inner on the last dimension and broadcast on the rest \n"
-        "     \"(i),(i)->()\" \n''',
-          TD('ld'),
-          ),
-'innerwt' :
-    Ufunc(3, 1, None_,
-          r'''inner1d with a weight argument \n"
-          "     \"(i),(i),(i)->()\" \n''',
-          TD('ld'),
-          ),
-}
+char *euclidean_pdist_signature = "(n,d)->(p)";
 
+/**begin repeat
+
+   #TYPE=FLOAT,DOUBLE#
+   #typ=npy_float,npy_double#
+   #sqrt_func=sqrtf,sqrt#
 */
 
+/*
+ *  This implements the function
+ *        out[j*(2*n-3-j)+k-1] = sum_d { (in1[j, d] - in1[k, d])^2 }
+ *  with 0 < k < j < n, i.e. computes all unique pairwise euclidean distances.
+ */
+
+static void
+@TYPE@_euclidean_pdist(char **args, npy_intp *dimensions, npy_intp *steps,
+                       void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_2
+    npy_intp len_n = *dimensions++;
+    npy_intp len_d = *dimensions++;
+    npy_intp len_p = *dimensions;
+    npy_intp stride_n = *steps++;
+    npy_intp stride_d = *steps++;
+    npy_intp stride_p = *steps;
+
+    assert(len_n * (len_n - 1) / 2 == len_p);
+
+    BEGIN_OUTER_LOOP_2
+        const char *data_this = (const char *)args[0];
+        char *data_out = args[1];
+        npy_intp n;
+        for (n = 0; n < len_n; ++n) {
+            const char *data_that = data_this + stride_n;
+            npy_intp nn;
+            for (nn = n + 1; nn < len_n; ++nn) {
+                const char *ptr_this = data_this;
+                const char *ptr_that = data_that;
+                @typ@ out = 0;
+                npy_intp d;
+                for (d = 0; d < len_d; ++d) {
+                    const @typ@ delta = *(const @typ@ *)ptr_this -
+                                        *(const @typ@ *)ptr_that;
+                    out += delta * delta;
+                    ptr_this += stride_d;
+                    ptr_that += stride_d;
+                }
+                *(@typ@ *)data_out = @sqrt_func@(out);
+                data_that += stride_n;
+                data_out += stride_p;
+            }
+            data_this += stride_n;
+        }
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+
 static PyUFuncGenericFunction inner1d_functions[] = { LONG_inner1d, DOUBLE_inner1d };
 static void * inner1d_data[] = { (void *)NULL, (void *)NULL };
 static char inner1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
@@ -208,39 +250,49 @@ static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multip
 static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL };
 static char matrix_multiply_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG,  NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,  NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
 
+static PyUFuncGenericFunction euclidean_pdist_functions[] =
+                            { FLOAT_euclidean_pdist, DOUBLE_euclidean_pdist };
+static void *eucldiean_pdist_data[] = { (void *)NULL, (void *)NULL };
+static char euclidean_pdist_signatures[] = { NPY_FLOAT, NPY_FLOAT,
+                                             NPY_DOUBLE, NPY_DOUBLE };
+
+
 static void
 addUfuncs(PyObject *dictionary) {
     PyObject *f;
 
-    f = PyUFunc_FromFuncAndDataAndSignature(inner1d_functions, inner1d_data, inner1d_signatures, 2,
-                                    2, 1, PyUFunc_None, "inner1d",
-                                    "inner on the last dimension and broadcast on the rest \n"\
-                                    "     \"(i),(i)->()\" \n",
-                                    0, inner1d_signature);
+    f = PyUFunc_FromFuncAndDataAndSignature(inner1d_functions, inner1d_data,
+                    inner1d_signatures, 2, 2, 1, PyUFunc_None, "inner1d",
+                    "inner on the last dimension and broadcast on the rest \n"
+                    "     \"(i),(i)->()\" \n",
+                    0, inner1d_signature);
     PyDict_SetItemString(dictionary, "inner1d", f);
     Py_DECREF(f);
-    f = PyUFunc_FromFuncAndDataAndSignature(innerwt_functions, innerwt_data, innerwt_signatures, 2,
-                                    3, 1, PyUFunc_None, "innerwt",
-                                    "inner1d with a weight argument \n"\
-                                    "     \"(i),(i),(i)->()\" \n",
-                                    0, innerwt_signature);
+    f = PyUFunc_FromFuncAndDataAndSignature(innerwt_functions, innerwt_data,
+                    innerwt_signatures, 2, 3, 1, PyUFunc_None, "innerwt",
+                    "inner1d with a weight argument \n"
+                    "     \"(i),(i),(i)->()\" \n",
+                    0, innerwt_signature);
     PyDict_SetItemString(dictionary, "innerwt", f);
     Py_DECREF(f);
     f = PyUFunc_FromFuncAndDataAndSignature(matrix_multiply_functions,
-                                    matrix_multiply_data, matrix_multiply_signatures,
-                                    3, 2, 1, PyUFunc_None, "matrix_multiply",
-                                    "matrix multiplication on last two dimensions \n"\
-                                    "     \"(m,n),(n,p)->(m,p)\" \n",
-                                    0, matrix_multiply_signature);
+                    matrix_multiply_data, matrix_multiply_signatures,
+                    3, 2, 1, PyUFunc_None, "matrix_multiply",
+                    "matrix multiplication on last two dimensions \n"
+                    "     \"(m,n),(n,p)->(m,p)\" \n",
+                    0, matrix_multiply_signature);
     PyDict_SetItemString(dictionary, "matrix_multiply", f);
     Py_DECREF(f);
+    f = PyUFunc_FromFuncAndDataAndSignature(euclidean_pdist_functions,
+                    eucldiean_pdist_data, euclidean_pdist_signatures,
+                    2, 1, 1, PyUFunc_None, "euclidean_pdist",
+                    "pairwise euclidean distance on last two dimensions \n"
+                    "     \"(n,d)->(p)\" \n",
+                    0, euclidean_pdist_signature);
+    PyDict_SetItemString(dictionary, "euclidean_pdist", f);
+    Py_DECREF(f);
 }
 
-/*
-    End of auto-generated code.
-*/
-
-
 
 static PyObject *
 UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index eacc266be..a285d5334 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -388,21 +388,18 @@ class TestUfunc(TestCase):
         msg = "extend & broadcast loop dimensions"
         b = np.arange(4).reshape((2, 2))
         assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg)
-        msg = "broadcast in core dimensions"
+        # Broadcast in core dimensions should fail
         a = np.arange(8).reshape((4, 2))
         b = np.arange(4).reshape((4, 1))
-        assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg)
-        msg = "extend & broadcast core and loop dimensions"
+        assert_raises(ValueError, umt.inner1d, a, b)
+        # Extend core dimensions should fail
         a = np.arange(8).reshape((4, 2))
         b = np.array(7)
-        assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg)
-        msg = "broadcast should fail"
+        assert_raises(ValueError, umt.inner1d, a, b)
+        # Broadcast should fail
         a = np.arange(2).reshape((2, 1, 1))
         b = np.arange(3).reshape((3, 1, 1))
-        try:
-            ret = umt.inner1d(a, b)
-            assert_equal(ret, None, err_msg=msg)
-        except ValueError: None
+        assert_raises(ValueError, umt.inner1d, a, b)
 
     def test_type_cast(self):
         msg = "type cast"
@@ -542,8 +539,8 @@ class TestUfunc(TestCase):
                         a2 = d2.transpose(p2)[s2]
                         ref = ref and a1.base != None
                         ref = ref and a2.base != None
-                        if broadcastable(a1.shape[-1], a2.shape[-2]) and \
-                           broadcastable(a1.shape[0], a2.shape[0]):
+                        if (a1.shape[-1] == a2.shape[-2] and
+                                broadcastable(a1.shape[0], a2.shape[0])):
                             assert_array_almost_equal(
                                 umt.matrix_multiply(a1, a2),
                                 np.sum(a2[..., np.newaxis].swapaxes(-3, -1) *
@@ -553,6 +550,16 @@ class TestUfunc(TestCase):
 
         assert_equal(ref, True, err_msg="reference check")
 
+    def test_euclidean_pdist(self):
+        a = np.arange(12, dtype=np.float).reshape(4, 3)
+        out = np.empty((a.shape[0] * (a.shape[0] - 1) // 2,), dtype=a.dtype)
+        umt.euclidean_pdist(a, out)
+        b = np.sqrt(np.sum((a[:, None] - a)**2, axis=-1))
+        b = b[~np.tri(a.shape[0], dtype=bool)]
+        assert_almost_equal(out, b)
+        # An output array is required to determine p with signature (n,d)->(p)
+        assert_raises(ValueError, umt.euclidean_pdist, a)
+
     def test_object_logical(self):
         a = np.array([3, None, True, False, "test", ""], dtype=object)
         assert_equal(np.logical_or(a, None),
author	Charles Harris <charlesr.harris@gmail.com>	2014-10-27 12:07:48 -0700
committer	Charles Harris <charlesr.harris@gmail.com>	2014-10-27 12:07:48 -0700
commit	16575443239fa84615fc795692a79ef27f25c216 (patch)
tree	ed9c9e0fb1f74d45eebc69317a4680ce5160cb11 /numpy/core
parent	23ee379e86434518bc33ccd9e711a86188914de0 (diff)
parent	528bac1380c782772b9de207bb8466b03117b96d (diff)
download	numpy-16575443239fa84615fc795692a79ef27f25c216.tar.gz