diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2014-10-27 12:07:48 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2014-10-27 12:07:48 -0700 |
commit | 16575443239fa84615fc795692a79ef27f25c216 (patch) | |
tree | ed9c9e0fb1f74d45eebc69317a4680ce5160cb11 /numpy/core | |
parent | 23ee379e86434518bc33ccd9e711a86188914de0 (diff) | |
parent | 528bac1380c782772b9de207bb8466b03117b96d (diff) | |
download | numpy-16575443239fa84615fc795692a79ef27f25c216.tar.gz |
Merge pull request #5077 from jaimefrio/gufuncs_core_dim_no_broadcast
WIP: gufunc core dimensions should not broadcast
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 115 | ||||
-rw-r--r-- | numpy/core/src/umath/umath_tests.c.src | 136 | ||||
-rw-r--r-- | numpy/core/tests/test_ufunc.py | 29 |
3 files changed, 192 insertions, 88 deletions
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index b12fbf57f..466cc2b22 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -1901,60 +1901,67 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, } /* - * Validate the core dimensions of all the operands, - * and collect all of the labeled core dimension sizes - * into the array 'core_dim_sizes'. Initialize them to - * 1, for example in the case where the operand broadcasts - * to a core dimension, it won't be visited. + * Validate the core dimensions of all the operands, and collect all of + * the labelled core dimensions into 'core_dim_sizes'. + * + * The behavior has been changed in Numpy 1.10.0, and the following + * requirements must be fulfilled or an error will be raised: + * * Arguments, both input and output, must have at least as many + * dimensions as the corresponding number of core dimensions. In + * previous versions, 1's were prepended to the shape as needed. + * * Core dimensions with same labels must have exactly matching sizes. + * In previous versions, core dimensions of size 1 would broadcast + * against other core dimensions with the same label. + * * All core dimensions must have their size specified by a passed in + * input or output argument. In previous versions, core dimensions in + * an output argument that were not specified in an input argument, + * and whose size could not be inferred from a passed in output + * argument, would have their size set to 1. */ for (i = 0; i < ufunc->core_num_dim_ix; ++i) { - core_dim_sizes[i] = 1; + core_dim_sizes[i] = -1; } for (i = 0; i < nop; ++i) { if (op[i] != NULL) { int dim_offset = ufunc->core_offsets[i]; int num_dims = ufunc->core_num_dims[i]; int core_start_dim = PyArray_NDIM(op[i]) - num_dims; - /* Make sure any output operand has enough dimensions */ - if (i >= nin && core_start_dim < 0) { + + /* Check if operands have enough dimensions */ + if (core_start_dim < 0) { PyErr_Format(PyExc_ValueError, - "%s: Output operand %d does not have enough dimensions " - "(has %d, gufunc core with signature %s " - "requires %d)", - ufunc_name, i - nin, PyArray_NDIM(op[i]), + "%s: %s operand %d does not have enough " + "dimensions (has %d, gufunc core with " + "signature %s requires %d)", + ufunc_name, i < nin ? "Input" : "Output", + i < nin ? i : i - nin, PyArray_NDIM(op[i]), ufunc->core_signature, num_dims); retval = -1; goto fail; } /* - * Make sure each core dimension matches all other core - * dimensions with the same label - * - * NOTE: For input operands, core_start_dim may be negative. - * In that case, the operand is being broadcast onto - * core dimensions. For example, a scalar will broadcast - * to fit any core signature. + * Make sure every core dimension exactly matches all other core + * dimensions with the same label. */ - if (core_start_dim >= 0) { - idim = 0; - } else { - idim = -core_start_dim; - } - for (; idim < num_dims; ++idim) { - int core_dim_index = ufunc->core_dim_ixs[dim_offset + idim]; + for (idim = 0; idim < num_dims; ++idim) { + int core_dim_index = ufunc->core_dim_ixs[dim_offset+idim]; npy_intp op_dim_size = - PyArray_SHAPE(op[i])[core_start_dim + idim]; - if (core_dim_sizes[core_dim_index] == 1) { + PyArray_DIM(op[i], core_start_dim+idim); + + if (core_dim_sizes[core_dim_index] == -1) { core_dim_sizes[core_dim_index] = op_dim_size; - } else if ((i >= nin || op_dim_size != 1) && - core_dim_sizes[core_dim_index] != op_dim_size) { + } + else if (op_dim_size != core_dim_sizes[core_dim_index]) { PyErr_Format(PyExc_ValueError, - "%s: Operand %d has a mismatch in its core " - "dimension %d, with gufunc signature %s " - "(size %zd is different from %zd)", - ufunc_name, i, idim, ufunc->core_signature, - op_dim_size, core_dim_sizes[core_dim_index]); + "%s: %s operand %d has a mismatch in its " + "core dimension %d, with gufunc " + "signature %s (size %zd is different " + "from %zd)", + ufunc_name, i < nin ? "Input" : "Output", + i < nin ? i : i - nin, idim, + ufunc->core_signature, op_dim_size, + core_dim_sizes[core_dim_index]); retval = -1; goto fail; } @@ -1962,6 +1969,44 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, } } + /* + * Make sure no core dimension is unspecified. + */ + for (i = 0; i < ufunc->core_num_dim_ix; ++i) { + if (core_dim_sizes[i] == -1) { + break; + } + } + if (i != ufunc->core_num_dim_ix) { + /* + * There is at least one core dimension missing, find in which + * operand it comes up first (it has to be an output operand). + */ + const int missing_core_dim = i; + int out_op; + for (out_op = nin; out_op < nop; ++out_op) { + int first_idx = ufunc->core_offsets[out_op]; + int last_idx = first_idx + ufunc->core_num_dims[out_op]; + for (i = first_idx; i < last_idx; ++i) { + if (ufunc->core_dim_ixs[i] == missing_core_dim) { + break; + } + } + if (i < last_idx) { + /* Change index offsets for error message */ + out_op -= nin; + i -= first_idx; + break; + } + } + PyErr_Format(PyExc_ValueError, + "%s: Output operand %d has core dimension %d " + "unspecified, with gufunc signature %s", + ufunc_name, out_op, i, ufunc->core_signature); + retval = -1; + goto fail; + } + /* Fill in the initial part of 'iter_shape' */ for (idim = 0; idim < broadcast_ndim; ++idim) { iter_shape[idim] = -1; diff --git a/numpy/core/src/umath/umath_tests.c.src b/numpy/core/src/umath/umath_tests.c.src index 46d06288d..33d9846bd 100644 --- a/numpy/core/src/umath/umath_tests.c.src +++ b/numpy/core/src/umath/umath_tests.c.src @@ -38,6 +38,9 @@ INIT_OUTER_LOOP_3 \ npy_intp s3 = *steps++; +#define BEGIN_OUTER_LOOP_2 \ + for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1) { + #define BEGIN_OUTER_LOOP_3 \ for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1, args[2] += s2) { @@ -58,13 +61,14 @@ char *inner1d_signature = "(i),(i)->()"; /**begin repeat #TYPE=LONG,DOUBLE# - #typ=npy_long, npy_double# + #typ=npy_long,npy_double# */ /* * This implements the function * out[n] = sum_i { in1[n, i] * in2[n, i] }. */ + static void @TYPE@_inner1d(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { @@ -91,7 +95,7 @@ char *innerwt_signature = "(i),(i),(i)->()"; /**begin repeat #TYPE=LONG,DOUBLE# - #typ=npy_long, npy_double# + #typ=npy_long,npy_double# */ @@ -127,7 +131,7 @@ char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)"; /**begin repeat #TYPE=FLOAT,DOUBLE,LONG# - #typ=npy_float, npy_double,npy_long# + #typ=npy_float,npy_double,npy_long# */ /* @@ -135,7 +139,6 @@ char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)"; * out[k, m, p] = sum_n { in1[k, m, n] * in2[k, n, p] }. */ - static void @TYPE@_matrix_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { @@ -177,27 +180,66 @@ static void /**end repeat**/ -/* The following lines were generated using a slightly modified - version of code_generators/generate_umath.py and adding these - lines to defdict: - -defdict = { -'inner1d' : - Ufunc(2, 1, None_, - r'''inner on the last dimension and broadcast on the rest \n" - " \"(i),(i)->()\" \n''', - TD('ld'), - ), -'innerwt' : - Ufunc(3, 1, None_, - r'''inner1d with a weight argument \n" - " \"(i),(i),(i)->()\" \n''', - TD('ld'), - ), -} +char *euclidean_pdist_signature = "(n,d)->(p)"; +/**begin repeat + + #TYPE=FLOAT,DOUBLE# + #typ=npy_float,npy_double# + #sqrt_func=sqrtf,sqrt# */ +/* + * This implements the function + * out[j*(2*n-3-j)+k-1] = sum_d { (in1[j, d] - in1[k, d])^2 } + * with 0 < k < j < n, i.e. computes all unique pairwise euclidean distances. + */ + +static void +@TYPE@_euclidean_pdist(char **args, npy_intp *dimensions, npy_intp *steps, + void *NPY_UNUSED(func)) +{ + INIT_OUTER_LOOP_2 + npy_intp len_n = *dimensions++; + npy_intp len_d = *dimensions++; + npy_intp len_p = *dimensions; + npy_intp stride_n = *steps++; + npy_intp stride_d = *steps++; + npy_intp stride_p = *steps; + + assert(len_n * (len_n - 1) / 2 == len_p); + + BEGIN_OUTER_LOOP_2 + const char *data_this = (const char *)args[0]; + char *data_out = args[1]; + npy_intp n; + for (n = 0; n < len_n; ++n) { + const char *data_that = data_this + stride_n; + npy_intp nn; + for (nn = n + 1; nn < len_n; ++nn) { + const char *ptr_this = data_this; + const char *ptr_that = data_that; + @typ@ out = 0; + npy_intp d; + for (d = 0; d < len_d; ++d) { + const @typ@ delta = *(const @typ@ *)ptr_this - + *(const @typ@ *)ptr_that; + out += delta * delta; + ptr_this += stride_d; + ptr_that += stride_d; + } + *(@typ@ *)data_out = @sqrt_func@(out); + data_that += stride_n; + data_out += stride_p; + } + data_this += stride_n; + } + END_OUTER_LOOP +} + +/**end repeat**/ + + static PyUFuncGenericFunction inner1d_functions[] = { LONG_inner1d, DOUBLE_inner1d }; static void * inner1d_data[] = { (void *)NULL, (void *)NULL }; static char inner1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE }; @@ -208,39 +250,49 @@ static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multip static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL }; static char matrix_multiply_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE }; +static PyUFuncGenericFunction euclidean_pdist_functions[] = + { FLOAT_euclidean_pdist, DOUBLE_euclidean_pdist }; +static void *eucldiean_pdist_data[] = { (void *)NULL, (void *)NULL }; +static char euclidean_pdist_signatures[] = { NPY_FLOAT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE }; + + static void addUfuncs(PyObject *dictionary) { PyObject *f; - f = PyUFunc_FromFuncAndDataAndSignature(inner1d_functions, inner1d_data, inner1d_signatures, 2, - 2, 1, PyUFunc_None, "inner1d", - "inner on the last dimension and broadcast on the rest \n"\ - " \"(i),(i)->()\" \n", - 0, inner1d_signature); + f = PyUFunc_FromFuncAndDataAndSignature(inner1d_functions, inner1d_data, + inner1d_signatures, 2, 2, 1, PyUFunc_None, "inner1d", + "inner on the last dimension and broadcast on the rest \n" + " \"(i),(i)->()\" \n", + 0, inner1d_signature); PyDict_SetItemString(dictionary, "inner1d", f); Py_DECREF(f); - f = PyUFunc_FromFuncAndDataAndSignature(innerwt_functions, innerwt_data, innerwt_signatures, 2, - 3, 1, PyUFunc_None, "innerwt", - "inner1d with a weight argument \n"\ - " \"(i),(i),(i)->()\" \n", - 0, innerwt_signature); + f = PyUFunc_FromFuncAndDataAndSignature(innerwt_functions, innerwt_data, + innerwt_signatures, 2, 3, 1, PyUFunc_None, "innerwt", + "inner1d with a weight argument \n" + " \"(i),(i),(i)->()\" \n", + 0, innerwt_signature); PyDict_SetItemString(dictionary, "innerwt", f); Py_DECREF(f); f = PyUFunc_FromFuncAndDataAndSignature(matrix_multiply_functions, - matrix_multiply_data, matrix_multiply_signatures, - 3, 2, 1, PyUFunc_None, "matrix_multiply", - "matrix multiplication on last two dimensions \n"\ - " \"(m,n),(n,p)->(m,p)\" \n", - 0, matrix_multiply_signature); + matrix_multiply_data, matrix_multiply_signatures, + 3, 2, 1, PyUFunc_None, "matrix_multiply", + "matrix multiplication on last two dimensions \n" + " \"(m,n),(n,p)->(m,p)\" \n", + 0, matrix_multiply_signature); PyDict_SetItemString(dictionary, "matrix_multiply", f); Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature(euclidean_pdist_functions, + eucldiean_pdist_data, euclidean_pdist_signatures, + 2, 1, 1, PyUFunc_None, "euclidean_pdist", + "pairwise euclidean distance on last two dimensions \n" + " \"(n,d)->(p)\" \n", + 0, euclidean_pdist_signature); + PyDict_SetItemString(dictionary, "euclidean_pdist", f); + Py_DECREF(f); } -/* - End of auto-generated code. -*/ - - static PyObject * UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args) diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index eacc266be..a285d5334 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -388,21 +388,18 @@ class TestUfunc(TestCase): msg = "extend & broadcast loop dimensions" b = np.arange(4).reshape((2, 2)) assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg) - msg = "broadcast in core dimensions" + # Broadcast in core dimensions should fail a = np.arange(8).reshape((4, 2)) b = np.arange(4).reshape((4, 1)) - assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg) - msg = "extend & broadcast core and loop dimensions" + assert_raises(ValueError, umt.inner1d, a, b) + # Extend core dimensions should fail a = np.arange(8).reshape((4, 2)) b = np.array(7) - assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg) - msg = "broadcast should fail" + assert_raises(ValueError, umt.inner1d, a, b) + # Broadcast should fail a = np.arange(2).reshape((2, 1, 1)) b = np.arange(3).reshape((3, 1, 1)) - try: - ret = umt.inner1d(a, b) - assert_equal(ret, None, err_msg=msg) - except ValueError: None + assert_raises(ValueError, umt.inner1d, a, b) def test_type_cast(self): msg = "type cast" @@ -542,8 +539,8 @@ class TestUfunc(TestCase): a2 = d2.transpose(p2)[s2] ref = ref and a1.base != None ref = ref and a2.base != None - if broadcastable(a1.shape[-1], a2.shape[-2]) and \ - broadcastable(a1.shape[0], a2.shape[0]): + if (a1.shape[-1] == a2.shape[-2] and + broadcastable(a1.shape[0], a2.shape[0])): assert_array_almost_equal( umt.matrix_multiply(a1, a2), np.sum(a2[..., np.newaxis].swapaxes(-3, -1) * @@ -553,6 +550,16 @@ class TestUfunc(TestCase): assert_equal(ref, True, err_msg="reference check") + def test_euclidean_pdist(self): + a = np.arange(12, dtype=np.float).reshape(4, 3) + out = np.empty((a.shape[0] * (a.shape[0] - 1) // 2,), dtype=a.dtype) + umt.euclidean_pdist(a, out) + b = np.sqrt(np.sum((a[:, None] - a)**2, axis=-1)) + b = b[~np.tri(a.shape[0], dtype=bool)] + assert_almost_equal(out, b) + # An output array is required to determine p with signature (n,d)->(p) + assert_raises(ValueError, umt.euclidean_pdist, a) + def test_object_logical(self): a = np.array([3, None, True, False, "test", ""], dtype=object) assert_equal(np.logical_or(a, None), |