summaryrefslogtreecommitdiff
path: root/numpy/core
diff options
context:
space:
mode:
authorCharles Harris <charlesr.harris@gmail.com>2014-10-27 12:07:48 -0700
committerCharles Harris <charlesr.harris@gmail.com>2014-10-27 12:07:48 -0700
commit16575443239fa84615fc795692a79ef27f25c216 (patch)
treeed9c9e0fb1f74d45eebc69317a4680ce5160cb11 /numpy/core
parent23ee379e86434518bc33ccd9e711a86188914de0 (diff)
parent528bac1380c782772b9de207bb8466b03117b96d (diff)
downloadnumpy-16575443239fa84615fc795692a79ef27f25c216.tar.gz
Merge pull request #5077 from jaimefrio/gufuncs_core_dim_no_broadcast
WIP: gufunc core dimensions should not broadcast
Diffstat (limited to 'numpy/core')
-rw-r--r--numpy/core/src/umath/ufunc_object.c115
-rw-r--r--numpy/core/src/umath/umath_tests.c.src136
-rw-r--r--numpy/core/tests/test_ufunc.py29
3 files changed, 192 insertions, 88 deletions
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index b12fbf57f..466cc2b22 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -1901,60 +1901,67 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
}
/*
- * Validate the core dimensions of all the operands,
- * and collect all of the labeled core dimension sizes
- * into the array 'core_dim_sizes'. Initialize them to
- * 1, for example in the case where the operand broadcasts
- * to a core dimension, it won't be visited.
+ * Validate the core dimensions of all the operands, and collect all of
+ * the labelled core dimensions into 'core_dim_sizes'.
+ *
+ * The behavior has been changed in Numpy 1.10.0, and the following
+ * requirements must be fulfilled or an error will be raised:
+ * * Arguments, both input and output, must have at least as many
+ * dimensions as the corresponding number of core dimensions. In
+ * previous versions, 1's were prepended to the shape as needed.
+ * * Core dimensions with same labels must have exactly matching sizes.
+ * In previous versions, core dimensions of size 1 would broadcast
+ * against other core dimensions with the same label.
+ * * All core dimensions must have their size specified by a passed in
+ * input or output argument. In previous versions, core dimensions in
+ * an output argument that were not specified in an input argument,
+ * and whose size could not be inferred from a passed in output
+ * argument, would have their size set to 1.
*/
for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
- core_dim_sizes[i] = 1;
+ core_dim_sizes[i] = -1;
}
for (i = 0; i < nop; ++i) {
if (op[i] != NULL) {
int dim_offset = ufunc->core_offsets[i];
int num_dims = ufunc->core_num_dims[i];
int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
- /* Make sure any output operand has enough dimensions */
- if (i >= nin && core_start_dim < 0) {
+
+ /* Check if operands have enough dimensions */
+ if (core_start_dim < 0) {
PyErr_Format(PyExc_ValueError,
- "%s: Output operand %d does not have enough dimensions "
- "(has %d, gufunc core with signature %s "
- "requires %d)",
- ufunc_name, i - nin, PyArray_NDIM(op[i]),
+ "%s: %s operand %d does not have enough "
+ "dimensions (has %d, gufunc core with "
+ "signature %s requires %d)",
+ ufunc_name, i < nin ? "Input" : "Output",
+ i < nin ? i : i - nin, PyArray_NDIM(op[i]),
ufunc->core_signature, num_dims);
retval = -1;
goto fail;
}
/*
- * Make sure each core dimension matches all other core
- * dimensions with the same label
- *
- * NOTE: For input operands, core_start_dim may be negative.
- * In that case, the operand is being broadcast onto
- * core dimensions. For example, a scalar will broadcast
- * to fit any core signature.
+ * Make sure every core dimension exactly matches all other core
+ * dimensions with the same label.
*/
- if (core_start_dim >= 0) {
- idim = 0;
- } else {
- idim = -core_start_dim;
- }
- for (; idim < num_dims; ++idim) {
- int core_dim_index = ufunc->core_dim_ixs[dim_offset + idim];
+ for (idim = 0; idim < num_dims; ++idim) {
+ int core_dim_index = ufunc->core_dim_ixs[dim_offset+idim];
npy_intp op_dim_size =
- PyArray_SHAPE(op[i])[core_start_dim + idim];
- if (core_dim_sizes[core_dim_index] == 1) {
+ PyArray_DIM(op[i], core_start_dim+idim);
+
+ if (core_dim_sizes[core_dim_index] == -1) {
core_dim_sizes[core_dim_index] = op_dim_size;
- } else if ((i >= nin || op_dim_size != 1) &&
- core_dim_sizes[core_dim_index] != op_dim_size) {
+ }
+ else if (op_dim_size != core_dim_sizes[core_dim_index]) {
PyErr_Format(PyExc_ValueError,
- "%s: Operand %d has a mismatch in its core "
- "dimension %d, with gufunc signature %s "
- "(size %zd is different from %zd)",
- ufunc_name, i, idim, ufunc->core_signature,
- op_dim_size, core_dim_sizes[core_dim_index]);
+ "%s: %s operand %d has a mismatch in its "
+ "core dimension %d, with gufunc "
+ "signature %s (size %zd is different "
+ "from %zd)",
+ ufunc_name, i < nin ? "Input" : "Output",
+ i < nin ? i : i - nin, idim,
+ ufunc->core_signature, op_dim_size,
+ core_dim_sizes[core_dim_index]);
retval = -1;
goto fail;
}
@@ -1962,6 +1969,44 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
}
}
+ /*
+ * Make sure no core dimension is unspecified.
+ */
+ for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
+ if (core_dim_sizes[i] == -1) {
+ break;
+ }
+ }
+ if (i != ufunc->core_num_dim_ix) {
+ /*
+ * There is at least one core dimension missing, find in which
+ * operand it comes up first (it has to be an output operand).
+ */
+ const int missing_core_dim = i;
+ int out_op;
+ for (out_op = nin; out_op < nop; ++out_op) {
+ int first_idx = ufunc->core_offsets[out_op];
+ int last_idx = first_idx + ufunc->core_num_dims[out_op];
+ for (i = first_idx; i < last_idx; ++i) {
+ if (ufunc->core_dim_ixs[i] == missing_core_dim) {
+ break;
+ }
+ }
+ if (i < last_idx) {
+ /* Change index offsets for error message */
+ out_op -= nin;
+ i -= first_idx;
+ break;
+ }
+ }
+ PyErr_Format(PyExc_ValueError,
+ "%s: Output operand %d has core dimension %d "
+ "unspecified, with gufunc signature %s",
+ ufunc_name, out_op, i, ufunc->core_signature);
+ retval = -1;
+ goto fail;
+ }
+
/* Fill in the initial part of 'iter_shape' */
for (idim = 0; idim < broadcast_ndim; ++idim) {
iter_shape[idim] = -1;
diff --git a/numpy/core/src/umath/umath_tests.c.src b/numpy/core/src/umath/umath_tests.c.src
index 46d06288d..33d9846bd 100644
--- a/numpy/core/src/umath/umath_tests.c.src
+++ b/numpy/core/src/umath/umath_tests.c.src
@@ -38,6 +38,9 @@
INIT_OUTER_LOOP_3 \
npy_intp s3 = *steps++;
+#define BEGIN_OUTER_LOOP_2 \
+ for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1) {
+
#define BEGIN_OUTER_LOOP_3 \
for (N_ = 0; N_ < dN; N_++, args[0] += s0, args[1] += s1, args[2] += s2) {
@@ -58,13 +61,14 @@ char *inner1d_signature = "(i),(i)->()";
/**begin repeat
#TYPE=LONG,DOUBLE#
- #typ=npy_long, npy_double#
+ #typ=npy_long,npy_double#
*/
/*
* This implements the function
* out[n] = sum_i { in1[n, i] * in2[n, i] }.
*/
+
static void
@TYPE@_inner1d(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
@@ -91,7 +95,7 @@ char *innerwt_signature = "(i),(i),(i)->()";
/**begin repeat
#TYPE=LONG,DOUBLE#
- #typ=npy_long, npy_double#
+ #typ=npy_long,npy_double#
*/
@@ -127,7 +131,7 @@ char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)";
/**begin repeat
#TYPE=FLOAT,DOUBLE,LONG#
- #typ=npy_float, npy_double,npy_long#
+ #typ=npy_float,npy_double,npy_long#
*/
/*
@@ -135,7 +139,6 @@ char *matrix_multiply_signature = "(m,n),(n,p)->(m,p)";
* out[k, m, p] = sum_n { in1[k, m, n] * in2[k, n, p] }.
*/
-
static void
@TYPE@_matrix_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
@@ -177,27 +180,66 @@ static void
/**end repeat**/
-/* The following lines were generated using a slightly modified
- version of code_generators/generate_umath.py and adding these
- lines to defdict:
-
-defdict = {
-'inner1d' :
- Ufunc(2, 1, None_,
- r'''inner on the last dimension and broadcast on the rest \n"
- " \"(i),(i)->()\" \n''',
- TD('ld'),
- ),
-'innerwt' :
- Ufunc(3, 1, None_,
- r'''inner1d with a weight argument \n"
- " \"(i),(i),(i)->()\" \n''',
- TD('ld'),
- ),
-}
+char *euclidean_pdist_signature = "(n,d)->(p)";
+/**begin repeat
+
+ #TYPE=FLOAT,DOUBLE#
+ #typ=npy_float,npy_double#
+ #sqrt_func=sqrtf,sqrt#
*/
+/*
+ * This implements the function
+ * out[j*(2*n-3-j)+k-1] = sum_d { (in1[j, d] - in1[k, d])^2 }
+ * with 0 < k < j < n, i.e. computes all unique pairwise euclidean distances.
+ */
+
+static void
+@TYPE@_euclidean_pdist(char **args, npy_intp *dimensions, npy_intp *steps,
+ void *NPY_UNUSED(func))
+{
+ INIT_OUTER_LOOP_2
+ npy_intp len_n = *dimensions++;
+ npy_intp len_d = *dimensions++;
+ npy_intp len_p = *dimensions;
+ npy_intp stride_n = *steps++;
+ npy_intp stride_d = *steps++;
+ npy_intp stride_p = *steps;
+
+ assert(len_n * (len_n - 1) / 2 == len_p);
+
+ BEGIN_OUTER_LOOP_2
+ const char *data_this = (const char *)args[0];
+ char *data_out = args[1];
+ npy_intp n;
+ for (n = 0; n < len_n; ++n) {
+ const char *data_that = data_this + stride_n;
+ npy_intp nn;
+ for (nn = n + 1; nn < len_n; ++nn) {
+ const char *ptr_this = data_this;
+ const char *ptr_that = data_that;
+ @typ@ out = 0;
+ npy_intp d;
+ for (d = 0; d < len_d; ++d) {
+ const @typ@ delta = *(const @typ@ *)ptr_this -
+ *(const @typ@ *)ptr_that;
+ out += delta * delta;
+ ptr_this += stride_d;
+ ptr_that += stride_d;
+ }
+ *(@typ@ *)data_out = @sqrt_func@(out);
+ data_that += stride_n;
+ data_out += stride_p;
+ }
+ data_this += stride_n;
+ }
+ END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+
static PyUFuncGenericFunction inner1d_functions[] = { LONG_inner1d, DOUBLE_inner1d };
static void * inner1d_data[] = { (void *)NULL, (void *)NULL };
static char inner1d_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
@@ -208,39 +250,49 @@ static PyUFuncGenericFunction matrix_multiply_functions[] = { LONG_matrix_multip
static void *matrix_multiply_data[] = { (void *)NULL, (void *)NULL, (void *)NULL };
static char matrix_multiply_signatures[] = { NPY_LONG, NPY_LONG, NPY_LONG, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE };
+static PyUFuncGenericFunction euclidean_pdist_functions[] =
+ { FLOAT_euclidean_pdist, DOUBLE_euclidean_pdist };
+static void *eucldiean_pdist_data[] = { (void *)NULL, (void *)NULL };
+static char euclidean_pdist_signatures[] = { NPY_FLOAT, NPY_FLOAT,
+ NPY_DOUBLE, NPY_DOUBLE };
+
+
static void
addUfuncs(PyObject *dictionary) {
PyObject *f;
- f = PyUFunc_FromFuncAndDataAndSignature(inner1d_functions, inner1d_data, inner1d_signatures, 2,
- 2, 1, PyUFunc_None, "inner1d",
- "inner on the last dimension and broadcast on the rest \n"\
- " \"(i),(i)->()\" \n",
- 0, inner1d_signature);
+ f = PyUFunc_FromFuncAndDataAndSignature(inner1d_functions, inner1d_data,
+ inner1d_signatures, 2, 2, 1, PyUFunc_None, "inner1d",
+ "inner on the last dimension and broadcast on the rest \n"
+ " \"(i),(i)->()\" \n",
+ 0, inner1d_signature);
PyDict_SetItemString(dictionary, "inner1d", f);
Py_DECREF(f);
- f = PyUFunc_FromFuncAndDataAndSignature(innerwt_functions, innerwt_data, innerwt_signatures, 2,
- 3, 1, PyUFunc_None, "innerwt",
- "inner1d with a weight argument \n"\
- " \"(i),(i),(i)->()\" \n",
- 0, innerwt_signature);
+ f = PyUFunc_FromFuncAndDataAndSignature(innerwt_functions, innerwt_data,
+ innerwt_signatures, 2, 3, 1, PyUFunc_None, "innerwt",
+ "inner1d with a weight argument \n"
+ " \"(i),(i),(i)->()\" \n",
+ 0, innerwt_signature);
PyDict_SetItemString(dictionary, "innerwt", f);
Py_DECREF(f);
f = PyUFunc_FromFuncAndDataAndSignature(matrix_multiply_functions,
- matrix_multiply_data, matrix_multiply_signatures,
- 3, 2, 1, PyUFunc_None, "matrix_multiply",
- "matrix multiplication on last two dimensions \n"\
- " \"(m,n),(n,p)->(m,p)\" \n",
- 0, matrix_multiply_signature);
+ matrix_multiply_data, matrix_multiply_signatures,
+ 3, 2, 1, PyUFunc_None, "matrix_multiply",
+ "matrix multiplication on last two dimensions \n"
+ " \"(m,n),(n,p)->(m,p)\" \n",
+ 0, matrix_multiply_signature);
PyDict_SetItemString(dictionary, "matrix_multiply", f);
Py_DECREF(f);
+ f = PyUFunc_FromFuncAndDataAndSignature(euclidean_pdist_functions,
+ eucldiean_pdist_data, euclidean_pdist_signatures,
+ 2, 1, 1, PyUFunc_None, "euclidean_pdist",
+ "pairwise euclidean distance on last two dimensions \n"
+ " \"(n,d)->(p)\" \n",
+ 0, euclidean_pdist_signature);
+ PyDict_SetItemString(dictionary, "euclidean_pdist", f);
+ Py_DECREF(f);
}
-/*
- End of auto-generated code.
-*/
-
-
static PyObject *
UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index eacc266be..a285d5334 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -388,21 +388,18 @@ class TestUfunc(TestCase):
msg = "extend & broadcast loop dimensions"
b = np.arange(4).reshape((2, 2))
assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg)
- msg = "broadcast in core dimensions"
+ # Broadcast in core dimensions should fail
a = np.arange(8).reshape((4, 2))
b = np.arange(4).reshape((4, 1))
- assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg)
- msg = "extend & broadcast core and loop dimensions"
+ assert_raises(ValueError, umt.inner1d, a, b)
+ # Extend core dimensions should fail
a = np.arange(8).reshape((4, 2))
b = np.array(7)
- assert_array_equal(umt.inner1d(a, b), np.sum(a*b, axis=-1), err_msg=msg)
- msg = "broadcast should fail"
+ assert_raises(ValueError, umt.inner1d, a, b)
+ # Broadcast should fail
a = np.arange(2).reshape((2, 1, 1))
b = np.arange(3).reshape((3, 1, 1))
- try:
- ret = umt.inner1d(a, b)
- assert_equal(ret, None, err_msg=msg)
- except ValueError: None
+ assert_raises(ValueError, umt.inner1d, a, b)
def test_type_cast(self):
msg = "type cast"
@@ -542,8 +539,8 @@ class TestUfunc(TestCase):
a2 = d2.transpose(p2)[s2]
ref = ref and a1.base != None
ref = ref and a2.base != None
- if broadcastable(a1.shape[-1], a2.shape[-2]) and \
- broadcastable(a1.shape[0], a2.shape[0]):
+ if (a1.shape[-1] == a2.shape[-2] and
+ broadcastable(a1.shape[0], a2.shape[0])):
assert_array_almost_equal(
umt.matrix_multiply(a1, a2),
np.sum(a2[..., np.newaxis].swapaxes(-3, -1) *
@@ -553,6 +550,16 @@ class TestUfunc(TestCase):
assert_equal(ref, True, err_msg="reference check")
+ def test_euclidean_pdist(self):
+ a = np.arange(12, dtype=np.float).reshape(4, 3)
+ out = np.empty((a.shape[0] * (a.shape[0] - 1) // 2,), dtype=a.dtype)
+ umt.euclidean_pdist(a, out)
+ b = np.sqrt(np.sum((a[:, None] - a)**2, axis=-1))
+ b = b[~np.tri(a.shape[0], dtype=bool)]
+ assert_almost_equal(out, b)
+ # An output array is required to determine p with signature (n,d)->(p)
+ assert_raises(ValueError, umt.euclidean_pdist, a)
+
def test_object_logical(self):
a = np.array([3, None, True, False, "test", ""], dtype=object)
assert_equal(np.logical_or(a, None),