summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--numpy/core/code_generators/numpy_api.py1
-rw-r--r--numpy/core/src/multiarray/convert.c101
-rw-r--r--numpy/core/src/multiarray/einsum.c.src667
-rw-r--r--numpy/core/src/multiarray/new_iterator.c.src27
-rw-r--r--numpy/core/tests/test_numeric.py1
5 files changed, 632 insertions, 165 deletions
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index 75f89945a..b943f17de 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -301,6 +301,7 @@ multiarray_funcs_api = {
'PyArray_CanCastArrayTo': 266,
'PyArray_CanCastTypeTo': 267,
'PyArray_EinsteinSum': 268,
+ 'PyArray_FillWithZero': 269,
}
ufunc_types_api = {
diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c
index a2c965181..5aef72d45 100644
--- a/numpy/core/src/multiarray/convert.c
+++ b/numpy/core/src/multiarray/convert.c
@@ -13,6 +13,7 @@
#include "arrayobject.h"
#include "mapping.h"
+#include "lowlevel_strided_loops.h"
#include "convert.h"
@@ -329,8 +330,104 @@ PyArray_FillWithScalar(PyArrayObject *arr, PyObject *obj)
}
/*NUMPY_API
- Copy an array.
-*/
+ * Fills an array with zeros.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+NPY_NO_EXPORT int
+PyArray_FillWithZero(PyArrayObject *a)
+{
+ PyArray_StridedTransferFn *stransfer = NULL;
+ void *transferdata = NULL;
+ PyArray_Descr *dtype = PyArray_DESCR(a);
+ NpyIter *iter;
+
+ NpyIter_IterNext_Fn iternext;
+ char **dataptr;
+ npy_intp stride, *countptr;
+ int needs_api;
+
+ NPY_BEGIN_THREADS_DEF;
+
+ if (!PyArray_ISWRITEABLE(a)) {
+ PyErr_SetString(PyExc_RuntimeError, "cannot write to array");
+ return -1;
+ }
+
+ /* A zero-sized array needs no zeroing */
+ if (PyArray_SIZE(a) == 0) {
+ return 0;
+ }
+
+ /* If it's possible to do a simple memset, do so */
+ if (!PyDataType_REFCHK(dtype) && (PyArray_ISCONTIGUOUS(a) ||
+ PyArray_ISFORTRAN(a))) {
+ memset(PyArray_DATA(a), 0, PyArray_NBYTES(a));
+ return 0;
+ }
+
+ /* Use an iterator to go through all the data */
+ iter = NpyIter_New(a, NPY_ITER_WRITEONLY|NPY_ITER_NO_INNER_ITERATION,
+ NPY_KEEPORDER, NPY_NO_CASTING,
+ NULL, 0, NULL, 0);
+
+ if (iter == NULL) {
+ return -1;
+ }
+
+ iternext = NpyIter_GetIterNext(iter, NULL);
+ if (iternext == NULL) {
+ NpyIter_Deallocate(iter);
+ return -1;
+ }
+ dataptr = NpyIter_GetDataPtrArray(iter);
+ stride = NpyIter_GetInnerStrideArray(iter)[0];
+ countptr = NpyIter_GetInnerLoopSizePtr(iter);
+
+ needs_api = NpyIter_IterationNeedsAPI(iter);
+
+ /*
+ * Because buffering is disabled in the iterator, the inner loop
+ * strides will be the same throughout the iteration loop. Thus,
+ * we can pass them to this function to take advantage of
+ * contiguous strides, etc.
+ *
+ * By setting the src_dtype to NULL, we get a function which sets
+ * the destination to zeros.
+ */
+ if (PyArray_GetDTypeTransferFunction(
+ PyArray_ISALIGNED(a),
+ 0, stride,
+ NULL, PyArray_DESCR(a),
+ 0,
+ &stransfer, &transferdata,
+ &needs_api) != NPY_SUCCEED) {
+ NpyIter_Deallocate(iter);
+ return -1;
+ }
+
+ if (!needs_api) {
+ NPY_BEGIN_THREADS;
+ }
+
+ do {
+ stransfer(NULL, 0, *dataptr, stride,
+ *countptr, 0, transferdata);
+ } while(iternext(iter));
+
+ if (!needs_api) {
+ NPY_END_THREADS;
+ }
+
+ PyArray_FreeStridedTransferData(transferdata);
+ NpyIter_Deallocate(iter);
+
+ return 0;
+}
+
+/*NUMPY_API
+ * Copy an array.
+ */
NPY_NO_EXPORT PyObject *
PyArray_NewCopy(PyArrayObject *m1, NPY_ORDER fortran)
{
diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src
index d7dc73a23..97905ca54 100644
--- a/numpy/core/src/multiarray/einsum.c.src
+++ b/numpy/core/src/multiarray/einsum.c.src
@@ -17,6 +17,12 @@
#include <ctype.h>
+typedef enum {
+ BROADCAST_LEFT,
+ BROADCAST_RIGHT,
+ BROADCAST_MIDDLE
+} EINSUM_BROADCAST;
+
/*
* Parses the subscripts for one operand into an output
* of 'ndim' labels
@@ -28,9 +34,11 @@ parse_operand_subscripts(char *subscripts, int length,
char *out_label_counts,
int *out_min_label,
int *out_max_label,
- int *out_num_labels)
+ int *out_num_labels,
+ EINSUM_BROADCAST *out_broadcast)
{
int i, idim, ndim_left, label;
+ int left_labels = 0, right_labels = 0;
printf("Parsing operand %d subscripts\n", iop);
@@ -55,6 +63,7 @@ parse_operand_subscripts(char *subscripts, int length,
(*out_num_labels)++;
}
out_label_counts[label]++;
+ right_labels = 1;
}
else {
PyErr_Format(PyExc_ValueError,
@@ -114,6 +123,7 @@ parse_operand_subscripts(char *subscripts, int length,
(*out_num_labels)++;
}
out_label_counts[label]++;
+ left_labels = 1;
}
else {
PyErr_Format(PyExc_ValueError,
@@ -161,6 +171,16 @@ parse_operand_subscripts(char *subscripts, int length,
}
}
+ if (left_labels && right_labels) {
+ *out_broadcast = BROADCAST_MIDDLE;
+ }
+ else if (!left_labels) {
+ *out_broadcast = BROADCAST_RIGHT;
+ }
+ else {
+ *out_broadcast = BROADCAST_LEFT;
+ }
+
return 1;
}
@@ -173,9 +193,11 @@ static int
parse_output_subscripts(char *subscripts, int length,
int ndim_broadcast,
const char *label_counts,
- char *out_labels)
+ char *out_labels,
+ EINSUM_BROADCAST *out_broadcast)
{
int i, nlabels, label, idim, ndim, ndim_left;
+ int left_labels = 0, right_labels = 0;
/* Count the labels, making sure they're all unique and valid */
nlabels = 0;
@@ -230,6 +252,7 @@ parse_output_subscripts(char *subscripts, int length,
"too many output subscripts");
return -1;
}
+ right_labels = 1;
}
/* The end of the ellipsis */
else {
@@ -269,6 +292,7 @@ parse_output_subscripts(char *subscripts, int length,
"too many subscripts for the output");
return -1;
}
+ left_labels = 1;
}
else {
PyErr_SetString(PyExc_ValueError,
@@ -284,9 +308,353 @@ parse_output_subscripts(char *subscripts, int length,
out_labels[idim++] = 0;
}
+ if (left_labels && right_labels) {
+ *out_broadcast = BROADCAST_MIDDLE;
+ }
+ else if (!left_labels) {
+ *out_broadcast = BROADCAST_RIGHT;
+ }
+ else {
+ *out_broadcast = BROADCAST_LEFT;
+ }
+
return ndim;
}
+
+/*
+ * When there's just one operand and no reduction, we
+ * can return a view into op. This calculates the view
+ * if possible.
+ */
+static int
+get_single_op_view(PyArrayObject *op, int iop, char *labels,
+ int ndim_output, char *output_labels,
+ PyArrayObject **ret)
+{
+ npy_intp new_strides[NPY_MAXDIMS];
+ npy_intp new_dims[NPY_MAXDIMS];
+ char *out_label;
+ int label, i, idim, ndim, ibroadcast = 0;
+
+ ndim = PyArray_NDIM(op);
+
+ /* Initialize the dimensions and strides to zero */
+ for (idim = 0; idim < ndim_output; ++idim) {
+ new_dims[idim] = 0;
+ new_strides[idim] = 0;
+ }
+
+ /* Match the labels in the operand with the output labels */
+ for (idim = 0; idim < ndim; ++idim) {
+ printf("Matching label for dimension %d\n", idim);
+ label = labels[idim];
+ /* If this label says to merge axes, get the actual label */
+ if (label < 0) {
+ label = labels[idim+label];
+ }
+ /* If the label is 0, it's an unlabeled broadcast dimension */
+ if (label == 0) {
+ /* The next output label that's a broadcast dimension */
+ for (; ibroadcast < ndim_output; ++ibroadcast) {
+ if (output_labels[ibroadcast] == 0) {
+ break;
+ }
+ }
+ if (ibroadcast == ndim_output) {
+ PyErr_SetString(PyExc_ValueError,
+ "output had too few broadcast dimensions");
+ return 0;
+ }
+ new_dims[ibroadcast] = PyArray_DIM(op, idim);
+ new_strides[ibroadcast] = PyArray_STRIDE(op, idim);
+ ++ibroadcast;
+ }
+ else {
+ /* Find the position for this dimension in the output */
+ out_label = (char *)memchr(output_labels, label,
+ ndim_output);
+ /* If it's not found, reduction -> can't return a view */
+ if (out_label == NULL) {
+ break;
+ }
+ /* Update the dimensions and strides of the output */
+ i = out_label - output_labels;
+ if (new_dims[i] != 0 &&
+ new_dims[i] != PyArray_DIM(op, idim)) {
+ PyErr_Format(PyExc_ValueError,
+ "dimensions in operand %d for collapsing "
+ "index '%c' don't match (%d != %d)",
+ iop, label, (int)new_dims[i],
+ (int)PyArray_DIM(op, idim));
+ return 0;
+ }
+ new_dims[i] = PyArray_DIM(op, idim);
+ new_strides[i] += PyArray_STRIDE(op, idim);
+ }
+ }
+ /* If we processed all the input axes, return a view */
+ if (idim == ndim) {
+ printf("Returning a view\n");
+ Py_INCREF(PyArray_DESCR(op));
+ *ret = (PyArrayObject *)PyArray_NewFromDescr(
+ Py_TYPE(op),
+ PyArray_DESCR(op),
+ ndim_output, new_dims, new_strides,
+ PyArray_DATA(op),
+ 0, (PyObject *)op);
+
+ if (*ret == NULL) {
+ return 0;
+ }
+ if (!PyArray_Check(*ret)) {
+ Py_DECREF(*ret);
+ *ret = NULL;
+ PyErr_SetString(PyExc_RuntimeError,
+ "NewFromDescr failed to return an array");
+ return 0;
+ }
+ PyArray_UpdateFlags(*ret,
+ NPY_C_CONTIGUOUS|NPY_ALIGNED|NPY_F_CONTIGUOUS);
+ Py_INCREF(op);
+ PyArray_BASE(*ret) = (PyObject *)op;
+ return 1;
+ }
+
+ /* Return success, but that we couldn't make a view */
+ *ret = NULL;
+ return 1;
+}
+
+static PyArrayObject *
+get_combined_dims_view(PyArrayObject *op, int iop, char *labels)
+{
+ npy_intp new_strides[NPY_MAXDIMS];
+ npy_intp new_dims[NPY_MAXDIMS];
+ int i, idim, ndim, icombine, combineoffset, label;
+ int icombinemap[NPY_MAXDIMS];
+
+ PyArrayObject *ret = NULL;
+
+ ndim = PyArray_NDIM(op);
+
+ /* Initialize the dimensions and strides to zero */
+ for (idim = 0; idim < ndim; ++idim) {
+ new_dims[idim] = 0;
+ new_strides[idim] = 0;
+ }
+
+ /* Copy the dimensions and strides, except when collapsing */
+ icombine = 0;
+ for (idim = 0; idim < ndim; ++idim) {
+ printf("Processing dimension %d\n", idim);
+ label = labels[idim];
+ /* If this label says to merge axes, get the actual label */
+ if (label < 0) {
+ combineoffset = label;
+ label = labels[idim+label];
+ }
+ else {
+ combineoffset = 0;
+ if (icombine != idim) {
+ labels[icombine] = labels[idim];
+ }
+ icombinemap[idim] = icombine;
+ }
+ /* If the label is 0, it's an unlabeled broadcast dimension */
+ if (label == 0) {
+ new_dims[icombine] = PyArray_DIM(op, idim);
+ new_strides[icombine] = PyArray_STRIDE(op, idim);
+ }
+ else {
+ /* Update the combined axis dimensions and strides */
+ i = idim + combineoffset;
+ if (combineoffset < 0 &&
+ new_dims[i] != PyArray_DIM(op, idim)) {
+ PyErr_Format(PyExc_ValueError,
+ "dimensions in operand %d for collapsing "
+ "index '%c' don't match (%d != %d)",
+ iop, label, (int)new_dims[i],
+ (int)PyArray_DIM(op, idim));
+ return NULL;
+ }
+ i = icombinemap[i];
+ new_dims[i] = PyArray_DIM(op, idim);
+ new_strides[i] += PyArray_STRIDE(op, idim);
+ }
+
+ /* If the label didn't say to combine axes, increment dest i */
+ if (combineoffset == 0) {
+ icombine++;
+ }
+ }
+
+ /* The compressed number of dimensions */
+ ndim = icombine;
+
+ Py_INCREF(PyArray_DESCR(op));
+ ret = (PyArrayObject *)PyArray_NewFromDescr(
+ Py_TYPE(op),
+ PyArray_DESCR(op),
+ ndim, new_dims, new_strides,
+ PyArray_DATA(op),
+ PyArray_ISWRITEABLE(op) ? NPY_WRITEABLE : 0,
+ (PyObject *)op);
+
+ if (ret == NULL) {
+ return NULL;
+ }
+ if (!PyArray_Check(ret)) {
+ Py_DECREF(ret);
+ PyErr_SetString(PyExc_RuntimeError,
+ "NewFromDescr failed to return an array");
+ return NULL;
+ }
+ PyArray_UpdateFlags(ret,
+ NPY_C_CONTIGUOUS|NPY_ALIGNED|NPY_F_CONTIGUOUS);
+ Py_INCREF(op);
+ PyArray_BASE(ret) = (PyObject *)op;
+
+ return ret;
+}
+
+static int
+prepare_op_axes(int ndim, int iop, char *labels, npy_intp *axes,
+ npy_intp ndim_iter, char *iter_labels, EINSUM_BROADCAST broadcast)
+{
+ int i, label, ibroadcast;
+
+ /* Regular broadcasting */
+ if (broadcast == BROADCAST_RIGHT) {
+ /* broadcast dimensions get placed in rightmost position */
+ ibroadcast = ndim-1;
+ for (i = ndim_iter-1; i >= 0; --i) {
+ label = iter_labels[i];
+ /*
+ * If it's an unlabeled broadcast dimension, choose
+ * the next broadcast dimension from the operand.
+ */
+ if (label == 0) {
+ while (ibroadcast >= 0 && labels[ibroadcast] != 0) {
+ --ibroadcast;
+ }
+ /*
+ * If we used up all the operand broadcast dimensions,
+ * extend it with a "newaxis"
+ */
+ if (ibroadcast < 0) {
+ axes[i] = -1;
+ }
+ /* Otherwise map to the broadcast axis */
+ else {
+ axes[i] = ibroadcast;
+ }
+ }
+ /* It's a labeled dimension, find the matching one */
+ else {
+ char *match = memchr(labels, label, ndim);
+ /* If the op doesn't have the label, broadcast it */
+ if (match == NULL) {
+ axes[i] = -1;
+ }
+ /* Otherwise use it */
+ else {
+ axes[i] = match - labels;
+ }
+ }
+ }
+ }
+ /* Reverse broadcasting */
+ else if (broadcast == BROADCAST_LEFT) {
+ /* broadcast dimensions get placed in leftmost position */
+ ibroadcast = 0;
+ for (i = 0; i < ndim_iter; ++i) {
+ label = iter_labels[i];
+ /*
+ * If it's an unlabeled broadcast dimension, choose
+ * the next broadcast dimension from the operand.
+ */
+ if (label == 0) {
+ while (ibroadcast < ndim && labels[ibroadcast] != 0) {
+ ++ibroadcast;
+ }
+ /*
+ * If we used up all the operand broadcast dimensions,
+ * extend it with a "newaxis"
+ */
+ if (ibroadcast >= ndim) {
+ axes[i] = -1;
+ }
+ /* Otherwise map to the broadcast axis */
+ else {
+ axes[i] = ibroadcast;
+ }
+ }
+ /* It's a labeled dimension, find the matching one */
+ else {
+ char *match = memchr(labels, label, ndim);
+ /* If the op doesn't have the label, broadcast it */
+ if (match == NULL) {
+ axes[i] = -1;
+ }
+ /* Otherwise use it */
+ else {
+ axes[i] = match - labels;
+ }
+ }
+ }
+ }
+ /* Middle broadcasting */
+ else {
+ /* broadcast dimensions get placed in leftmost position */
+ ibroadcast = 0;
+ for (i = 0; i < ndim_iter; ++i) {
+ label = iter_labels[i];
+ /*
+ * If it's an unlabeled broadcast dimension, choose
+ * the next broadcast dimension from the operand.
+ */
+ if (label == 0) {
+ while (ibroadcast < ndim && labels[ibroadcast] != 0) {
+ ++ibroadcast;
+ }
+ /*
+ * If we used up all the operand broadcast dimensions,
+ * it's an error
+ */
+ if (ibroadcast >= ndim) {
+ PyErr_Format(PyExc_ValueError,
+ "operand %d did not have enough dimensions "
+ "to match the broadcasting, and couldn't be "
+ "extended because einstein sum subscripts "
+ "were specified at both the start and end",
+ iop);
+ return 0;
+ }
+ /* Otherwise map to the broadcast axis */
+ else {
+ axes[i] = ibroadcast;
+ }
+ }
+ /* It's a labeled dimension, find the matching one */
+ else {
+ char *match = memchr(labels, label, ndim);
+ /* If the op doesn't have the label, broadcast it */
+ if (match == NULL) {
+ axes[i] = -1;
+ }
+ /* Otherwise use it */
+ else {
+ axes[i] = match - labels;
+ }
+ }
+ }
+ }
+
+ return 1;
+}
+
+
/*NUMPY_API
* This function provides summation of array elements according to
* the Einstein summation convention. For example:
@@ -312,9 +680,11 @@ parse_output_subscripts(char *subscripts, int length,
* then c[i,j] = a[j]*b[i].
*
* Alternatively, you can control the output order or prevent
- * an axis from being summed by providing indices for the output.
- * This allows us to turn 'trace' into 'diag', for example.
+ * an axis from being summed/force an axis to be summed by providing
+ * indices for the output. This allows us to turn 'trace' into
+ * 'diag', for example.
* - diag(a) -> einsum("ii->i", a)
+ * - sum(a, axis=0) -> einsum("i...->", a)
*
* Subscripts at the beginning and end may be specified by
* putting an ellipsis "..." in the middle. For example,
@@ -337,12 +707,21 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop,
int iop, label, min_label = 127, max_label = 0, num_labels;
char label_counts[128];
char op_labels[NPY_MAXARGS][NPY_MAXDIMS];
- char output_labels[NPY_MAXDIMS];
- int idim, ndim_output, ndim_broadcast;
+ char output_labels[NPY_MAXDIMS], *iter_labels;
+ int idim, ndim_output, ndim_broadcast, ndim_iter;
- PyArrayObject *op[NPY_MAXARGS];
+ EINSUM_BROADCAST broadcast[NPY_MAXARGS];
+ PyArrayObject *op[NPY_MAXARGS], *ret = NULL;
+ PyArray_Descr *op_dtypes_array[NPY_MAXARGS], **op_dtypes;
- if (nop > NPY_MAXARGS) {
+ npy_intp op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS];
+ npy_intp *op_axes[NPY_MAXARGS];
+ npy_uint32 op_flags[NPY_MAXARGS];
+
+ NpyIter *iter;
+
+ /* nop+1 (+1 is for the output) must fit in NPY_MAXARGS */
+ if (nop >= NPY_MAXARGS) {
PyErr_SetString(PyExc_ValueError,
"too many operands provided to einstein sum function");
return NULL;
@@ -376,7 +755,8 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop,
if (!parse_operand_subscripts(subscripts, length,
PyArray_NDIM(op_in[iop]),
iop, op_labels[iop], label_counts,
- &min_label, &max_label, &num_labels)) {
+ &min_label, &max_label, &num_labels,
+ &broadcast[iop])) {
return NULL;
}
@@ -433,7 +813,7 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop,
/* Parse the output subscript string */
ndim_output = parse_output_subscripts(outsubscripts, length,
ndim_broadcast, label_counts,
- output_labels);
+ output_labels, &broadcast[nop]);
}
else {
if (subscripts[0] != '-' || subscripts[1] != '>') {
@@ -447,12 +827,20 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop,
/* Parse the output subscript string */
ndim_output = parse_output_subscripts(subscripts, strlen(subscripts),
ndim_broadcast, label_counts,
- output_labels);
+ output_labels, &broadcast[nop]);
}
if (ndim_output < 0) {
return NULL;
}
+ if (out != NULL && PyArray_NDIM(out) != ndim_output) {
+ PyErr_Format(PyExc_ValueError,
+ "out parameter does not have the correct number of "
+ "dimensions, has %d but should have %d",
+ (int)PyArray_NDIM(out), (int)ndim_output);
+ return NULL;
+ }
+
/* Set all the op references to NULL */
for (iop = 0; iop < nop; ++iop) {
op[iop] = NULL;
@@ -464,8 +852,6 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop,
*/
printf("Processing inputs\n");
for (iop = 0; iop < nop; ++iop) {
- npy_intp new_strides[NPY_MAXDIMS];
- npy_intp new_dims[NPY_MAXDIMS];
char *labels = op_labels[iop];
int combine, ndim;
@@ -476,86 +862,16 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop,
* first try remapping the axes to the output to return
* a view instead of a copy.
*/
- if (nop == 1 && out == NULL) {
- char *out_label;
- int i, ibroadcast = 0;
-
- /* Initialize the dimensions and strides to zero */
- for (idim = 0; idim < ndim_output; ++idim) {
- new_dims[idim] = 0;
- new_strides[idim] = 0;
- }
- /* Match the labels in the operand with the output labels */
- for (idim = 0; idim < ndim; ++idim) {
- printf("Matching label for dimension %d\n", idim);
- label = labels[idim];
- /* If this label says to merge axes, get the actual label */
- if (label < 0) {
- label = labels[idim+label];
- }
- /* If the label is 0, it's an unlabeled broadcast dimension */
- if (label == 0) {
- /* The next output label that's a broadcast dimension */
- for (; ibroadcast < ndim_output; ++ibroadcast) {
- if (output_labels[ibroadcast] == 0) {
- break;
- }
- }
- if (ibroadcast == ndim_output) {
- PyErr_SetString(PyExc_ValueError,
- "output had too few broadcast dimensions");
- return NULL;
- }
- new_dims[ibroadcast] = PyArray_DIM(op_in[iop], idim);
- new_strides[ibroadcast] = PyArray_STRIDE(op_in[iop], idim);
- ++ibroadcast;
- }
- else {
- /* Find the position for this dimension in the output */
- out_label = (char *)memchr(output_labels, label,
- ndim_output);
- /* If it's not found, reduction -> can't return a view */
- if (out_label == NULL) {
- break;
- }
- /* Update the dimensions and strides of the output */
- i = out_label - output_labels;
- if (new_dims[i] != 0 &&
- new_dims[i] != PyArray_DIM(op_in[iop], idim)) {
- PyErr_Format(PyExc_ValueError,
- "dimensions in operand %d for collapsing "
- "index '%c' don't match (%d != %d)",
- iop, label, (int)new_dims[i],
- (int)PyArray_DIM(op_in[iop], idim));
- return NULL;
- }
- new_dims[i] = PyArray_DIM(op_in[iop], idim);
- new_strides[i] += PyArray_STRIDE(op_in[iop], idim);
- }
+ if (iop == 0 && nop == 1 && out == NULL) {
+ PyArrayObject *ret = NULL;
+
+ if (!get_single_op_view(op_in[iop], iop, labels,
+ ndim_output, output_labels,
+ &ret)) {
+ return NULL;
}
- /* If we processed all the input axes, return a view */
- if (idim == ndim) {
- printf("Returning a view\n");
- Py_INCREF(PyArray_DESCR(op_in[iop]));
- op[iop] = (PyArrayObject *)PyArray_NewFromDescr(
- Py_TYPE(op_in[iop]),
- PyArray_DESCR(op_in[iop]),
- ndim_output, new_dims, new_strides,
- PyArray_DATA(op_in[iop]),
- 0, (PyObject *)op_in[iop]);
-
- if (op[iop] == NULL) {
- return NULL;
- }
- if (!PyArray_Check(op[iop])) {
- Py_DECREF(op[iop]);
- PyErr_SetString(PyExc_RuntimeError,
- "NewFromDescr failed to return an array");
- return NULL;
- }
- Py_INCREF(op_in[iop]);
- PyArray_BASE(op[iop]) = (PyObject *)op_in[iop];
- return op[iop];
+ if (ret != NULL) {
+ return ret;
}
}
@@ -567,86 +883,115 @@ PyArray_EinsteinSum(char *subscripts, npy_intp nop,
}
}
- PyErr_SetString(PyExc_RuntimeError, "not implemented yet");
- return NULL;
+ /* If any dimensions are combined, create a view which combines them */
+ if (combine) {
+ op[iop] = get_combined_dims_view(op_in[iop], iop, labels);
+ if (op[iop] == NULL) {
+ goto fail;
+ }
+ }
+ /* No combining needed */
+ else {
+ Py_INCREF(op_in[iop]);
+ op[iop] = op_in[iop];
+ }
}
- PyErr_SetString(PyExc_RuntimeError, "not implemented yet");
- return NULL;
+ /* Set the output op */
+ op[nop] = out;
-#if 0
- /* Copy the dimensions before the labels without changes */
- for (idim = 0; idim < ndim-nlabels; ++i) {
- new_strides[idim] = PyArray_STRIDE(op[iop], idim);
- new_dims[idim] = PyArray_DIM(op[iop], idim);
- }
- /* Process the labels for the rest */
- for (ilabels = 0; ilabels < nlabels; ++ilabels) {
- label = labels[ilabels];
- if (label_counts[label] > 1) {
- /* If this label occurs earlier, merge it */
- for (jlabels = 0; jlabels < ilabels; ++jlabels) {
- if (labels[jlabels] == label) {
- if (new_dims[jlabels+ndims-nlabels] !=
- PyArray_DIM(op[iop], ilabels+ndims-nlabels)) {
- PyErr_Format(PyExc_ValueError,
- "operand %d has a repeated label, but "
- "the dimensions for the label don't match",
- (int)iop);
- goto fail;
- }
- new_strides[jlabels+ndims-nlabels] +=
- PyArray_STRIDE(op[iop], ilabels+ndims-nlabels);
- /* Indicate this label should be removed */
- labels[ilabels] = 0;
- break;
- }
- }
- }
- }
- /* Collapse the labels that got cleared */
- jlabels = 0;
- for (ilabels = 0; ilabels < nlabels; ++ilabels) {
- if (labels[ilabels] != 0) {
- if (jlabels != ilabels) {
- labels[jlabels] = labels[ilabels];
- i = ilabels+ndims-nlabels;
- new_strides[idim] = new_strides[i];
- new_dims[idim] = new_dims[i];
- }
- idim++;
- jlabels++;
- }
- else {
- op_nlabels[iop]--;
- }
- }
- /* If the dimensions shrunk, make a new view */
- if (idim != ndim) {
- ndim = idim;
- Py_INCREF(PyArray_DESCR(op_in[iop]));
- op[iop] = PyArray_NewFromDescr(&PyArray_Type,
- PyArray_DESCR(op_in[iop]),
- ndim, new_dims, new_strides,
- PyArray_DATA(op_in[iop]),
- 0, NULL);
- if (op[iop] == NULL) {
+ /*
+ * Set up the labels for the iterator (output + combined labels).
+ * Can just share the output_labels memory, because iter_labels
+ * is output_labels with some more labels appended.
+ */
+ iter_labels = output_labels;
+ ndim_iter = ndim_output;
+ for (label = min_label; label <= max_label; ++label) {
+ if (label_counts[label] > 0 &&
+ memchr(output_labels, label, ndim_output) == NULL) {
+ if (ndim_iter >= NPY_MAXDIMS) {
+ PyErr_SetString(PyExc_ValueError,
+ "too many subscripts in einsum");
goto fail;
}
- Py_INCREF(op_in[iop]);
- PyArray_BASE(op[iop]) = (PyObject *)op_in[iop];
+ iter_labels[ndim_iter++] = label;
}
- /* Otherwise just take a reference */
- else {
- op[iop] = op_in[iop]
- Py_INCREF(op[iop]);
+ }
+
+ /* Set up the op_axes for the iterator */
+ for (iop = 0; iop < nop; ++iop) {
+ op_axes[iop] = op_axes_arrays[iop];
+
+ if (!prepare_op_axes(PyArray_NDIM(op[iop]), iop, op_labels[iop],
+ op_axes[iop], ndim_iter, iter_labels, broadcast[iop])) {
+ goto fail;
}
}
-goto fail;
+
+ /* Set up the op_dtypes if dtype was provided */
+ if (dtype == NULL) {
+ op_dtypes = NULL;
+ }
+ else {
+ op_dtypes = op_dtypes_array;
+ for (iop = 0; iop <= nop; ++iop) {
+ op_dtypes[iop] = dtype;
+ }
+ }
+
+ /* Set the op_axes for the output */
+ op_axes[nop] = op_axes_arrays[nop];
+ for (idim = 0; idim < ndim_output; ++idim) {
+ op_axes[nop][idim] = idim;
+ }
+ for (idim = ndim_output; idim < ndim_iter; ++idim) {
+ op_axes[nop][idim] = -1;
+ }
+
+ /* Set the iterator per-op flags */
+
+ for (iop = 0; iop < nop; ++iop) {
+ op_flags[iop] = NPY_ITER_READONLY|
+ NPY_ITER_NBO|
+ NPY_ITER_ALIGNED;
+ }
+ op_flags[nop] = NPY_ITER_READWRITE|
+ NPY_ITER_NBO|
+ NPY_ITER_ALIGNED|
+ NPY_ITER_ALLOCATE|
+ NPY_ITER_NO_BROADCAST;
+
+ /* Allocate the iterator */
+ iter = NpyIter_MultiNew(nop+1, op, NPY_ITER_NO_INNER_ITERATION|
+ ((dtype == NULL) ? 0 : NPY_ITER_COMMON_DTYPE)|
+ NPY_ITER_BUFFERED|
+ NPY_ITER_DELAY_BUFALLOC|
+ NPY_ITER_GROWINNER|
+ NPY_ITER_REDUCE_OK,
+ order, casting,
+ op_flags, op_dtypes,
+ ndim_iter, op_axes, 0);
+
+ if (iter == NULL) {
+ goto fail;
+ }
+
+ /* Initialize the output to all zeros */
+ ret = NpyIter_GetOperandArray(iter)[nop];
+ Py_INCREF(ret);
+ PyArray_FillWithZero(ret);
+ NpyIter_Reset(iter, NULL);
+
+ NpyIter_DebugPrint(iter);
+ NpyIter_Deallocate(iter);
+
+ return ret;
+
+fail:
for (iop = 0; iop < nop; ++iop) {
Py_XDECREF(op[iop]);
}
return NULL;
-#endif
}
diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src
index 78725f8c9..30ca459ab 100644
--- a/numpy/core/src/multiarray/new_iterator.c.src
+++ b/numpy/core/src/multiarray/new_iterator.c.src
@@ -18,7 +18,7 @@
#include "lowlevel_strided_loops.h"
/********** PRINTF DEBUG TRACING **************/
-#define NPY_IT_DBG_TRACING 0
+#define NPY_IT_DBG_TRACING 1
#if NPY_IT_DBG_TRACING
#define NPY_IT_DBG_PRINTF(...) printf(__VA_ARGS__)
@@ -2555,6 +2555,8 @@ npyiter_prepare_one_operand(PyArrayObject **op,
Py_DECREF(*op_dtype);
*op_dtype = nbo_dtype;
+ NPY_IT_DBG_PRINTF("Iterator: Setting NPY_OP_ITFLAG_CAST "
+ "because of NPY_ITER_NBO\n");
/* Indicate that byte order or alignment needs fixing */
*op_itflags |= NPY_OP_ITFLAG_CAST;
}
@@ -2563,6 +2565,8 @@ npyiter_prepare_one_operand(PyArrayObject **op,
if (op_flags&NPY_ITER_ALIGNED) {
/* Check alignment */
if (!PyArray_ISALIGNED(*op)) {
+ NPY_IT_DBG_PRINTF("Iterator: Setting NPY_OP_ITFLAG_CAST "
+ "because of NPY_ITER_ALIGNED\n");
*op_itflags |= NPY_OP_ITFLAG_CAST;
}
}
@@ -2709,6 +2713,8 @@ npyiter_check_casting(npy_intp niter, PyArrayObject **op,
return 0;
}
+ NPY_IT_DBG_PRINTF("Iterator: Setting NPY_OP_ITFLAG_CAST "
+ "because the types aren't equivalent\n");
/* Indicate that this operand needs casting */
op_itflags[iiter] |= NPY_OP_ITFLAG_CAST;
}
@@ -3784,6 +3790,9 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
}
if (i >= 0) {
+ NPY_IT_DBG_PRINTF("Iterator: Setting allocated stride %d "
+ "for iterator dimension %d to %d\n", (int)i,
+ (int)idim, (int)stride);
strides[i] = stride;
if (p < 0) {
reversestride[i] = 1;
@@ -3808,7 +3817,13 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
axisdata = NIT_AXISDATA(iter);
for (idim = 0; idim < op_ndim; ++idim) {
+ NPY_IT_DBG_PRINTF("Iterator: Checking allocated output "
+ "dimension %d with stride %d\n",
+ (int)idim, (int)strides[idim]);
if (strides[idim] == NPY_MAX_INTP) {
+ NPY_IT_DBG_PRINTF("Iterator: The axis wasn't used, "
+ "and its dimension is %d\n",
+ (int)NAD_SHAPE(axisdata));
/*
* If deleting this axis produces a reduction, but
* reduction wasn't enabled, throw an error
@@ -3827,6 +3842,8 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
return NULL;
}
+ NPY_IT_DBG_PRINTF("Iterator: Indicating that a reduction "
+ "is occurring\n");
/* Indicate that a reduction is occurring */
NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
}
@@ -4067,6 +4084,8 @@ npyiter_allocate_arrays(NpyIter *iter,
npy_intp stride = NAD_STRIDES(axisdata)[iiter];
if (stride != op_dtype[iiter]->elsize) {
+ NPY_IT_DBG_PRINTF("Iterator: Setting NPY_OP_ITFLAG_CAST "
+ "because of NPY_ITER_CONTIG\n");
op_itflags[iiter] |= NPY_OP_ITFLAG_CAST;
if (!(itflags&NPY_ITFLAG_BUFFER)) {
PyErr_SetString(PyExc_TypeError,
@@ -4142,7 +4161,9 @@ npyiter_allocate_arrays(NpyIter *iter,
}
else if (!is_one_to_one &&
(op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) &&
- !(itflags&NPY_ITFLAG_REDUCE)) {
+ !(flags&NPY_ITER_REDUCE_OK)) {
+ NPY_IT_DBG_PRINTF("Iterator: %d %d %d\n",
+ (int)(!is_one_to_one), (int)((op_itflags[iiter]&NPY_OP_ITFLAG_WRITE)), (int)(!(flags&NPY_ITER_REDUCE_OK)));
PyErr_SetString(PyExc_ValueError,
"Iterator operand requires write buffering, "
"but has dimensions which have been broadcasted "
@@ -4856,6 +4877,8 @@ NpyIter_DebugPrint(NpyIter *iter)
printf("DELAYBUF ");
if (itflags&NPY_ITFLAG_NEEDSAPI)
printf("NEEDSAPI ");
+ if (itflags&NPY_ITFLAG_REDUCE)
+ printf("REDUCE ");
printf("\n");
printf("NDim: %d\n", (int)ndim);
printf("NIter: %d\n", (int)niter);
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 5937c6e9a..62f10693c 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -208,6 +208,7 @@ class TestEinSum(TestCase):
assert_raises(ValueError, np.einsum, "ij->jij", [[0,0],[0,0]])
# dimensions much match when being collapsed
+ assert_raises(ValueError, np.einsum, "ii", np.arange(6).reshape(2,3))
assert_raises(ValueError, np.einsum, "ii->i", np.arange(6).reshape(2,3))
def test_einsum_views(self):