Diffstat (limited to 'numpy')
-rw-r--r--  numpy/core/src/multiarray/new_iterator.c.src  | 795
1 file changed, 430 insertions, 365 deletions
diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src
index 06daed3ba..ee1d0b439 100644
--- a/numpy/core/src/multiarray/new_iterator.c.src
+++ b/numpy/core/src/multiarray/new_iterator.c.src
@@ -31,7 +31,7 @@
#define NPY_ITFLAG_NOINNER 0x020
/* The iterator is buffered */
#define NPY_ITFLAG_BUFFER 0x040
-/* The iterator is buffered */
+/* The iterator should grow the buffered inner loop when possible */
#define NPY_ITFLAG_GROWINNER 0x080
/* Internal iterator per-operand iterator flags */
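Note for readers of the flag table above: NPY_ITFLAG_BUFFER and NPY_ITFLAG_GROWINNER are independent bits, which the corrected comment now reflects. A minimal sketch of how such itflags bits are tested, using only the macros defined in this hunk (the surrounding iterator state is assumed):

    npy_uint32 itflags = NPY_ITFLAG_BUFFER | NPY_ITFLAG_GROWINNER;

    if (itflags & NPY_ITFLAG_BUFFER) {
        /* buffering is enabled for this iterator */
        if (itflags & NPY_ITFLAG_GROWINNER) {
            /* the buffered inner loop may be grown to cover more of the
             * iteration whenever the data layout allows it */
        }
    }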
@@ -51,10 +51,6 @@
/* The operand is aligned */
#define NPY_OP_ITFLAG_ALIGNED 0x40
-/* Internal flag, for the type of operands */
-#define NPY_ITER_OP_ARRAY 0
-#define NPY_ITER_OP_NULL 1
-
/*
* The data layout of the iterator is fully specified by
* a triple (itflags, ndim, niter). These three variables
@@ -212,12 +208,19 @@ struct NpyIter_AD {
/* Internal helper functions */
static int
-pyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags);
+npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags);
+static int
+npyiter_check_op_axes(npy_intp niter, npy_intp oa_ndim, npy_intp **op_axes);
static int
npyiter_check_per_op_flags(npy_uint32 flags, char *op_itflags);
static int
-pyiter_prepare_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype,
- PyArray_Descr** op_dtype, int* op_type,
+npyiter_prepare_operands(npy_intp niter, npy_intp *ndim, PyArrayObject **op_in,
+ PyArrayObject **op, PyArray_Descr **op_request_dtypes,
+ PyArray_Descr **op_dtype,
+ npy_intp *op_ndim, npy_uint32 *op_flags, char *op_itflags);
+static int
+npyiter_prepare_one_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype,
+ PyArray_Descr** op_dtype,
npy_intp* op_ndim,
npy_uint32 op_flags, char *op_itflags);
static int
@@ -263,6 +266,15 @@ static PyArrayObject *
npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
npy_intp op_ndim, npy_intp *shape,
PyArray_Descr *op_dtype, npy_intp *op_axes);
+static int
+npyiter_allocate_arrays(NpyIter *iter, PyArrayObject **op,
+ PyArray_Descr **op_dtype, PyTypeObject *subtype,
+ npy_uint32 *op_flags, char *op_itflags,
+ npy_intp *op_ndim, npy_intp **op_axes);
+static void
+npyiter_get_priority_subtype(PyArrayObject **op, char *op_itflags,
+ npy_intp niter, double *subtype_priority,
+ PyTypeObject **subtype);
static int
npyiter_allocate_buffers(NpyIter *iter);
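The forward declarations above (together with those in the previous hunk) define the helpers that NpyIter_MultiNew now delegates to. A condensed sketch of the resulting call order, assembled from the later hunks of this patch (locals and error handling as in the constructor, heavily abbreviated):

    if (!npyiter_check_op_axes(niter, oa_ndim, op_axes) ||
        !npyiter_check_global_flags(flags, &itflags)) {
        return NULL;
    }
    /* Fills op[], op_dtype[], op_ndim[], op_itflags[] and the overall ndim */
    if (!npyiter_prepare_operands(niter, &ndim, op_in, op,
                                  op_request_dtypes, op_dtype,
                                  op_ndim, op_flags, op_itflags)) {
        return NULL;
    }
    /* Only needed when an output must be allocated and subtypes are allowed */
    if (need_subtype) {
        npyiter_get_priority_subtype(op, op_itflags, niter,
                                     &subtype_priority, &subtype);
    }
    /* Allocates missing outputs and temporary copies, or flags buffering */
    if (!npyiter_allocate_arrays(iter, op, op_dtype, subtype, op_flags,
                                 op_itflags, op_ndim, op_axes)) {
        NpyIter_Deallocate(iter);
        return NULL;
    }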
@@ -291,16 +303,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags,
/* Per-operand values */
PyArrayObject *op[NPY_MAXARGS];
PyArray_Descr *op_dtype[NPY_MAXARGS];
- int op_type[NPY_MAXARGS];
char op_itflags[NPY_MAXARGS];
npy_intp op_ndim[NPY_MAXARGS];
char **op_dataptr;
npy_intp *perm;
NpyIter_BufferData *bufferdata = NULL;
- char axes_dupcheck[NPY_MAXDIMS];
- int any_allocate_if_null = 0, any_missing_dtypes = 0,
- allocate_output_scalars = 0;
+ int any_allocate = 0, any_missing_dtypes = 0,
+ allocate_output_scalars = 0, need_subtype = 0;
+
/* The subtype for automatically allocated outputs */
double subtype_priority = NPY_PRIORITY;
PyTypeObject *subtype = &PyArray_Type;
@@ -313,59 +324,12 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags,
}
/* Error check 'oa_ndim' and 'op_axes', which must be used together */
- if (oa_ndim == 0 && op_axes != NULL) {
- PyErr_Format(PyExc_ValueError,
- "If 'op_axes' is not NULL in the iterator constructor, "
- "'oa_ndim' must be greater than zero");
+ if (!npyiter_check_op_axes(niter, oa_ndim, op_axes)) {
return NULL;
}
- else if (oa_ndim > 0) {
- if (oa_ndim > NPY_MAXDIMS) {
- PyErr_Format(PyExc_ValueError,
- "Cannot construct an iterator with more than %d dimensions "
- "(%d were requested for op_axes)",
- (int)NPY_MAXDIMS, (int)oa_ndim);
- return NULL;
- }
- else if (op_axes == NULL) {
- PyErr_Format(PyExc_ValueError,
- "If 'oa_ndim' is greater than zero in the iterator "
- "constructor, then op_axes cannot be NULL");
- return NULL;
- }
- /* Check that there are no duplicates in op_axes */
- for (iiter = 0; iiter < niter; ++iiter) {
- npy_intp *axes = op_axes[iiter];
- if (axes != NULL) {
- memset(axes_dupcheck, 0, NPY_MAXDIMS);
- for (idim = 0; idim < oa_ndim; ++idim) {
- npy_intp i = axes[idim];
- if (i >= 0) {
- if (i >= NPY_MAXDIMS) {
- PyErr_Format(PyExc_ValueError,
- "The 'op_axes' provided to the iterator "
- "constructor contained invalid "
- "values %d", (int)i);
- return NULL;
- } else if(axes_dupcheck[i] == 1) {
- PyErr_Format(PyExc_ValueError,
- "The 'op_axes' provided to the iterator "
- "constructor contained duplicate "
- "value %d", (int)i);
- return NULL;
- }
- else {
- axes_dupcheck[i] = 1;
- }
- }
- }
- }
- }
- }
-
- /* Checks the global iterator flags */
- if (!pyiter_check_global_flags(flags, &itflags)) {
+ /* Check the global iterator flags */
+ if (!npyiter_check_global_flags(flags, &itflags)) {
return NULL;
}
@@ -374,76 +338,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags,
* chosen to be big enough to get some amortization benefits, but
* small enough to be cache-friendly.
*/
- if (itflags&NPY_ITFLAG_BUFFER && buffersize <= 0) {
+ if ((itflags&NPY_ITFLAG_BUFFER) && buffersize <= 0) {
buffersize = 1 << 12;
}
/* Prepare all the operands */
- for (iiter = 0; iiter < niter; ++iiter) {
- /*
- * Make a copy of the input operands so we can substitute
- * new values in place when necessary without affecting
- * the caller's array.
- */
- op[iiter] = op_in[iiter];
- Py_XINCREF(op[iiter]);
- op_dtype[iiter] = NULL;
-
- /* Check the readonly/writeonly flags, and fill in op_itflags */
- if (!npyiter_check_per_op_flags(op_flags[iiter], &op_itflags[iiter])) {
- npy_intp i;
-
- for (i = 0; i <= iiter; ++i) {
- Py_XDECREF(op[i]);
- Py_XDECREF(op_dtype[i]);
- }
- return NULL;
- }
-
- /*
- * Prepare the operand. This produces an op_dtype[iiter] reference
- * on success.
- */
- if (!pyiter_prepare_operand(&op[iiter],
- op_request_dtypes ? op_request_dtypes[iiter] : NULL,
- &op_dtype[iiter], &op_type[iiter],
- &op_ndim[iiter],
- op_flags[iiter], &op_itflags[iiter])) {
- npy_intp i;
-
- for (i = 0; i <= iiter; ++i) {
- Py_XDECREF(op[i]);
- Py_XDECREF(op_dtype[i]);
- }
- return NULL;
- }
- /* The iterator dimensions is the maximum of all the inputs */
- if (op_ndim[iiter] > ndim) {
- ndim = op_ndim[iiter];
- }
- }
-
-
- /* If all the operands were NULL, it's an error */
- if (op_type[0] == NPY_ITER_OP_NULL) {
- int all_null = 1;
- for (iiter = 1; iiter < niter; ++iiter) {
- if (op_type[iiter] != NPY_ITER_OP_NULL) {
- all_null = 0;
- break;
- }
- }
- if (all_null) {
- npy_intp i;
-
- for (i = 0; i < niter; ++i) {
- Py_XDECREF(op[i]);
- Py_XDECREF(op_dtype[i]);
- }
- PyErr_SetString(PyExc_ValueError,
- "At least one iterator input must be non-NULL");
- return NULL;
- }
+ if (!npyiter_prepare_operands(niter, &ndim, op_in, op,
+ op_request_dtypes, op_dtype,
+ op_ndim, op_flags, op_itflags)) {
+ return NULL;
}
/* If 'op_axes' is being used, force 'ndim' */
@@ -474,36 +377,37 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags,
NIT_BASEOFFSETS(iter)[iiter] = 0;
/* Get the data pointer for this operand */
- switch (op_type[iiter]) {
- case NPY_ITER_OP_ARRAY:
- /*
- * Array casting/copying is handled later, once the
- * iteration order is finalized. Here, we
- * optimistically assume the array will be used
- * as is.
- */
- op_dataptr[iiter] = PyArray_DATA(op[iiter]);
- break;
-
- case NPY_ITER_OP_NULL:
- op_dataptr[iiter] = NULL;
- /* Now that ndim is fixed, outputs get the full ndim */
- if (allocate_output_scalars) {
- op_ndim[iiter] = 0;
- }
- else {
- op_ndim[iiter] = ndim;
- }
- /* Flag this so later we can avoid flipping axes */
- any_allocate_if_null = 1;
- /*
- * If the data type wasn't provided, will need to
- * calculate it later.
- */
- if (op_dtype[iiter] == NULL) {
- any_missing_dtypes = 1;
- }
- break;
+ if (op[iiter] != NULL) {
+ /*
+ * Array casting/copying is handled later, once the
+ * iteration order is finalized. Here, we
+ * optimistically assume the array will be used
+ * as is.
+ */
+ op_dataptr[iiter] = PyArray_DATA(op[iiter]);
+ }
+ else {
+ op_dataptr[iiter] = NULL;
+ /* Now that ndim is fixed, outputs get the full ndim */
+ if (allocate_output_scalars) {
+ op_ndim[iiter] = 0;
+ }
+ else {
+ op_ndim[iiter] = ndim;
+ }
+ /* Flag this so later we can avoid flipping axes */
+ any_allocate = 1;
+ /* If a subtype may be used, indicate so */
+ if (!(op_flags[iiter]&NPY_ITER_NO_SUBTYPE)) {
+ need_subtype = 1;
+ }
+ /*
+ * If the data type wasn't provided, it will need to
+ * be calculated later.
+ */
+ if (op_dtype[iiter] == NULL) {
+ any_missing_dtypes = 1;
+ }
}
}
/* Set resetindex to zero as well (it's just after the resetdataptr) */
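With the NPY_ITER_OP_ARRAY/NPY_ITER_OP_NULL constants removed, a NULL entry in op[] itself now means "allocate this operand". A small illustrative setup (the name input_array and the two-operand shape are hypothetical) showing what the rewritten branch above derives from it:

    PyArrayObject *op[2];

    op[0] = input_array;  /* existing array: its data pointer is used as-is */
    op[1] = NULL;         /* output to allocate: sets any_allocate, sets
                           * need_subtype unless NPY_ITER_NO_SUBTYPE is given,
                           * and sets any_missing_dtypes if no dtype was
                           * requested for it */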
@@ -569,28 +473,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags,
* If there's an output being allocated, we must not negate
* any strides.
*/
- if (!any_allocate_if_null) {
+ if (!any_allocate) {
npyiter_flip_negative_strides(iter);
}
itflags = NIT_ITFLAGS(iter);
}
- if (any_allocate_if_null) {
- /*
- * The __array_priority__ attribute of the inputs determines
- * the subtype of any output arrays. Take the subtype
- * with highest priority.
- */
- for (iiter = 0; iiter < niter; ++iiter) {
- if (op_itflags[iiter]&NPY_OP_ITFLAG_READ) {
- double priority =
- PyArray_GetPriority((PyObject *)op[iiter], 0.0);
- if (priority > subtype_priority) {
- subtype_priority = priority;
- subtype = Py_TYPE(op[iiter]);
- }
- }
- }
+ if (need_subtype) {
+ npyiter_get_priority_subtype(op, op_itflags, niter,
+ &subtype_priority, &subtype);
}
/*
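A sketch of the effect of the npyiter_get_priority_subtype call above (the helper itself is added near the end of this patch): starting from the defaults set earlier in the constructor, the subtype of the readable input with the highest __array_priority__ wins.

    double subtype_priority = NPY_PRIORITY;   /* default priority */
    PyTypeObject *subtype = &PyArray_Type;    /* default output subtype */

    npyiter_get_priority_subtype(op, op_itflags, niter,
                                 &subtype_priority, &subtype);
    /* subtype now names the type used for allocated outputs, e.g. an
     * ndarray subclass if a readable input of that type had a higher
     * __array_priority__ than the plain ndarray default */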
@@ -646,189 +537,10 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags,
* copying due to casting/byte order/alignment can be
* done now using a memory layout matching the iterator.
*/
- for (iiter = 0; iiter < niter; ++iiter) {
- if (op_type[iiter] == NPY_ITER_OP_NULL) {
- PyArrayObject *out;
- PyTypeObject *op_subtype;
-
- /* Check whether the subtype was disabled */
- if (op_flags[iiter]&NPY_ITER_NO_SUBTYPE) {
- op_subtype = &PyArray_Type;
- }
- else {
- op_subtype = subtype;
- }
-
- /* Allocate the output array, if possible */
- out = npyiter_new_temp_array(iter, op_subtype,
- op_ndim[iiter], NULL,
- op_dtype[iiter],
- op_axes ? op_axes[iiter] : NULL);
- if (out == NULL) {
- NpyIter_Deallocate(iter);
- return NULL;
- }
-
- op[iiter] = out;
- NIT_OBJECTS(iter)[iiter] = out;
-
- /*
- * Now we need to replace the pointers and strides with values
- * from the new array.
- */
- npyiter_replace_axisdata(iter, iiter, op[iiter], op_ndim[iiter],
- PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL);
-
- /* New arrays are aligned and need no swapping or casting */
- op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
- op_itflags[iiter] &= ~(NPY_OP_ITFLAG_COPYSWAP|NPY_OP_ITFLAG_CAST);
- }
- else if ((op_itflags[iiter]&
- (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_COPYSWAP)) &&
- (op_itflags[iiter]&NPY_OP_ITFLAG_COPY)) {
- PyArrayObject *temp;
-
- /* Allocate the temporary array, if possible */
- temp = npyiter_new_temp_array(iter, &PyArray_Type,
- PyArray_NDIM(op[iiter]),
- PyArray_DIMS(op[iiter]),
- op_dtype[iiter],
- op_axes ? op_axes[iiter] : NULL);
- if (temp == NULL) {
- NpyIter_Deallocate(iter);
- return NULL;
- }
-
- /* If the data will be read, copy it into temp */
- if (op_itflags[iiter]&NPY_OP_ITFLAG_READ) {
- if (PyArray_CopyInto(temp, op[iiter]) != 0) {
- Py_DECREF(temp);
- NpyIter_Deallocate(iter);
- return NULL;
- }
- }
- /* If the data will be written to, set UPDATEIFCOPY */
- if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) {
- PyArray_FLAGS(temp) |= NPY_UPDATEIFCOPY;
- PyArray_FLAGS(op[iiter]) &= ~NPY_WRITEABLE;
- Py_INCREF(op[iiter]);
- temp->base = (PyObject *)op[iiter];
- }
-
- Py_DECREF(op[iiter]);
- op[iiter] = temp;
- NIT_OBJECTS(iter)[iiter] = temp;
-
- /*
- * Now we need to replace the pointers and strides with values
- * from the temporary array.
- */
- npyiter_replace_axisdata(iter, iiter, op[iiter], op_ndim[iiter],
- PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL);
-
- /* Now it is aligned, and no longer needs a swap or cast */
- op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
- op_itflags[iiter] &= ~(NPY_OP_ITFLAG_COPYSWAP|NPY_OP_ITFLAG_CAST);
- }
- else {
- /*
- * Buffering must be enabled for casting/conversion if copy
- * wasn't specified.
- */
- if (op_itflags[iiter]&
- (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_COPYSWAP) &&
- !(itflags&NPY_ITFLAG_BUFFER)) {
- PyErr_SetString(PyExc_TypeError,
- "Iterator input required copying or buffering, "
- "but neither copying nor buffering was enabled");
- NpyIter_Deallocate(iter);
- return NULL;
- }
-
- /*
- * If the operand is aligned, any buffering can use aligned
- * optimizations.
- */
- if (PyArray_ISALIGNED(op[iiter])) {
- op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
- }
- }
-
- /*
- * If no alignment, byte swap, or casting is needed, and
- * the inner stride of this operand works for the whole
- * array, we can set NPY_OP_ITFLAG_BUFNEVER.
- * But, if buffering is enabled, write-buffering must be
- * one-to-one, because the buffering write back won't combine
- * values correctly. This test doesn't catch everything, but it will
- * catch the most common case of a broadcasting a write-buffered
- * dimension.
- */
- if ((itflags&NPY_ITFLAG_BUFFER) &&
- (!(op_itflags[iiter]&(NPY_OP_ITFLAG_CAST|
- NPY_OP_ITFLAG_COPYSWAP)) ||
- (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE))) {
- int is_one_to_one = 1;
- npy_intp stride, shape, innerstride = 0, innershape;
- NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
- npy_intp sizeof_axisdata =
- NIT_SIZEOF_AXISDATA(itflags, ndim, niter);
- /* Find stride of the first non-empty shape */
- for (idim = 0; idim < ndim; ++idim) {
- innershape = NAD_SHAPE(axisdata);
- if (innershape != 1) {
- innerstride = NAD_STRIDES(axisdata)[iiter];
- if (innerstride == 0) {
- is_one_to_one = 0;
- }
- break;
- }
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- }
- ++idim;
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- /* Check that everything could have coalesced together */
- for (; idim < ndim; ++idim) {
- stride = NAD_STRIDES(axisdata)[iiter];
- shape = NAD_SHAPE(axisdata);
- if (shape != 1) {
- if (stride == 0) {
- is_one_to_one = 0;
- }
- /*
- * If N times the inner stride doesn't equal this
- * stride, the multi-dimensionality is needed.
- */
- if (innerstride*innershape != stride) {
- break;
- }
- else {
- innershape *= shape;
- }
- }
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- }
- /*
- * If we looped all the way to the end, one stride works.
- * Set that stride, because it may not belong to the first
- * dimension.
- */
- if (idim == ndim &&
- !(op_itflags[iiter]&(NPY_OP_ITFLAG_CAST|
- NPY_OP_ITFLAG_COPYSWAP))) {
- op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER;
- NBF_STRIDES(bufferdata)[iiter] = innerstride;
- }
- else if (!is_one_to_one &&
- (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE)) {
- PyErr_SetString(PyExc_ValueError,
- "Iterator operand requires write buffering, "
- "but has dimensions which have been broadcasted "
- "and would be combined incorrectly");
- NpyIter_Deallocate(iter);
- return NULL;
- }
- }
+ if (!npyiter_allocate_arrays(iter, op, op_dtype, subtype, op_flags,
+ op_itflags, op_ndim, op_axes)) {
+ NpyIter_Deallocate(iter);
+ return NULL;
}
/*
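The allocation, temporary-copy, and stride-coalescing logic removed above reappears essentially unchanged inside the new npyiter_allocate_arrays later in this patch. As a concrete check of the coalescing test it contains, consider a hypothetical C-contiguous array of doubles with shape (3, 4), viewed from the innermost iterator axis outward:

    /* itemsize 8: axisdata[0] has shape 4, stride 8 (innermost axis),
     * axisdata[1] has shape 3, stride 32 */
    npy_intp innershape = 4, innerstride = 8;
    npy_intp shape = 3, stride = 32;

    if (innerstride * innershape == stride) {
        /* 8 * 4 == 32, so the two axes coalesce into one stride; if this
         * holds all the way out and no cast or copy/swap is needed, the
         * operand gets NPY_OP_ITFLAG_BUFNEVER and skips buffering */
        innershape *= shape;    /* now 12, covering the whole array */
    }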
@@ -1895,7 +1607,7 @@ npy_intp* NpyIter_GetInnerLoopSizePtr(NpyIter *iter)
* Returns 1 on success, 0 on error.
*/
static int
-pyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags)
+npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags)
{
if ((flags&NPY_ITER_PER_OP_FLAGS) != 0) {
PyErr_SetString(PyExc_ValueError,
@@ -1944,6 +1656,68 @@ pyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags)
return 1;
}
+static int
+npyiter_check_op_axes(npy_intp niter, npy_intp oa_ndim, npy_intp **op_axes)
+{
+ char axes_dupcheck[NPY_MAXDIMS];
+ npy_intp iiter, idim;
+
+ if (oa_ndim == 0 && op_axes != NULL) {
+ PyErr_Format(PyExc_ValueError,
+ "If 'op_axes' is not NULL in the iterator constructor, "
+ "'oa_ndim' must be greater than zero");
+ return 0;
+ }
+ else if (oa_ndim > 0) {
+ if (oa_ndim > NPY_MAXDIMS) {
+ PyErr_Format(PyExc_ValueError,
+ "Cannot construct an iterator with more than %d dimensions "
+ "(%d were requested for op_axes)",
+ (int)NPY_MAXDIMS, (int)oa_ndim);
+ return 0;
+ }
+ else if (op_axes == NULL) {
+ PyErr_Format(PyExc_ValueError,
+ "If 'oa_ndim' is greater than zero in the iterator "
+ "constructor, then op_axes cannot be NULL");
+ return 0;
+ }
+
+ /* Check that there are no duplicates in op_axes */
+ for (iiter = 0; iiter < niter; ++iiter) {
+ npy_intp *axes = op_axes[iiter];
+ if (axes != NULL) {
+ memset(axes_dupcheck, 0, NPY_MAXDIMS);
+ for (idim = 0; idim < oa_ndim; ++idim) {
+ npy_intp i = axes[idim];
+ if (i >= 0) {
+ if (i >= NPY_MAXDIMS) {
+ PyErr_Format(PyExc_ValueError,
+ "The 'op_axes' provided to the iterator "
+ "constructor for operand %d "
+ "contained invalid "
+ "values %d", (int)iiter, (int)i);
+ return 0;
+ } else if(axes_dupcheck[i] == 1) {
+ PyErr_Format(PyExc_ValueError,
+ "The 'op_axes' provided to the iterator "
+ "constructor for operand %d "
+ "contained duplicate "
+ "value %d", (int)iiter, (int)i);
+ return 0;
+ }
+ else {
+ axes_dupcheck[i] = 1;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return 1;
+}
+
/*
* Checks the per-operand input flags, and fills in op_itflags.
*
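A usage sketch for the new npyiter_check_op_axes above, with hypothetical axis mappings for a single operand and oa_ndim == 3 (negative entries are simply skipped by the range and duplicate checks):

    npy_intp ok_axes[3]  = {1, 0, -1};   /* in range, no duplicates */
    npy_intp bad_axes[3] = {0, 1, 1};    /* duplicate value 1 */
    npy_intp *op_axes_ok[1]  = {ok_axes};
    npy_intp *op_axes_bad[1] = {bad_axes};

    npyiter_check_op_axes(1, 3, op_axes_ok);   /* returns 1 */
    npyiter_check_op_axes(1, 3, op_axes_bad);  /* returns 0 and sets a
                                                * ValueError naming operand 0
                                                * and the duplicate value 1 */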
@@ -2011,6 +1785,85 @@ npyiter_check_per_op_flags(npy_uint32 op_flags, char *op_itflags)
}
/*
+ * Process all the operands, copying new references so further processing
+ * can replace the arrays if copying is necessary. Fill in the iterator's
+ * natural ndim.
+ */
+static int
+npyiter_prepare_operands(npy_intp niter, npy_intp *ndim, PyArrayObject **op_in,
+ PyArrayObject **op, PyArray_Descr **op_request_dtypes,
+ PyArray_Descr **op_dtype,
+ npy_intp *op_ndim, npy_uint32 *op_flags, char *op_itflags)
+{
+ npy_intp iiter;
+
+ *ndim = 0;
+
+ for (iiter = 0; iiter < niter; ++iiter) {
+ op[iiter] = op_in[iiter];
+ Py_XINCREF(op[iiter]);
+ op_dtype[iiter] = NULL;
+
+ /* Check the readonly/writeonly flags, and fill in op_itflags */
+ if (!npyiter_check_per_op_flags(op_flags[iiter], &op_itflags[iiter])) {
+ npy_intp i;
+
+ for (i = 0; i <= iiter; ++i) {
+ Py_XDECREF(op[i]);
+ Py_XDECREF(op_dtype[i]);
+ }
+ return 0;
+ }
+
+ /*
+ * Prepare the operand. This produces an op_dtype[iiter] reference
+ * on success.
+ */
+ if (!npyiter_prepare_one_operand(&op[iiter],
+ op_request_dtypes ? op_request_dtypes[iiter] : NULL,
+ &op_dtype[iiter], &op_ndim[iiter],
+ op_flags[iiter], &op_itflags[iiter])) {
+ npy_intp i;
+
+ for (i = 0; i <= iiter; ++i) {
+ Py_XDECREF(op[i]);
+ Py_XDECREF(op_dtype[i]);
+ }
+ return 0;
+ }
+ /* The iterator's number of dimensions is the maximum over all the inputs */
+ if (op_ndim[iiter] > *ndim) {
+ *ndim = op_ndim[iiter];
+ }
+ }
+
+
+ /* If all the operands were NULL, it's an error */
+ if (op[0] == NULL) {
+ int all_null = 1;
+ for (iiter = 1; iiter < niter; ++iiter) {
+ if (op[iiter] != NULL) {
+ all_null = 0;
+ break;
+ }
+ }
+ if (all_null) {
+ npy_intp i;
+
+ for (i = 0; i < niter; ++i) {
+ Py_XDECREF(op[i]);
+ Py_XDECREF(op_dtype[i]);
+ }
+ PyErr_SetString(PyExc_ValueError,
+ "At least one iterator input must be non-NULL");
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/*
* Returns 1 if the from -> to cast can be done, based on the casting
* flags provided in op_flags, and 0 otherwise.
*
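The new npyiter_prepare_operands above cleans up after itself: on any failure it drops every reference it already acquired before returning 0. A sketch of the contract its caller relies on (caller code abbreviated from the constructor):

    if (!npyiter_prepare_operands(niter, &ndim, op_in, op,
                                  op_request_dtypes, op_dtype,
                                  op_ndim, op_flags, op_itflags)) {
        return NULL;   /* nothing to release; the helper already did */
    }
    /* On success, every non-NULL op[i] and op_dtype[i] carries a reference
     * that the iterator (or its error paths) must later release, and ndim
     * holds the maximum operand dimensionality. */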
@@ -2086,14 +1939,14 @@ npyiter_can_cast(PyArray_Descr *from, PyArray_Descr *to, NPY_CASTING casting)
/*
* Prepares a constructor operand. Assumes a reference to 'op'
- * is owned, and that 'op' may be replaced. Fills in 'op_dtype',
- * 'op_type' and 'ndim'.
+ * is owned, and that 'op' may be replaced. Fills in 'op_dtype'
+ * and 'ndim'.
*
* Returns 1 on success, 0 on failure.
*/
static int
-pyiter_prepare_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype,
- PyArray_Descr **op_dtype, int* op_type,
+npyiter_prepare_one_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype,
+ PyArray_Descr **op_dtype,
npy_intp* op_ndim,
npy_uint32 op_flags, char *op_itflags)
{
@@ -2116,7 +1969,6 @@ pyiter_prepare_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype,
/* If a requested dtype was provided, use it, otherwise NULL */
Py_XINCREF(op_request_dtype);
*op_dtype = op_request_dtype;
- *op_type = NPY_ITER_OP_NULL;
*op_ndim = 0;
/* No copying of NULL operands */
*op_itflags &= ~NPY_OP_ITFLAG_COPY;
@@ -2132,7 +1984,6 @@ pyiter_prepare_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype,
return 0;
}
- *op_type = NPY_ITER_OP_ARRAY;
*op_ndim = PyArray_NDIM(*op);
/* PyArray_DESCR does not give us a reference */
*op_dtype = PyArray_DESCR(*op);
@@ -3267,6 +3118,220 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
return ret;
}
+static int
+npyiter_allocate_arrays(NpyIter *iter, PyArrayObject **op,
+ PyArray_Descr **op_dtype, PyTypeObject *subtype,
+ npy_uint32 *op_flags, char *op_itflags,
+ npy_intp *op_ndim, npy_intp **op_axes)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ npy_intp idim, ndim = NIT_NDIM(iter);
+ npy_intp iiter, niter = NIT_NITER(iter);
+
+ NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
+
+ for (iiter = 0; iiter < niter; ++iiter) {
+ if (op[iiter] == NULL) {
+ PyArrayObject *out;
+ PyTypeObject *op_subtype;
+
+ /* Check whether the subtype was disabled */
+ op_subtype = (op_flags[iiter]&NPY_ITER_NO_SUBTYPE) ?
+ &PyArray_Type : subtype;
+
+ /* Allocate the output array, if possible */
+ out = npyiter_new_temp_array(iter, op_subtype,
+ op_ndim[iiter], NULL,
+ op_dtype[iiter],
+ op_axes ? op_axes[iiter] : NULL);
+ if (out == NULL) {
+ return 0;
+ }
+
+ op[iiter] = out;
+ NIT_OBJECTS(iter)[iiter] = out;
+
+ /*
+ * Now we need to replace the pointers and strides with values
+ * from the new array.
+ */
+ npyiter_replace_axisdata(iter, iiter, op[iiter], op_ndim[iiter],
+ PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL);
+
+ /* New arrays are aligned and need no swapping or casting */
+ op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
+ op_itflags[iiter] &= ~(NPY_OP_ITFLAG_COPYSWAP|NPY_OP_ITFLAG_CAST);
+ }
+ else if ((op_itflags[iiter]&
+ (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_COPYSWAP)) &&
+ (op_itflags[iiter]&NPY_OP_ITFLAG_COPY)) {
+ PyArrayObject *temp;
+
+ /* Allocate the temporary array, if possible */
+ temp = npyiter_new_temp_array(iter, &PyArray_Type,
+ PyArray_NDIM(op[iiter]),
+ PyArray_DIMS(op[iiter]),
+ op_dtype[iiter],
+ op_axes ? op_axes[iiter] : NULL);
+ if (temp == NULL) {
+ return 0;
+ }
+
+ /* If the data will be read, copy it into temp */
+ if (op_itflags[iiter]&NPY_OP_ITFLAG_READ) {
+ if (PyArray_CopyInto(temp, op[iiter]) != 0) {
+ Py_DECREF(temp);
+ return 0;
+ }
+ }
+ /* If the data will be written to, set UPDATEIFCOPY */
+ if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) {
+ PyArray_FLAGS(temp) |= NPY_UPDATEIFCOPY;
+ PyArray_FLAGS(op[iiter]) &= ~NPY_WRITEABLE;
+ Py_INCREF(op[iiter]);
+ temp->base = (PyObject *)op[iiter];
+ }
+
+ Py_DECREF(op[iiter]);
+ op[iiter] = temp;
+ NIT_OBJECTS(iter)[iiter] = temp;
+
+ /*
+ * Now we need to replace the pointers and strides with values
+ * from the temporary array.
+ */
+ npyiter_replace_axisdata(iter, iiter, op[iiter], op_ndim[iiter],
+ PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL);
+
+ /* The temporary copy is aligned and needs no swap or cast */
+ op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
+ op_itflags[iiter] &= ~(NPY_OP_ITFLAG_COPYSWAP|NPY_OP_ITFLAG_CAST);
+ }
+ else {
+ /*
+ * Buffering must be enabled for casting/conversion if copy
+ * wasn't specified.
+ */
+ if (op_itflags[iiter]&
+ (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_COPYSWAP) &&
+ !(itflags&NPY_ITFLAG_BUFFER)) {
+ PyErr_SetString(PyExc_TypeError,
+ "Iterator operand required copying or buffering, "
+ "but neither copying nor buffering was enabled");
+ return 0;
+ }
+
+ /*
+ * If the operand is aligned, any buffering can use aligned
+ * optimizations.
+ */
+ if (PyArray_ISALIGNED(op[iiter])) {
+ op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
+ }
+ }
+
+ /*
+ * If no alignment, byte swap, or casting is needed, and
+ * the inner stride of this operand works for the whole
+ * array, we can set NPY_OP_ITFLAG_BUFNEVER.
+ * But, if buffering is enabled, write-buffering must be
+ * one-to-one, because the buffering write back won't combine
+ * values correctly. This test doesn't catch everything, but it will
+ * catch the most common case of broadcasting a write-buffered
+ * dimension.
+ */
+ if ((itflags&NPY_ITFLAG_BUFFER) &&
+ (!(op_itflags[iiter]&(NPY_OP_ITFLAG_CAST|
+ NPY_OP_ITFLAG_COPYSWAP)) ||
+ (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE))) {
+ int is_one_to_one = 1;
+ npy_intp stride, shape, innerstride = 0, innershape;
+ NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
+ npy_intp sizeof_axisdata =
+ NIT_SIZEOF_AXISDATA(itflags, ndim, niter);
+ /* Find stride of the first non-empty shape */
+ for (idim = 0; idim < ndim; ++idim) {
+ innershape = NAD_SHAPE(axisdata);
+ if (innershape != 1) {
+ innerstride = NAD_STRIDES(axisdata)[iiter];
+ if (innerstride == 0) {
+ is_one_to_one = 0;
+ }
+ break;
+ }
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ }
+ ++idim;
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ /* Check that everything could have coalesced together */
+ for (; idim < ndim; ++idim) {
+ stride = NAD_STRIDES(axisdata)[iiter];
+ shape = NAD_SHAPE(axisdata);
+ if (shape != 1) {
+ if (stride == 0) {
+ is_one_to_one = 0;
+ }
+ /*
+ * If N times the inner stride doesn't equal this
+ * stride, the multi-dimensionality is needed.
+ */
+ if (innerstride*innershape != stride) {
+ break;
+ }
+ else {
+ innershape *= shape;
+ }
+ }
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ }
+ /*
+ * If we looped all the way to the end, one stride works.
+ * Set that stride, because it may not belong to the first
+ * dimension.
+ */
+ if (idim == ndim &&
+ !(op_itflags[iiter]&(NPY_OP_ITFLAG_CAST|
+ NPY_OP_ITFLAG_COPYSWAP))) {
+ op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER;
+ NBF_STRIDES(bufferdata)[iiter] = innerstride;
+ }
+ else if (!is_one_to_one &&
+ (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Iterator operand requires write buffering, "
+ "but has dimensions which have been broadcasted "
+ "and would be combined incorrectly");
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * The __array_priority__ attribute of the inputs determines
+ * the subtype of any output arrays. This function finds the
+ * subtype of the input array with highest priority.
+ */
+static void
+npyiter_get_priority_subtype(PyArrayObject **op, char *op_itflags,
+ npy_intp niter, double *subtype_priority,
+ PyTypeObject **subtype)
+{
+ npy_intp iiter;
+
+ for (iiter = 0; iiter < niter; ++iiter) {
+ if (op_itflags[iiter]&NPY_OP_ITFLAG_READ) {
+ double priority = PyArray_GetPriority((PyObject *)op[iiter], 0.0);
+ if (priority > *subtype_priority) {
+ *subtype_priority = priority;
+ *subtype = Py_TYPE(op[iiter]);
+ }
+ }
+ }
+}
+
/*
* Calculates a dtype that all the types can be promoted to, using the
* ufunc rules. If only_inputs is 1, it leaves any operands that