diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-05 11:22:19 -0800 |
---|---|---|
committer | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-09 01:55:02 -0800 |
commit | 1f75aa0d7e3caacbfeb4aa730cc4496c4727359e (patch) | |
tree | dbc9e1a4c757c5cc05940f8ce4f8dff8a39025e3 /numpy | |
parent | 45c009a3425e4c60e68fa3dbe16524ba9c19d3a8 (diff) | |
download | numpy-1f75aa0d7e3caacbfeb4aa730cc4496c4727359e.tar.gz |
ENH: iter: Move code out of NpyIter_MultiNew into functions
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/multiarray/new_iterator.c.src | 795 |
1 files changed, 430 insertions, 365 deletions
diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src index 06daed3ba..ee1d0b439 100644 --- a/numpy/core/src/multiarray/new_iterator.c.src +++ b/numpy/core/src/multiarray/new_iterator.c.src @@ -31,7 +31,7 @@ #define NPY_ITFLAG_NOINNER 0x020 /* The iterator is buffered */ #define NPY_ITFLAG_BUFFER 0x040 -/* The iterator is buffered */ +/* The iterator should grow the buffered inner loop when possible */ #define NPY_ITFLAG_GROWINNER 0x080 /* Internal iterator per-operand iterator flags */ @@ -51,10 +51,6 @@ /* The operand is aligned */ #define NPY_OP_ITFLAG_ALIGNED 0x40 -/* Internal flag, for the type of operands */ -#define NPY_ITER_OP_ARRAY 0 -#define NPY_ITER_OP_NULL 1 - /* * The data layout of the iterator is fully specified by * a triple (itflags, ndim, niter). These three variables @@ -212,12 +208,19 @@ struct NpyIter_AD { /* Internal helper functions */ static int -pyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags); +npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags); +static int +npyiter_check_op_axes(npy_intp niter, npy_intp oa_ndim, npy_intp **op_axes); static int npyiter_check_per_op_flags(npy_uint32 flags, char *op_itflags); static int -pyiter_prepare_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype, - PyArray_Descr** op_dtype, int* op_type, +npyiter_prepare_operands(npy_intp niter, npy_intp *ndim, PyArrayObject **op_in, + PyArrayObject **op, PyArray_Descr **op_request_dtypes, + PyArray_Descr **op_dtype, + npy_intp *op_ndim, npy_uint32 *op_flags, char *op_itflags); +static int +npyiter_prepare_one_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype, + PyArray_Descr** op_dtype, npy_intp* op_ndim, npy_uint32 op_flags, char *op_itflags); static int @@ -263,6 +266,15 @@ static PyArrayObject * npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, npy_intp op_ndim, npy_intp *shape, PyArray_Descr *op_dtype, npy_intp *op_axes); +static int +npyiter_allocate_arrays(NpyIter *iter, PyArrayObject **op, + PyArray_Descr **op_dtype, PyTypeObject *subtype, + npy_uint32 *op_flags, char *op_itflags, + npy_intp *op_ndim, npy_intp **op_axes); +static void +npyiter_get_priority_subtype(PyArrayObject **op, char *op_itflags, + npy_intp niter, double *subtype_priority, + PyTypeObject **subtype); static int npyiter_allocate_buffers(NpyIter *iter); @@ -291,16 +303,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, /* Per-operand values */ PyArrayObject *op[NPY_MAXARGS]; PyArray_Descr *op_dtype[NPY_MAXARGS]; - int op_type[NPY_MAXARGS]; char op_itflags[NPY_MAXARGS]; npy_intp op_ndim[NPY_MAXARGS]; char **op_dataptr; npy_intp *perm; NpyIter_BufferData *bufferdata = NULL; - char axes_dupcheck[NPY_MAXDIMS]; - int any_allocate_if_null = 0, any_missing_dtypes = 0, - allocate_output_scalars = 0; + int any_allocate = 0, any_missing_dtypes = 0, + allocate_output_scalars = 0, need_subtype = 0; + /* The subtype for automatically allocated outputs */ double subtype_priority = NPY_PRIORITY; PyTypeObject *subtype = &PyArray_Type; @@ -313,59 +324,12 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, } /* Error check 'oa_ndim' and 'op_axes', which must be used together */ - if (oa_ndim == 0 && op_axes != NULL) { - PyErr_Format(PyExc_ValueError, - "If 'op_axes' is not NULL in the iterator constructor, " - "'oa_ndim' must be greater than zero"); + if (!npyiter_check_op_axes(niter, oa_ndim, op_axes)) { return NULL; } - else if (oa_ndim > 0) { - if (oa_ndim > NPY_MAXDIMS) { - PyErr_Format(PyExc_ValueError, - "Cannot construct an iterator with more than %d dimensions " - "(%d were requested for op_axes)", - (int)NPY_MAXDIMS, (int)oa_ndim); - return NULL; - } - else if (op_axes == NULL) { - PyErr_Format(PyExc_ValueError, - "If 'oa_ndim' is greater than zero in the iterator " - "constructor, then op_axes cannot be NULL"); - return NULL; - } - /* Check that there are no duplicates in op_axes */ - for (iiter = 0; iiter < niter; ++iiter) { - npy_intp *axes = op_axes[iiter]; - if (axes != NULL) { - memset(axes_dupcheck, 0, NPY_MAXDIMS); - for (idim = 0; idim < oa_ndim; ++idim) { - npy_intp i = axes[idim]; - if (i >= 0) { - if (i >= NPY_MAXDIMS) { - PyErr_Format(PyExc_ValueError, - "The 'op_axes' provided to the iterator " - "constructor contained invalid " - "values %d", (int)i); - return NULL; - } else if(axes_dupcheck[i] == 1) { - PyErr_Format(PyExc_ValueError, - "The 'op_axes' provided to the iterator " - "constructor contained duplicate " - "value %d", (int)i); - return NULL; - } - else { - axes_dupcheck[i] = 1; - } - } - } - } - } - } - - /* Checks the global iterator flags */ - if (!pyiter_check_global_flags(flags, &itflags)) { + /* Check the global iterator flags */ + if (!npyiter_check_global_flags(flags, &itflags)) { return NULL; } @@ -374,76 +338,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, * chosen to be big enough to get some amortization benefits, but * small enough to be cache-friendly. */ - if (itflags&NPY_ITFLAG_BUFFER && buffersize <= 0) { + if ((itflags&NPY_ITFLAG_BUFFER) && buffersize <= 0) { buffersize = 1 << 12; } /* Prepare all the operands */ - for (iiter = 0; iiter < niter; ++iiter) { - /* - * Make a copy of the input operands so we can substitute - * new values in place when necessary without affecting - * the caller's array. - */ - op[iiter] = op_in[iiter]; - Py_XINCREF(op[iiter]); - op_dtype[iiter] = NULL; - - /* Check the readonly/writeonly flags, and fill in op_itflags */ - if (!npyiter_check_per_op_flags(op_flags[iiter], &op_itflags[iiter])) { - npy_intp i; - - for (i = 0; i <= iiter; ++i) { - Py_XDECREF(op[i]); - Py_XDECREF(op_dtype[i]); - } - return NULL; - } - - /* - * Prepare the operand. This produces an op_dtype[iiter] reference - * on success. - */ - if (!pyiter_prepare_operand(&op[iiter], - op_request_dtypes ? op_request_dtypes[iiter] : NULL, - &op_dtype[iiter], &op_type[iiter], - &op_ndim[iiter], - op_flags[iiter], &op_itflags[iiter])) { - npy_intp i; - - for (i = 0; i <= iiter; ++i) { - Py_XDECREF(op[i]); - Py_XDECREF(op_dtype[i]); - } - return NULL; - } - /* The iterator dimensions is the maximum of all the inputs */ - if (op_ndim[iiter] > ndim) { - ndim = op_ndim[iiter]; - } - } - - - /* If all the operands were NULL, it's an error */ - if (op_type[0] == NPY_ITER_OP_NULL) { - int all_null = 1; - for (iiter = 1; iiter < niter; ++iiter) { - if (op_type[iiter] != NPY_ITER_OP_NULL) { - all_null = 0; - break; - } - } - if (all_null) { - npy_intp i; - - for (i = 0; i < niter; ++i) { - Py_XDECREF(op[i]); - Py_XDECREF(op_dtype[i]); - } - PyErr_SetString(PyExc_ValueError, - "At least one iterator input must be non-NULL"); - return NULL; - } + if (!npyiter_prepare_operands(niter, &ndim, op_in, op, + op_request_dtypes, op_dtype, + op_ndim, op_flags, op_itflags)) { + return NULL; } /* If 'op_axes' is being used, force 'ndim' */ @@ -474,36 +377,37 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, NIT_BASEOFFSETS(iter)[iiter] = 0; /* Get the data pointer for this operand */ - switch (op_type[iiter]) { - case NPY_ITER_OP_ARRAY: - /* - * Array casting/copying is handled later, once the - * iteration order is finalized. Here, we - * optimistically assume the array will be used - * as is. - */ - op_dataptr[iiter] = PyArray_DATA(op[iiter]); - break; - - case NPY_ITER_OP_NULL: - op_dataptr[iiter] = NULL; - /* Now that ndim is fixed, outputs get the full ndim */ - if (allocate_output_scalars) { - op_ndim[iiter] = 0; - } - else { - op_ndim[iiter] = ndim; - } - /* Flag this so later we can avoid flipping axes */ - any_allocate_if_null = 1; - /* - * If the data type wasn't provided, will need to - * calculate it later. - */ - if (op_dtype[iiter] == NULL) { - any_missing_dtypes = 1; - } - break; + if (op[iiter] != NULL) { + /* + * Array casting/copying is handled later, once the + * iteration order is finalized. Here, we + * optimistically assume the array will be used + * as is. + */ + op_dataptr[iiter] = PyArray_DATA(op[iiter]); + } + else { + op_dataptr[iiter] = NULL; + /* Now that ndim is fixed, outputs get the full ndim */ + if (allocate_output_scalars) { + op_ndim[iiter] = 0; + } + else { + op_ndim[iiter] = ndim; + } + /* Flag this so later we can avoid flipping axes */ + any_allocate = 1; + /* If a subtype may be used, indicate so */ + if (!(op_flags[iiter]&NPY_ITER_NO_SUBTYPE)) { + need_subtype = 1; + } + /* + * If the data type wasn't provided, will need to + * calculate it later. + */ + if (op_dtype[iiter] == NULL) { + any_missing_dtypes = 1; + } } } /* Set resetindex to zero as well (it's just after the resetdataptr) */ @@ -569,28 +473,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, * If there's an output being allocated, we must not negate * any strides. */ - if (!any_allocate_if_null) { + if (!any_allocate) { npyiter_flip_negative_strides(iter); } itflags = NIT_ITFLAGS(iter); } - if (any_allocate_if_null) { - /* - * The __array_priority__ attribute of the inputs determines - * the subtype of any output arrays. Take the subtype - * with highest priority. - */ - for (iiter = 0; iiter < niter; ++iiter) { - if (op_itflags[iiter]&NPY_OP_ITFLAG_READ) { - double priority = - PyArray_GetPriority((PyObject *)op[iiter], 0.0); - if (priority > subtype_priority) { - subtype_priority = priority; - subtype = Py_TYPE(op[iiter]); - } - } - } + if (need_subtype) { + npyiter_get_priority_subtype(op, op_itflags, niter, + &subtype_priority, &subtype); } /* @@ -646,189 +537,10 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, * copying due to casting/byte order/alignment can be * done now using a memory layout matching the iterator. */ - for (iiter = 0; iiter < niter; ++iiter) { - if (op_type[iiter] == NPY_ITER_OP_NULL) { - PyArrayObject *out; - PyTypeObject *op_subtype; - - /* Check whether the subtype was disabled */ - if (op_flags[iiter]&NPY_ITER_NO_SUBTYPE) { - op_subtype = &PyArray_Type; - } - else { - op_subtype = subtype; - } - - /* Allocate the output array, if possible */ - out = npyiter_new_temp_array(iter, op_subtype, - op_ndim[iiter], NULL, - op_dtype[iiter], - op_axes ? op_axes[iiter] : NULL); - if (out == NULL) { - NpyIter_Deallocate(iter); - return NULL; - } - - op[iiter] = out; - NIT_OBJECTS(iter)[iiter] = out; - - /* - * Now we need to replace the pointers and strides with values - * from the new array. - */ - npyiter_replace_axisdata(iter, iiter, op[iiter], op_ndim[iiter], - PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL); - - /* New arrays are aligned and need no swapping or casting */ - op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED; - op_itflags[iiter] &= ~(NPY_OP_ITFLAG_COPYSWAP|NPY_OP_ITFLAG_CAST); - } - else if ((op_itflags[iiter]& - (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_COPYSWAP)) && - (op_itflags[iiter]&NPY_OP_ITFLAG_COPY)) { - PyArrayObject *temp; - - /* Allocate the temporary array, if possible */ - temp = npyiter_new_temp_array(iter, &PyArray_Type, - PyArray_NDIM(op[iiter]), - PyArray_DIMS(op[iiter]), - op_dtype[iiter], - op_axes ? op_axes[iiter] : NULL); - if (temp == NULL) { - NpyIter_Deallocate(iter); - return NULL; - } - - /* If the data will be read, copy it into temp */ - if (op_itflags[iiter]&NPY_OP_ITFLAG_READ) { - if (PyArray_CopyInto(temp, op[iiter]) != 0) { - Py_DECREF(temp); - NpyIter_Deallocate(iter); - return NULL; - } - } - /* If the data will be written to, set UPDATEIFCOPY */ - if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { - PyArray_FLAGS(temp) |= NPY_UPDATEIFCOPY; - PyArray_FLAGS(op[iiter]) &= ~NPY_WRITEABLE; - Py_INCREF(op[iiter]); - temp->base = (PyObject *)op[iiter]; - } - - Py_DECREF(op[iiter]); - op[iiter] = temp; - NIT_OBJECTS(iter)[iiter] = temp; - - /* - * Now we need to replace the pointers and strides with values - * from the temporary array. - */ - npyiter_replace_axisdata(iter, iiter, op[iiter], op_ndim[iiter], - PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL); - - /* Now it is aligned, and no longer needs a swap or cast */ - op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED; - op_itflags[iiter] &= ~(NPY_OP_ITFLAG_COPYSWAP|NPY_OP_ITFLAG_CAST); - } - else { - /* - * Buffering must be enabled for casting/conversion if copy - * wasn't specified. - */ - if (op_itflags[iiter]& - (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_COPYSWAP) && - !(itflags&NPY_ITFLAG_BUFFER)) { - PyErr_SetString(PyExc_TypeError, - "Iterator input required copying or buffering, " - "but neither copying nor buffering was enabled"); - NpyIter_Deallocate(iter); - return NULL; - } - - /* - * If the operand is aligned, any buffering can use aligned - * optimizations. - */ - if (PyArray_ISALIGNED(op[iiter])) { - op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED; - } - } - - /* - * If no alignment, byte swap, or casting is needed, and - * the inner stride of this operand works for the whole - * array, we can set NPY_OP_ITFLAG_BUFNEVER. - * But, if buffering is enabled, write-buffering must be - * one-to-one, because the buffering write back won't combine - * values correctly. This test doesn't catch everything, but it will - * catch the most common case of a broadcasting a write-buffered - * dimension. - */ - if ((itflags&NPY_ITFLAG_BUFFER) && - (!(op_itflags[iiter]&(NPY_OP_ITFLAG_CAST| - NPY_OP_ITFLAG_COPYSWAP)) || - (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE))) { - int is_one_to_one = 1; - npy_intp stride, shape, innerstride = 0, innershape; - NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - npy_intp sizeof_axisdata = - NIT_SIZEOF_AXISDATA(itflags, ndim, niter); - /* Find stride of the first non-empty shape */ - for (idim = 0; idim < ndim; ++idim) { - innershape = NAD_SHAPE(axisdata); - if (innershape != 1) { - innerstride = NAD_STRIDES(axisdata)[iiter]; - if (innerstride == 0) { - is_one_to_one = 0; - } - break; - } - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - ++idim; - NIT_ADVANCE_AXISDATA(axisdata, 1); - /* Check that everything could have coalesced together */ - for (; idim < ndim; ++idim) { - stride = NAD_STRIDES(axisdata)[iiter]; - shape = NAD_SHAPE(axisdata); - if (shape != 1) { - if (stride == 0) { - is_one_to_one = 0; - } - /* - * If N times the inner stride doesn't equal this - * stride, the multi-dimensionality is needed. - */ - if (innerstride*innershape != stride) { - break; - } - else { - innershape *= shape; - } - } - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - /* - * If we looped all the way to the end, one stride works. - * Set that stride, because it may not belong to the first - * dimension. - */ - if (idim == ndim && - !(op_itflags[iiter]&(NPY_OP_ITFLAG_CAST| - NPY_OP_ITFLAG_COPYSWAP))) { - op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; - NBF_STRIDES(bufferdata)[iiter] = innerstride; - } - else if (!is_one_to_one && - (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE)) { - PyErr_SetString(PyExc_ValueError, - "Iterator operand requires write buffering, " - "but has dimensions which have been broadcasted " - "and would be combined incorrectly"); - NpyIter_Deallocate(iter); - return NULL; - } - } + if (!npyiter_allocate_arrays(iter, op, op_dtype, subtype, op_flags, + op_itflags, op_ndim, op_axes)) { + NpyIter_Deallocate(iter); + return NULL; } /* @@ -1895,7 +1607,7 @@ npy_intp* NpyIter_GetInnerLoopSizePtr(NpyIter *iter) * Returns 1 on success, 0 on error. */ static int -pyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags) +npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags) { if ((flags&NPY_ITER_PER_OP_FLAGS) != 0) { PyErr_SetString(PyExc_ValueError, @@ -1944,6 +1656,68 @@ pyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags) return 1; } +static int +npyiter_check_op_axes(npy_intp niter, npy_intp oa_ndim, npy_intp **op_axes) +{ + char axes_dupcheck[NPY_MAXDIMS]; + npy_intp iiter, idim; + + if (oa_ndim == 0 && op_axes != NULL) { + PyErr_Format(PyExc_ValueError, + "If 'op_axes' is not NULL in the iterator constructor, " + "'oa_ndim' must be greater than zero"); + return 0; + } + else if (oa_ndim > 0) { + if (oa_ndim > NPY_MAXDIMS) { + PyErr_Format(PyExc_ValueError, + "Cannot construct an iterator with more than %d dimensions " + "(%d were requested for op_axes)", + (int)NPY_MAXDIMS, (int)oa_ndim); + return 0; + } + else if (op_axes == NULL) { + PyErr_Format(PyExc_ValueError, + "If 'oa_ndim' is greater than zero in the iterator " + "constructor, then op_axes cannot be NULL"); + return 0; + } + + /* Check that there are no duplicates in op_axes */ + for (iiter = 0; iiter < niter; ++iiter) { + npy_intp *axes = op_axes[iiter]; + if (axes != NULL) { + memset(axes_dupcheck, 0, NPY_MAXDIMS); + for (idim = 0; idim < oa_ndim; ++idim) { + npy_intp i = axes[idim]; + if (i >= 0) { + if (i >= NPY_MAXDIMS) { + PyErr_Format(PyExc_ValueError, + "The 'op_axes' provided to the iterator " + "constructor for operand %d " + "contained invalid " + "values %d", (int)iiter, (int)i); + return 0; + } else if(axes_dupcheck[i] == 1) { + PyErr_Format(PyExc_ValueError, + "The 'op_axes' provided to the iterator " + "constructor for operand %d " + "contained duplicate " + "value %d", (int)iiter, (int)i); + return 0; + } + else { + axes_dupcheck[i] = 1; + } + } + } + } + } + } + + return 1; +} + /* * Checks the per-operand input flags, and fills in op_itflags. * @@ -2011,6 +1785,85 @@ npyiter_check_per_op_flags(npy_uint32 op_flags, char *op_itflags) } /* + * Process all the operands, copying new references so further processing + * can replace the arrays if copying is necessary. Fill in the iterator's + * natural ndim. + */ +static int +npyiter_prepare_operands(npy_intp niter, npy_intp *ndim, PyArrayObject **op_in, + PyArrayObject **op, PyArray_Descr **op_request_dtypes, + PyArray_Descr **op_dtype, + npy_intp *op_ndim, npy_uint32 *op_flags, char *op_itflags) +{ + npy_intp iiter; + + *ndim = 0; + + for (iiter = 0; iiter < niter; ++iiter) { + op[iiter] = op_in[iiter]; + Py_XINCREF(op[iiter]); + op_dtype[iiter] = NULL; + + /* Check the readonly/writeonly flags, and fill in op_itflags */ + if (!npyiter_check_per_op_flags(op_flags[iiter], &op_itflags[iiter])) { + npy_intp i; + + for (i = 0; i <= iiter; ++i) { + Py_XDECREF(op[i]); + Py_XDECREF(op_dtype[i]); + } + return 0; + } + + /* + * Prepare the operand. This produces an op_dtype[iiter] reference + * on success. + */ + if (!npyiter_prepare_one_operand(&op[iiter], + op_request_dtypes ? op_request_dtypes[iiter] : NULL, + &op_dtype[iiter], &op_ndim[iiter], + op_flags[iiter], &op_itflags[iiter])) { + npy_intp i; + + for (i = 0; i <= iiter; ++i) { + Py_XDECREF(op[i]); + Py_XDECREF(op_dtype[i]); + } + return 0; + } + /* The iterator dimensions is the maximum of all the inputs */ + if (op_ndim[iiter] > *ndim) { + *ndim = op_ndim[iiter]; + } + } + + + /* If all the operands were NULL, it's an error */ + if (op[0] == NULL) { + int all_null = 1; + for (iiter = 1; iiter < niter; ++iiter) { + if (op[iiter] != NULL) { + all_null = 0; + break; + } + } + if (all_null) { + npy_intp i; + + for (i = 0; i < niter; ++i) { + Py_XDECREF(op[i]); + Py_XDECREF(op_dtype[i]); + } + PyErr_SetString(PyExc_ValueError, + "At least one iterator input must be non-NULL"); + return 0; + } + } + + return 1; +} + +/* * Returns 1 if the from -> to cast can be done, based on the casting * flags provided in op_flags, and 0 otherwise. * @@ -2086,14 +1939,14 @@ npyiter_can_cast(PyArray_Descr *from, PyArray_Descr *to, NPY_CASTING casting) /* * Prepares a a constructor operand. Assumes a reference to 'op' - * is owned, and that 'op' may be replaced. Fills in 'op_dtype', - * 'op_type' and 'ndim'. + * is owned, and that 'op' may be replaced. Fills in 'op_dtype' + * and 'ndim'. * * Returns 1 on success, 0 on failure. */ static int -pyiter_prepare_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype, - PyArray_Descr **op_dtype, int* op_type, +npyiter_prepare_one_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype, + PyArray_Descr **op_dtype, npy_intp* op_ndim, npy_uint32 op_flags, char *op_itflags) { @@ -2116,7 +1969,6 @@ pyiter_prepare_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype, /* If a requested dtype was provided, use it, otherwise NULL */ Py_XINCREF(op_request_dtype); *op_dtype = op_request_dtype; - *op_type = NPY_ITER_OP_NULL; *op_ndim = 0; /* No copying of NULL operands */ *op_itflags &= ~NPY_OP_ITFLAG_COPY; @@ -2132,7 +1984,6 @@ pyiter_prepare_operand(PyArrayObject **op, PyArray_Descr *op_request_dtype, return 0; } - *op_type = NPY_ITER_OP_ARRAY; *op_ndim = PyArray_NDIM(*op); /* PyArray_DESCR does not give us a reference */ *op_dtype = PyArray_DESCR(*op); @@ -3267,6 +3118,220 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, return ret; } +static int +npyiter_allocate_arrays(NpyIter *iter, PyArrayObject **op, + PyArray_Descr **op_dtype, PyTypeObject *subtype, + npy_uint32 *op_flags, char *op_itflags, + npy_intp *op_ndim, npy_intp **op_axes) +{ + npy_uint32 itflags = NIT_ITFLAGS(iter); + npy_intp idim, ndim = NIT_NDIM(iter); + npy_intp iiter, niter = NIT_NITER(iter); + + NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); + + for (iiter = 0; iiter < niter; ++iiter) { + if (op[iiter] == NULL) { + PyArrayObject *out; + PyTypeObject *op_subtype; + + /* Check whether the subtype was disabled */ + op_subtype = (op_flags[iiter]&NPY_ITER_NO_SUBTYPE) ? + &PyArray_Type : subtype; + + /* Allocate the output array, if possible */ + out = npyiter_new_temp_array(iter, op_subtype, + op_ndim[iiter], NULL, + op_dtype[iiter], + op_axes ? op_axes[iiter] : NULL); + if (out == NULL) { + return 0; + } + + op[iiter] = out; + NIT_OBJECTS(iter)[iiter] = out; + + /* + * Now we need to replace the pointers and strides with values + * from the new array. + */ + npyiter_replace_axisdata(iter, iiter, op[iiter], op_ndim[iiter], + PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL); + + /* New arrays are aligned and need no swapping or casting */ + op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED; + op_itflags[iiter] &= ~(NPY_OP_ITFLAG_COPYSWAP|NPY_OP_ITFLAG_CAST); + } + else if ((op_itflags[iiter]& + (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_COPYSWAP)) && + (op_itflags[iiter]&NPY_OP_ITFLAG_COPY)) { + PyArrayObject *temp; + + /* Allocate the temporary array, if possible */ + temp = npyiter_new_temp_array(iter, &PyArray_Type, + PyArray_NDIM(op[iiter]), + PyArray_DIMS(op[iiter]), + op_dtype[iiter], + op_axes ? op_axes[iiter] : NULL); + if (temp == NULL) { + return 0; + } + + /* If the data will be read, copy it into temp */ + if (op_itflags[iiter]&NPY_OP_ITFLAG_READ) { + if (PyArray_CopyInto(temp, op[iiter]) != 0) { + Py_DECREF(temp); + return 0; + } + } + /* If the data will be written to, set UPDATEIFCOPY */ + if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { + PyArray_FLAGS(temp) |= NPY_UPDATEIFCOPY; + PyArray_FLAGS(op[iiter]) &= ~NPY_WRITEABLE; + Py_INCREF(op[iiter]); + temp->base = (PyObject *)op[iiter]; + } + + Py_DECREF(op[iiter]); + op[iiter] = temp; + NIT_OBJECTS(iter)[iiter] = temp; + + /* + * Now we need to replace the pointers and strides with values + * from the temporary array. + */ + npyiter_replace_axisdata(iter, iiter, op[iiter], op_ndim[iiter], + PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL); + + /* The temporary copy is aligned and needs no swap or cast */ + op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED; + op_itflags[iiter] &= ~(NPY_OP_ITFLAG_COPYSWAP|NPY_OP_ITFLAG_CAST); + } + else { + /* + * Buffering must be enabled for casting/conversion if copy + * wasn't specified. + */ + if (op_itflags[iiter]& + (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_COPYSWAP) && + !(itflags&NPY_ITFLAG_BUFFER)) { + PyErr_SetString(PyExc_TypeError, + "Iterator operand required copying or buffering, " + "but neither copying nor buffering was enabled"); + return 0; + } + + /* + * If the operand is aligned, any buffering can use aligned + * optimizations. + */ + if (PyArray_ISALIGNED(op[iiter])) { + op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED; + } + } + + /* + * If no alignment, byte swap, or casting is needed, and + * the inner stride of this operand works for the whole + * array, we can set NPY_OP_ITFLAG_BUFNEVER. + * But, if buffering is enabled, write-buffering must be + * one-to-one, because the buffering write back won't combine + * values correctly. This test doesn't catch everything, but it will + * catch the most common case of a broadcasting a write-buffered + * dimension. + */ + if ((itflags&NPY_ITFLAG_BUFFER) && + (!(op_itflags[iiter]&(NPY_OP_ITFLAG_CAST| + NPY_OP_ITFLAG_COPYSWAP)) || + (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE))) { + int is_one_to_one = 1; + npy_intp stride, shape, innerstride = 0, innershape; + NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); + npy_intp sizeof_axisdata = + NIT_SIZEOF_AXISDATA(itflags, ndim, niter); + /* Find stride of the first non-empty shape */ + for (idim = 0; idim < ndim; ++idim) { + innershape = NAD_SHAPE(axisdata); + if (innershape != 1) { + innerstride = NAD_STRIDES(axisdata)[iiter]; + if (innerstride == 0) { + is_one_to_one = 0; + } + break; + } + NIT_ADVANCE_AXISDATA(axisdata, 1); + } + ++idim; + NIT_ADVANCE_AXISDATA(axisdata, 1); + /* Check that everything could have coalesced together */ + for (; idim < ndim; ++idim) { + stride = NAD_STRIDES(axisdata)[iiter]; + shape = NAD_SHAPE(axisdata); + if (shape != 1) { + if (stride == 0) { + is_one_to_one = 0; + } + /* + * If N times the inner stride doesn't equal this + * stride, the multi-dimensionality is needed. + */ + if (innerstride*innershape != stride) { + break; + } + else { + innershape *= shape; + } + } + NIT_ADVANCE_AXISDATA(axisdata, 1); + } + /* + * If we looped all the way to the end, one stride works. + * Set that stride, because it may not belong to the first + * dimension. + */ + if (idim == ndim && + !(op_itflags[iiter]&(NPY_OP_ITFLAG_CAST| + NPY_OP_ITFLAG_COPYSWAP))) { + op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; + NBF_STRIDES(bufferdata)[iiter] = innerstride; + } + else if (!is_one_to_one && + (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE)) { + PyErr_SetString(PyExc_ValueError, + "Iterator operand requires write buffering, " + "but has dimensions which have been broadcasted " + "and would be combined incorrectly"); + return 0; + } + } + } + + return 1; +} + +/* + * The __array_priority__ attribute of the inputs determines + * the subtype of any output arrays. This function finds the + * subtype of the input array with highest priority. + */ +static void +npyiter_get_priority_subtype(PyArrayObject **op, char *op_itflags, + npy_intp niter, double *subtype_priority, + PyTypeObject **subtype) +{ + npy_intp iiter; + + for (iiter = 0; iiter < niter; ++iiter) { + if (op_itflags[iiter]&NPY_OP_ITFLAG_READ) { + double priority = PyArray_GetPriority((PyObject *)op[iiter], 0.0); + if (priority > *subtype_priority) { + *subtype_priority = priority; + *subtype = Py_TYPE(op[iiter]); + } + } + } +} + /* * Calculates a dtype that all the types can be promoted to, using the * ufunc rules. If only_inputs is 1, it leaves any operands that |