-rw-r--r--  numpy/core/SConscript                         1
-rw-r--r--  numpy/core/code_generators/genapi.py          1
-rw-r--r--  numpy/core/setup.py                          24
-rw-r--r--  numpy/core/src/multiarray/nditer.c.src     2590
-rw-r--r--  numpy/core/src/multiarray/nditer_api.c     2601
-rw-r--r--  numpy/core/src/multiarray/nditer_constr.c     2
6 files changed, 2628 insertions, 2591 deletions
diff --git a/numpy/core/SConscript b/numpy/core/SConscript
index 802b8aa2b..f831515b9 100644
--- a/numpy/core/SConscript
+++ b/numpy/core/SConscript
@@ -469,6 +469,7 @@ if ENABLE_SEPARATE_COMPILATION:
pjoin('src', 'multiarray', 'buffer.c'),
pjoin('src', 'multiarray', 'numpymemoryview.c'),
pjoin('src', 'multiarray', 'scalarapi.c'),
+ pjoin('src', 'multiarray', 'nditer_api.c'),
pjoin('src', 'multiarray', 'nditer_constr.c'),
pjoin('src', 'multiarray', 'nditer_pywrap.c'),
pjoin('src', 'multiarray', 'dtype_transfer.c')]
diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py
index 23b85f9fe..38628d1c6 100644
--- a/numpy/core/code_generators/genapi.py
+++ b/numpy/core/code_generators/genapi.py
@@ -48,6 +48,7 @@ API_FILES = [join('multiarray', 'methods.c'),
join('multiarray', 'datetime_busday.c'),
join('multiarray', 'datetime_busdaycal.c'),
join('multiarray', 'nditer.c.src'),
+ join('multiarray', 'nditer_api.c'),
join('multiarray', 'nditer_constr.c'),
join('multiarray', 'nditer_pywrap.c'),
join('multiarray', 'einsum.c.src'),
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 38ef079b0..aa3175b20 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -724,7 +724,28 @@ def configuration(parent_package='',top_path=None):
join('src', 'multiarray', 'shape.h'),
join('src', 'multiarray', 'ucsnarrow.h'),
join('src', 'multiarray', 'usertypes.h'),
- join('src', 'private', 'lowlevel_strided_loops.h')]
+ join('src', 'private', 'lowlevel_strided_loops.h'),
+ join('include', 'numpy', 'arrayobject.h'),
+ join('include', 'numpy', '_neighborhood_iterator_imp.h'),
+ join('include', 'numpy', 'npy_endian.h'),
+ join('include', 'numpy', 'old_defines.h'),
+ join('include', 'numpy', 'arrayscalars.h'),
+ join('include', 'numpy', 'noprefix.h'),
+ join('include', 'numpy', 'npy_interrupt.h'),
+ join('include', 'numpy', 'oldnumeric.h'),
+ join('include', 'numpy', 'npy_3kcompat.h'),
+ join('include', 'numpy', 'npy_math.h'),
+ join('include', 'numpy', 'halffloat.h'),
+ join('include', 'numpy', 'npy_common.h'),
+ join('include', 'numpy', 'npy_os.h'),
+ join('include', 'numpy', 'utils.h'),
+ join('include', 'numpy', 'ndarrayobject.h'),
+ join('include', 'numpy', 'npy_cpu.h'),
+ join('include', 'numpy', 'numpyconfig.h'),
+ join('include', 'numpy', 'ndarraytypes.h'),
+ join('include', 'numpy', 'npy_deprecated_api.h'),
+ join('include', 'numpy', '_numpyconfig.h.in'),
+ ]
multiarray_src = [
join('src', 'multiarray', 'arrayobject.c'),
@@ -753,6 +774,7 @@ def configuration(parent_package='',top_path=None):
join('src', 'multiarray', 'methods.c'),
join('src', 'multiarray', 'multiarraymodule.c'),
join('src', 'multiarray', 'nditer.c.src'),
+ join('src', 'multiarray', 'nditer_api.c'),
join('src', 'multiarray', 'nditer_constr.c'),
join('src', 'multiarray', 'nditer_pywrap.c'),
join('src', 'multiarray', 'number.c'),
diff --git a/numpy/core/src/multiarray/nditer.c.src b/numpy/core/src/multiarray/nditer.c.src
index 991426f1c..59aae244b 100644
--- a/numpy/core/src/multiarray/nditer.c.src
+++ b/numpy/core/src/multiarray/nditer.c.src
@@ -1,5 +1,6 @@
/*
- * This file implements a highly flexible iterator for NumPy.
+ * This file implements the API functions for NumPy's nditer that
+ * are specialized using the templating system.
*
* Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
* The University of British Columbia
@@ -11,646 +12,6 @@
#define NPY_ITERATOR_IMPLEMENTATION_CODE
#include "nditer_impl.h"
-/* Internal helper functions private to this file */
-static npy_intp
-npyiter_checkreducesize(NpyIter *iter, npy_intp count,
- npy_intp *reduce_innersize,
- npy_intp *reduce_outerdim);
-
-/*NUMPY_API
- * Removes an axis from iteration. This requires that NPY_ITER_MULTI_INDEX
- * was set for iterator creation, and does not work if buffering is
- * enabled. This function also resets the iterator to its initial state.
- *
- * Returns NPY_SUCCEED or NPY_FAIL.
- */
-NPY_NO_EXPORT int
-NpyIter_RemoveAxis(NpyIter *iter, int axis)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int idim, ndim = NIT_NDIM(iter);
- int iop, nop = NIT_NOP(iter);
-
- int xdim = 0;
- npy_int8 *perm = NIT_PERM(iter);
- NpyIter_AxisData *axisdata_del = NIT_AXISDATA(iter), *axisdata;
- npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
-
- npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);
- char **resetdataptr = NIT_RESETDATAPTR(iter);
-
- if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
- PyErr_SetString(PyExc_RuntimeError,
- "Iterator RemoveAxis may only be called "
- "if a multi-index is being tracked");
- return NPY_FAIL;
- }
- else if (itflags&NPY_ITFLAG_HASINDEX) {
- PyErr_SetString(PyExc_RuntimeError,
- "Iterator RemoveAxis may not be called on "
- "an index is being tracked");
- return NPY_FAIL;
- }
- else if (itflags&NPY_ITFLAG_BUFFER) {
- PyErr_SetString(PyExc_RuntimeError,
- "Iterator RemoveAxis may not be called on "
- "a buffered iterator");
- return NPY_FAIL;
- }
- else if (axis < 0 || axis >= ndim) {
- PyErr_SetString(PyExc_ValueError,
- "axis out of bounds in iterator RemoveAxis");
- return NPY_FAIL;
- }
-
- /* Reverse axis, since the iterator treats them that way */
- axis = ndim - 1 - axis;
-
- /* First find the axis in question */
- for (idim = 0; idim < ndim; ++idim) {
- /* If this is it, and it's iterated forward, done */
- if (perm[idim] == axis) {
- xdim = idim;
- break;
- }
- /* If this is it, but it's iterated backward, must reverse the axis */
- else if (-1 - perm[idim] == axis) {
- npy_intp *strides = NAD_STRIDES(axisdata_del);
- npy_intp shape = NAD_SHAPE(axisdata_del), offset;
-
- xdim = idim;
-
- /*
- * Adjust baseoffsets and resetbaseptr back to the start of
- * this axis.
- */
- for (iop = 0; iop < nop; ++iop) {
- offset = (shape-1)*strides[iop];
- baseoffsets[iop] += offset;
- resetdataptr[iop] += offset;
- }
- break;
- }
-
- NIT_ADVANCE_AXISDATA(axisdata_del, 1);
- }
-
- if (idim == ndim) {
- PyErr_SetString(PyExc_RuntimeError,
- "internal error in iterator perm");
- return NPY_FAIL;
- }
-
- if (NAD_SHAPE(axisdata_del) == 0) {
- PyErr_SetString(PyExc_ValueError,
- "cannot remove a zero-sized axis from an iterator");
- return NPY_FAIL;
- }
-
- /* Adjust the permutation */
- for (idim = 0; idim < ndim-1; ++idim) {
- npy_int8 p = (idim < xdim) ? perm[idim] : perm[idim+1];
- if (p >= 0) {
- if (p > axis) {
- --p;
- }
- }
- else if (p <= 0) {
- if (p < -1-axis) {
- ++p;
- }
- }
- perm[idim] = p;
- }
-
- /* Adjust the iteration size */
- NIT_ITERSIZE(iter) /= NAD_SHAPE(axisdata_del);
-
- /* Shift all the axisdata structures by one */
- axisdata = NIT_INDEX_AXISDATA(axisdata_del, 1);
- memmove(axisdata_del, axisdata, (ndim-1-xdim)*sizeof_axisdata);
-
- /* If there is more than one dimension, shrink the iterator */
- if (ndim > 1) {
- NIT_NDIM(iter) = ndim-1;
- }
- /* Otherwise convert it to a singleton dimension */
- else {
- npy_intp *strides = NAD_STRIDES(axisdata_del);
- NAD_SHAPE(axisdata_del) = 1;
- for (iop = 0; iop < nop; ++iop) {
- strides[iop] = 0;
- }
- NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
- }
-
- return NpyIter_Reset(iter, NULL);
-}
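
As a usage illustration (not part of this commit): NpyIter_RemoveAxis pins one
axis so it can be looped over manually. A minimal sketch, assuming `arr` is an
existing PyArrayObject* with at least one dimension, and recalling from the
comment above that multi-index tracking is required and buffering must be off:

    NpyIter *iter = NpyIter_New(arr,
            NPY_ITER_READONLY | NPY_ITER_MULTI_INDEX,
            NPY_KEEPORDER, NPY_NO_CASTING, NULL);
    if (iter == NULL) {
        return NULL;  /* error already set by the constructor */
    }
    /* Drop axis 0 from the iteration; this also resets the iterator */
    if (NpyIter_RemoveAxis(iter, 0) != NPY_SUCCEED) {
        NpyIter_Deallocate(iter);
        return NULL;
    }
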
-
-/*NUMPY_API
- * Removes multi-index support from an iterator.
- *
- * Returns NPY_SUCCEED or NPY_FAIL.
- */
-NPY_NO_EXPORT int
-NpyIter_RemoveMultiIndex(NpyIter *iter)
-{
- npy_uint32 itflags;
-
- /* Make sure the iterator is reset */
- if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) {
- return NPY_FAIL;
- }
-
- itflags = NIT_ITFLAGS(iter);
- if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
- NIT_ITFLAGS(iter) = itflags & ~NPY_ITFLAG_HASMULTIINDEX;
- npyiter_coalesce_axes(iter);
- }
-
- return NPY_SUCCEED;
-}
-
-/*NUMPY_API
- * Removes the inner loop handling (so HasExternalLoop returns true)
- */
-NPY_NO_EXPORT int
-NpyIter_EnableExternalLoop(NpyIter *iter)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- /*int ndim = NIT_NDIM(iter);*/
- int nop = NIT_NOP(iter);
-
- /* Check conditions under which this can be done */
- if (itflags&(NPY_ITFLAG_HASINDEX|NPY_ITFLAG_HASMULTIINDEX)) {
- PyErr_SetString(PyExc_ValueError,
- "Iterator flag EXTERNAL_LOOP cannot be used "
- "if an index or multi-index is being tracked");
- return NPY_FAIL;
- }
- if ((itflags&(NPY_ITFLAG_BUFFER|NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP))
- == (NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP)) {
- PyErr_SetString(PyExc_ValueError,
- "Iterator flag EXTERNAL_LOOP cannot be used "
- "with ranged iteration unless buffering is also enabled");
- return NPY_FAIL;
- }
- /* Set the flag */
- if (!(itflags&NPY_ITFLAG_EXLOOP)) {
- itflags |= NPY_ITFLAG_EXLOOP;
- NIT_ITFLAGS(iter) = itflags;
-
- /*
- * Check whether we can apply the single iteration
- * optimization to the iternext function.
- */
- if (!(itflags&NPY_ITFLAG_BUFFER)) {
- NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
- if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) {
- NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
- }
- }
- }
-
- /* Reset the iterator */
- return NpyIter_Reset(iter, NULL);
-}
-
-/*NUMPY_API
- * Resets the iterator to its initial state
- *
- * If errmsg is non-NULL, it should point to a variable which will
- * receive the error message, and no Python exception will be set.
- * This is so that the function can be called from code not holding
- * the GIL.
- */
-NPY_NO_EXPORT int
-NpyIter_Reset(NpyIter *iter, char **errmsg)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- /*int ndim = NIT_NDIM(iter);*/
- int nop = NIT_NOP(iter);
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- NpyIter_BufferData *bufferdata;
-
- /* If buffer allocation was delayed, do it now */
- if (itflags&NPY_ITFLAG_DELAYBUF) {
- if (!npyiter_allocate_buffers(iter, errmsg)) {
- return NPY_FAIL;
- }
- NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
- }
- else {
- /*
- * If the iterindex is already right, no need to
- * do anything
- */
- bufferdata = NIT_BUFFERDATA(iter);
- if (NIT_ITERINDEX(iter) == NIT_ITERSTART(iter) &&
- NBF_BUFITEREND(bufferdata) <= NIT_ITEREND(iter) &&
- NBF_SIZE(bufferdata) > 0) {
- return NPY_SUCCEED;
- }
-
- /* Copy any data from the buffers back to the arrays */
- npyiter_copy_from_buffers(iter);
- }
- }
-
- npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- /* Prepare the next buffers and set iterend/size */
- npyiter_copy_to_buffers(iter, NULL);
- }
-
- return NPY_SUCCEED;
-}
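
Because Reset reports failures through errmsg when it is non-NULL, it can be
called with the GIL released. A sketch of that pattern, using NumPy's standard
threading macros and assuming `iter` already exists (fragment, not part of
this commit):

    char *errmsg = NULL;
    NPY_BEGIN_THREADS_DEF;

    NPY_BEGIN_THREADS;
    if (NpyIter_Reset(iter, &errmsg) != NPY_SUCCEED) {
        NPY_END_THREADS;
        /* Re-acquired the GIL, so a Python exception can be set now */
        PyErr_SetString(PyExc_RuntimeError, errmsg);
        return NULL;
    }
    /* ... iterate without holding the GIL ... */
    NPY_END_THREADS;
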
-
-/*NUMPY_API
- * Resets the iterator to its initial state, with new base data pointers
- *
- * If errmsg is non-NULL, it should point to a variable which will
- * receive the error message, and no Python exception will be set.
- * This is so that the function can be called from code not holding
- * the GIL.
- */
-NPY_NO_EXPORT int
-NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs, char **errmsg)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- /*int ndim = NIT_NDIM(iter);*/
- int iop, nop = NIT_NOP(iter);
-
- char **resetdataptr = NIT_RESETDATAPTR(iter);
- npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- /* If buffer allocation was delayed, do it now */
- if (itflags&NPY_ITFLAG_DELAYBUF) {
- if (!npyiter_allocate_buffers(iter, errmsg)) {
- return NPY_FAIL;
- }
- NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
- }
- else {
- /* Copy any data from the buffers back to the arrays */
- npyiter_copy_from_buffers(iter);
- }
- }
-
- /* The new data pointers for resetting */
- for (iop = 0; iop < nop; ++iop) {
- resetdataptr[iop] = baseptrs[iop] + baseoffsets[iop];
- }
-
- npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- /* Prepare the next buffers and set iterend/size */
- npyiter_copy_to_buffers(iter, NULL);
- }
-
- return NPY_SUCCEED;
-}
-
-/*NUMPY_API
- * Resets the iterator to a new iterator index range
- *
- * If errmsg is non-NULL, it should point to a variable which will
- * receive the error message, and no Python exception will be set.
- * This is so that the function can be called from code not holding
- * the GIL.
- */
-NPY_NO_EXPORT int
-NpyIter_ResetToIterIndexRange(NpyIter *iter,
- npy_intp istart, npy_intp iend, char **errmsg)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- /*int ndim = NIT_NDIM(iter);*/
- /*int nop = NIT_NOP(iter);*/
-
- if (!(itflags&NPY_ITFLAG_RANGE)) {
- if (errmsg == NULL) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot call ResetToIterIndexRange on an iterator without "
- "requesting ranged iteration support in the constructor");
- }
- else {
- *errmsg = "Cannot call ResetToIterIndexRange on an iterator "
- "without requesting ranged iteration support in the "
- "constructor";
- }
- return NPY_FAIL;
- }
-
- if (istart < 0 || iend > NIT_ITERSIZE(iter)) {
- if (errmsg == NULL) {
- PyErr_Format(PyExc_ValueError,
- "Out-of-bounds range [%d, %d) passed to "
- "ResetToIterIndexRange", (int)istart, (int)iend);
- }
- else {
- *errmsg = "Out-of-bounds range passed to ResetToIterIndexRange";
- }
- return NPY_FAIL;
- }
- else if (iend < istart) {
- if (errmsg == NULL) {
- PyErr_Format(PyExc_ValueError,
- "Invalid range [%d, %d) passed to ResetToIterIndexRange",
- (int)istart, (int)iend);
- }
- else {
- *errmsg = "Invalid range passed to ResetToIterIndexRange";
- }
- return NPY_FAIL;
- }
-
- NIT_ITERSTART(iter) = istart;
- NIT_ITEREND(iter) = iend;
-
- return NpyIter_Reset(iter, errmsg);
-}
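
A sketch of ranged iteration (not part of this commit), splitting the work
into two halves; it assumes `iter` was constructed with NPY_ITER_RANGED:

    npy_intp size = NpyIter_GetIterSize(iter);
    npy_intp half = size / 2;

    if (NpyIter_ResetToIterIndexRange(iter, 0, half, NULL) != NPY_SUCCEED) {
        return NULL;
    }
    /* ... iterate over [0, half) ... */

    if (NpyIter_ResetToIterIndexRange(iter, half, size, NULL) != NPY_SUCCEED) {
        return NULL;
    }
    /* ... iterate over [half, size) ... */
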
-
-/*NUMPY_API
- * Sets the iterator to the specified multi-index, which must have the
- * correct number of entries for 'ndim'. It is only valid
- * when NPY_ITER_MULTI_INDEX was passed to the constructor. This operation
- * fails if the multi-index is out of bounds.
- *
- * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
- */
-NPY_NO_EXPORT int
-NpyIter_GotoMultiIndex(NpyIter *iter, npy_intp *multi_index)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int idim, ndim = NIT_NDIM(iter);
- int nop = NIT_NOP(iter);
-
- npy_intp iterindex, factor;
- NpyIter_AxisData *axisdata;
- npy_intp sizeof_axisdata;
- npy_int8 *perm;
-
- if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot call GotoMultiIndex on an iterator without "
- "requesting a multi-index in the constructor");
- return NPY_FAIL;
- }
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot call GotoMultiIndex on an iterator which "
- "is buffered");
- return NPY_FAIL;
- }
-
- if (itflags&NPY_ITFLAG_EXLOOP) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot call GotoMultiIndex on an iterator which "
- "has the flag EXTERNAL_LOOP");
- return NPY_FAIL;
- }
-
- perm = NIT_PERM(iter);
- axisdata = NIT_AXISDATA(iter);
- sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
-
- /* Compute the iterindex corresponding to the multi-index */
- iterindex = 0;
- factor = 1;
- for (idim = 0; idim < ndim; ++idim) {
- npy_int8 p = perm[idim];
- npy_intp i, shape;
-
- shape = NAD_SHAPE(axisdata);
- if (p < 0) {
- /* If the perm entry is negative, reverse the index */
- i = shape - multi_index[ndim+p] - 1;
- }
- else {
- i = multi_index[ndim-p-1];
- }
-
- /* Bounds-check this index */
- if (i >= 0 && i < shape) {
- iterindex += factor * i;
- factor *= shape;
- }
- else {
- PyErr_SetString(PyExc_IndexError,
- "Iterator GotoMultiIndex called with an out-of-bounds "
- "multi-index");
- return NPY_FAIL;
- }
-
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- }
-
- if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
- PyErr_SetString(PyExc_IndexError,
- "Iterator GotoMultiIndex called with a multi-index outside the "
- "restricted iteration range");
- return NPY_FAIL;
- }
-
- npyiter_goto_iterindex(iter, iterindex);
-
- return NPY_SUCCEED;
-}
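
A sketch of jumping directly to one element (not part of this commit),
assuming a 2-d iteration built with NPY_ITER_MULTI_INDEX and, per the checks
above, without buffering or EXTERNAL_LOOP:

    npy_intp multi_index[2] = {1, 2};
    char **dataptr = NpyIter_GetDataPtrArray(iter);

    if (NpyIter_GotoMultiIndex(iter, multi_index) != NPY_SUCCEED) {
        return NULL;
    }
    /* dataptr[0] now points at operand 0's element at index (1, 2) */
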
-
-/*NUMPY_API
- * If the iterator is tracking an index, sets the iterator
- * to the specified index.
- *
- * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
- */
-NPY_NO_EXPORT int
-NpyIter_GotoIndex(NpyIter *iter, npy_intp flat_index)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int idim, ndim = NIT_NDIM(iter);
- int nop = NIT_NOP(iter);
-
- npy_intp iterindex, factor;
- NpyIter_AxisData *axisdata;
- npy_intp sizeof_axisdata;
-
- if (!(itflags&NPY_ITFLAG_HASINDEX)) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot call GotoIndex on an iterator without "
- "requesting a C or Fortran index in the constructor");
- return NPY_FAIL;
- }
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot call GotoIndex on an iterator which "
- "is buffered");
- return NPY_FAIL;
- }
-
- if (itflags&NPY_ITFLAG_EXLOOP) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot call GotoIndex on an iterator which "
- "has the flag EXTERNAL_LOOP");
- return NPY_FAIL;
- }
-
- if (flat_index < 0 || flat_index >= NIT_ITERSIZE(iter)) {
- PyErr_SetString(PyExc_IndexError,
- "Iterator GotoIndex called with an out-of-bounds "
- "index");
- return NPY_FAIL;
- }
-
- axisdata = NIT_AXISDATA(iter);
- sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
-
- /* Compute the iterindex corresponding to the flat_index */
- iterindex = 0;
- factor = 1;
- for (idim = 0; idim < ndim; ++idim) {
- npy_intp i, shape, iterstride;
-
- iterstride = NAD_STRIDES(axisdata)[nop];
- shape = NAD_SHAPE(axisdata);
-
- /* Extract the index from the flat_index */
- if (iterstride == 0) {
- i = 0;
- }
- else if (iterstride < 0) {
- i = shape - (flat_index/(-iterstride))%shape - 1;
- }
- else {
- i = (flat_index/iterstride)%shape;
- }
-
- /* Add its contribution to iterindex */
- iterindex += factor * i;
- factor *= shape;
-
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- }
-
-
- if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
- PyErr_SetString(PyExc_IndexError,
- "Iterator GotoIndex called with an index outside the "
- "restricted iteration range.");
- return NPY_FAIL;
- }
-
- npyiter_goto_iterindex(iter, iterindex);
-
- return NPY_SUCCEED;
-}
-
-/*NUMPY_API
- * Sets the iterator position to the specified iterindex,
- * which matches the iteration order of the iterator.
- *
- * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
- */
-NPY_NO_EXPORT int
-NpyIter_GotoIterIndex(NpyIter *iter, npy_intp iterindex)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- /*int ndim = NIT_NDIM(iter);*/
- int iop, nop = NIT_NOP(iter);
-
- if (itflags&NPY_ITFLAG_EXLOOP) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot call GotoIterIndex on an iterator which "
- "has the flag EXTERNAL_LOOP");
- return NPY_FAIL;
- }
-
- if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
- PyErr_SetString(PyExc_IndexError,
- "Iterator GotoIterIndex called with an iterindex outside the "
- "iteration range.");
- return NPY_FAIL;
- }
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
- npy_intp bufiterend, size;
-
- size = NBF_SIZE(bufferdata);
- bufiterend = NBF_BUFITEREND(bufferdata);
- /* Check if the new iterindex is already within the buffer */
- if (!(itflags&NPY_ITFLAG_REDUCE) && iterindex < bufiterend &&
- iterindex >= bufiterend - size) {
- npy_intp *strides, delta;
- char **ptrs;
-
- strides = NBF_STRIDES(bufferdata);
- ptrs = NBF_PTRS(bufferdata);
- delta = iterindex - NIT_ITERINDEX(iter);
-
- for (iop = 0; iop < nop; ++iop) {
- ptrs[iop] += delta * strides[iop];
- }
-
- NIT_ITERINDEX(iter) = iterindex;
- }
- /* Start the buffer at the provided iterindex */
- else {
- /* Write back to the arrays */
- npyiter_copy_from_buffers(iter);
-
- npyiter_goto_iterindex(iter, iterindex);
-
- /* Prepare the next buffers and set iterend/size */
- npyiter_copy_to_buffers(iter, NULL);
- }
- }
- else {
- npyiter_goto_iterindex(iter, iterindex);
- }
-
- return NPY_SUCCEED;
-}
-
-/*NUMPY_API
- * Gets the current iteration index
- */
-NPY_NO_EXPORT npy_intp
-NpyIter_GetIterIndex(NpyIter *iter)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int idim, ndim = NIT_NDIM(iter);
- int nop = NIT_NOP(iter);
-
- /* iterindex is only used if NPY_ITER_RANGED or NPY_ITER_BUFFERED was set */
- if (itflags&(NPY_ITFLAG_RANGE|NPY_ITFLAG_BUFFER)) {
- return NIT_ITERINDEX(iter);
- }
- else {
- npy_intp iterindex;
- NpyIter_AxisData *axisdata;
- npy_intp sizeof_axisdata;
-
- iterindex = 0;
- sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
- axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1);
-
- for (idim = ndim-2; idim >= 0; --idim) {
- iterindex += NAD_INDEX(axisdata);
- NIT_ADVANCE_AXISDATA(axisdata, -1);
- iterindex *= NAD_SHAPE(axisdata);
- }
- iterindex += NAD_INDEX(axisdata);
-
- return iterindex;
- }
-}
-
/* SPECIALIZED iternext functions that handle the non-buffering part */
/**begin repeat
@@ -1241,1951 +602,4 @@ NpyIter_GetGetMultiIndex(NpyIter *iter, char **errmsg)
}
-/*NUMPY_API
- * Whether the buffer allocation is being delayed
- */
-NPY_NO_EXPORT npy_bool
-NpyIter_HasDelayedBufAlloc(NpyIter *iter)
-{
- return (NIT_ITFLAGS(iter)&NPY_ITFLAG_DELAYBUF) != 0;
-}
-
-/*NUMPY_API
- * Whether the iterator handles the inner loop
- */
-NPY_NO_EXPORT npy_bool
-NpyIter_HasExternalLoop(NpyIter *iter)
-{
- return (NIT_ITFLAGS(iter)&NPY_ITFLAG_EXLOOP) != 0;
-}
-
-/*NUMPY_API
- * Whether the iterator is tracking a multi-index
- */
-NPY_NO_EXPORT npy_bool
-NpyIter_HasMultiIndex(NpyIter *iter)
-{
- return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASMULTIINDEX) != 0;
-}
-
-/*NUMPY_API
- * Whether the iterator is tracking an index
- */
-NPY_NO_EXPORT npy_bool
-NpyIter_HasIndex(NpyIter *iter)
-{
- return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASINDEX) != 0;
-}
-
-/*NUMPY_API
- * Whether the iteration could be done with no buffering.
- */
-NPY_NO_EXPORT npy_bool
-NpyIter_RequiresBuffering(NpyIter *iter)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- /*int ndim = NIT_NDIM(iter);*/
- int iop, nop = NIT_NOP(iter);
-
- char *op_itflags;
-
- if (!(itflags&NPY_ITFLAG_BUFFER)) {
- return 0;
- }
-
- op_itflags = NIT_OPITFLAGS(iter);
-
- /* If any operand requires a cast, buffering is mandatory */
- for (iop = 0; iop < nop; ++iop) {
- if (op_itflags[iop]&NPY_OP_ITFLAG_CAST) {
- return 1;
- }
- }
-
- return 0;
-}
-
-/*NUMPY_API
- * Whether the iteration loop, and in particular the iternext()
- * function, needs API access. If this is true, the GIL must
- * be retained while iterating.
- */
-NPY_NO_EXPORT npy_bool
-NpyIter_IterationNeedsAPI(NpyIter *iter)
-{
- return (NIT_ITFLAGS(iter)&NPY_ITFLAG_NEEDSAPI) != 0;
-}
-
-/*NUMPY_API
- * Gets the number of dimensions being iterated
- */
-NPY_NO_EXPORT int
-NpyIter_GetNDim(NpyIter *iter)
-{
- return NIT_NDIM(iter);
-}
-
-/*NUMPY_API
- * Gets the number of operands being iterated
- */
-NPY_NO_EXPORT int
-NpyIter_GetNOp(NpyIter *iter)
-{
- return NIT_NOP(iter);
-}
-
-/*NUMPY_API
- * Gets the number of elements being iterated
- */
-NPY_NO_EXPORT npy_intp
-NpyIter_GetIterSize(NpyIter *iter)
-{
- return NIT_ITERSIZE(iter);
-}
-
-/*NUMPY_API
- * Whether the iterator is buffered
- */
-NPY_NO_EXPORT npy_bool
-NpyIter_IsBuffered(NpyIter *iter)
-{
- return (NIT_ITFLAGS(iter)&NPY_ITFLAG_BUFFER) != 0;
-}
-
-/*NUMPY_API
- * Whether the inner loop can grow if buffering is unneeded
- */
-NPY_NO_EXPORT npy_bool
-NpyIter_IsGrowInner(NpyIter *iter)
-{
- return (NIT_ITFLAGS(iter)&NPY_ITFLAG_GROWINNER) != 0;
-}
-
-/*NUMPY_API
- * Gets the size of the buffer, or 0 if buffering is not enabled
- */
-NPY_NO_EXPORT npy_intp
-NpyIter_GetBufferSize(NpyIter *iter)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- /*int ndim = NIT_NDIM(iter);*/
- int nop = NIT_NOP(iter);
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
- return NBF_BUFFERSIZE(bufferdata);
- }
- else {
- return 0;
- }
-
-}
-
-/*NUMPY_API
- * Gets the range of iteration indices being iterated
- */
-NPY_NO_EXPORT void
-NpyIter_GetIterIndexRange(NpyIter *iter,
- npy_intp *istart, npy_intp *iend)
-{
- *istart = NIT_ITERSTART(iter);
- *iend = NIT_ITEREND(iter);
-}
-
-/*NUMPY_API
- * Gets the broadcast shape if a multi-index is being tracked by the iterator,
- * otherwise gets the shape of the iteration as Fortran-order
- * (fastest-changing index first).
- *
- * The reason Fortran-order is returned when a multi-index
- * is not enabled is that this is providing a direct view into how
- * the iterator traverses the n-dimensional space. The iterator organizes
- * its memory from fastest index to slowest index, and when
- * a multi-index is enabled, it uses a permutation to recover the original
- * order.
- *
- * Returns NPY_SUCCEED or NPY_FAIL.
- */
-NPY_NO_EXPORT int
-NpyIter_GetShape(NpyIter *iter, npy_intp *outshape)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int ndim = NIT_NDIM(iter);
- int nop = NIT_NOP(iter);
-
- int idim, sizeof_axisdata;
- NpyIter_AxisData *axisdata;
- npy_int8 *perm;
-
- axisdata = NIT_AXISDATA(iter);
- sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
-
- if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
- perm = NIT_PERM(iter);
- for(idim = 0; idim < ndim; ++idim) {
- npy_int8 p = perm[idim];
- if (p < 0) {
- outshape[ndim+p] = NAD_SHAPE(axisdata);
- }
- else {
- outshape[ndim-p-1] = NAD_SHAPE(axisdata);
- }
-
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- }
- }
- else {
- for(idim = 0; idim < ndim; ++idim) {
- outshape[idim] = NAD_SHAPE(axisdata);
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- }
- }
-
- return NPY_SUCCEED;
-}
-
-/*NUMPY_API
- * Builds a set of strides which are the same as the strides of an
- * output array created using the NPY_ITER_ALLOCATE flag, where NULL
- * was passed for op_axes. This is for data packed contiguously,
- * but not necessarily in C or Fortran order. This should be used
- * together with NpyIter_GetShape and NpyIter_GetNDim.
- *
- * A use case for this function is to match the shape and layout of
- * the iterator and tack on one or more dimensions. For example,
- * in order to generate a vector per input value for a numerical gradient,
- * you pass in ndim*itemsize for itemsize, then add another dimension to
- * the end with size ndim and stride itemsize. To do the Hessian matrix,
- * you do the same thing but add two dimensions, or take advantage of
- * the symmetry and pack it into 1 dimension with a particular encoding.
- *
- * This function may only be called if the iterator is tracking a multi-index
- * and if NPY_ITER_DONT_NEGATE_STRIDES was used to prevent an axis from
- * being iterated in reverse order.
- *
- * If an array is created with this method, simply adding 'itemsize'
- * for each iteration will traverse the new array matching the
- * iterator.
- *
- * Returns NPY_SUCCEED or NPY_FAIL.
- */
-NPY_NO_EXPORT int
-NpyIter_CreateCompatibleStrides(NpyIter *iter,
- npy_intp itemsize, npy_intp *outstrides)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int idim, ndim = NIT_NDIM(iter);
- int nop = NIT_NOP(iter);
-
- npy_intp sizeof_axisdata;
- NpyIter_AxisData *axisdata;
- npy_int8 *perm;
-
- if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
- PyErr_SetString(PyExc_RuntimeError,
- "Iterator CreateCompatibleStrides may only be called "
- "if a multi-index is being tracked");
- return NPY_FAIL;
- }
-
- axisdata = NIT_AXISDATA(iter);
- sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
-
- perm = NIT_PERM(iter);
- for(idim = 0; idim < ndim; ++idim) {
- npy_int8 p = perm[idim];
- if (p < 0) {
- PyErr_SetString(PyExc_RuntimeError,
- "Iterator CreateCompatibleStrides may only be called "
- "if DONT_NEGATE_STRIDES was used to prevent reverse "
- "iteration of an axis");
- return NPY_FAIL;
- }
- else {
- outstrides[ndim-p-1] = itemsize;
- }
-
- itemsize *= NAD_SHAPE(axisdata);
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- }
-
- return NPY_SUCCEED;
-}
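
A sketch of the gradient use case described in the comment above (not part of
this commit), building shape/strides for an output with one extra trailing
axis of length ndim; double elements are assumed for illustration:

    int ndim = NpyIter_GetNDim(iter);
    npy_intp itemsize = sizeof(double);  /* assumed element type */
    npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];

    if (NpyIter_GetShape(iter, shape) != NPY_SUCCEED ||
            NpyIter_CreateCompatibleStrides(iter,
                    ndim * itemsize, strides) != NPY_SUCCEED) {
        return NULL;
    }
    /* Tack one gradient axis of length ndim onto the end */
    shape[ndim] = ndim;
    strides[ndim] = itemsize;
    /* ... allocate the output via PyArray_NewFromDescr with ndim+1 dims ... */
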
-
-/*NUMPY_API
- * Get the array of data pointers (1 per object being iterated)
- *
- * This function may be safely called without holding the Python GIL.
- */
-NPY_NO_EXPORT char **
-NpyIter_GetDataPtrArray(NpyIter *iter)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- /*int ndim = NIT_NDIM(iter);*/
- int nop = NIT_NOP(iter);
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
- return NBF_PTRS(bufferdata);
- }
- else {
- NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
- return NAD_PTRS(axisdata);
- }
-}
-
-/*NUMPY_API
- * Get the array of data pointers (1 per object being iterated),
- * directly into the arrays (never pointing to a buffer), for starting
- * unbuffered iteration. This always returns the addresses for the
- * iterator position as reset to iterator index 0.
- *
- * These pointers are different from the pointers accepted by
- * NpyIter_ResetBasePointers, because the direction along some
- * axes may have been reversed, requiring base offsets.
- *
- * This function may be safely called without holding the Python GIL.
- */
-NPY_NO_EXPORT char **
-NpyIter_GetInitialDataPtrArray(NpyIter *iter)
-{
- /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
- /*int ndim = NIT_NDIM(iter);*/
- int nop = NIT_NOP(iter);
-
- return NIT_RESETDATAPTR(iter);
-}
-
-/*NUMPY_API
- * Get the array of data type pointers (1 per object being iterated)
- */
-NPY_NO_EXPORT PyArray_Descr **
-NpyIter_GetDescrArray(NpyIter *iter)
-{
- /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
- /*int ndim = NIT_NDIM(iter);*/
- /*int nop = NIT_NOP(iter);*/
-
- return NIT_DTYPES(iter);
-}
-
-/*NUMPY_API
- * Get the array of objects being iterated
- */
-NPY_NO_EXPORT PyArrayObject **
-NpyIter_GetOperandArray(NpyIter *iter)
-{
- /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
- /*int ndim = NIT_NDIM(iter);*/
- int nop = NIT_NOP(iter);
-
- return NIT_OPERANDS(iter);
-}
-
-/*NUMPY_API
- * Returns a view to the i-th object with the iterator's internal axes
- */
-NPY_NO_EXPORT PyArrayObject *
-NpyIter_GetIterView(NpyIter *iter, npy_intp i)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int idim, ndim = NIT_NDIM(iter);
- int nop = NIT_NOP(iter);
-
- npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
- PyArrayObject *obj, *view;
- PyArray_Descr *dtype;
- char *dataptr;
- NpyIter_AxisData *axisdata;
- npy_intp sizeof_axisdata;
- int writeable;
-
- if (i < 0 || i >= nop) {
- PyErr_SetString(PyExc_IndexError,
- "index provided for an iterator view was out of bounds");
- return NULL;
- }
-
- /* Don't provide views if buffering is enabled */
- if (itflags&NPY_ITFLAG_BUFFER) {
- PyErr_SetString(PyExc_ValueError,
- "cannot provide an iterator view when buffering is enabled");
- return NULL;
- }
-
- obj = NIT_OPERANDS(iter)[i];
- dtype = PyArray_DESCR(obj);
- writeable = NIT_OPITFLAGS(iter)[i]&NPY_OP_ITFLAG_WRITE;
- dataptr = NIT_RESETDATAPTR(iter)[i];
- axisdata = NIT_AXISDATA(iter);
- sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
-
- /* Retrieve the shape and strides from the axisdata */
- for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
- shape[ndim-idim-1] = NAD_SHAPE(axisdata);
- strides[ndim-idim-1] = NAD_STRIDES(axisdata)[i];
- }
-
- Py_INCREF(dtype);
- view = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, ndim,
- shape, strides, dataptr,
- writeable ? NPY_ARRAY_WRITEABLE : 0,
- NULL);
- if (view == NULL) {
- return NULL;
- }
- /* Tell the view who owns the data */
- Py_INCREF(obj);
- view->base = (PyObject *)obj;
- /* Make sure all the flags are good */
- PyArray_UpdateFlags(view, NPY_ARRAY_UPDATE_ALL);
-
- return view;
-}
-
-/*NUMPY_API
- * Get a pointer to the index, if it is being tracked
- */
-NPY_NO_EXPORT npy_intp *
-NpyIter_GetIndexPtr(NpyIter *iter)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- /*int ndim = NIT_NDIM(iter);*/
- int nop = NIT_NOP(iter);
-
- NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
-
- if (itflags&NPY_ITFLAG_HASINDEX) {
- /* The index is just after the data pointers */
- return (npy_intp*)NAD_PTRS(axisdata) + nop;
- }
- else {
- return NULL;
- }
-}
-
-/*NUMPY_API
- * Gets an array of read flags (1 per object being iterated)
- */
-NPY_NO_EXPORT void
-NpyIter_GetReadFlags(NpyIter *iter, char *outreadflags)
-{
- /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
- /*int ndim = NIT_NDIM(iter);*/
- int iop, nop = NIT_NOP(iter);
-
- char *op_itflags = NIT_OPITFLAGS(iter);
-
- for (iop = 0; iop < nop; ++iop) {
- outreadflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_READ) != 0;
- }
-}
-
-/*NUMPY_API
- * Gets an array of write flags (1 per object being iterated)
- */
-NPY_NO_EXPORT void
-NpyIter_GetWriteFlags(NpyIter *iter, char *outwriteflags)
-{
- /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
- /*int ndim = NIT_NDIM(iter);*/
- int iop, nop = NIT_NOP(iter);
-
- char *op_itflags = NIT_OPITFLAGS(iter);
-
- for (iop = 0; iop < nop; ++iop) {
- outwriteflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) != 0;
- }
-}
-
-
-/*NUMPY_API
- * Get the array of strides for the inner loop (when HasExternalLoop is true)
- *
- * This function may be safely called without holding the Python GIL.
- */
-NPY_NO_EXPORT npy_intp *
-NpyIter_GetInnerStrideArray(NpyIter *iter)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- /*int ndim = NIT_NDIM(iter);*/
- int nop = NIT_NOP(iter);
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
- return NBF_STRIDES(data);
- }
- else {
- NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
- return NAD_STRIDES(axisdata);
- }
-}
-
-/*NUMPY_API
- * Gets the array of strides for the specified axis.
- * If the iterator is tracking a multi-index, gets the strides
- * for the axis specified, otherwise gets the strides for
- * the iteration axis as Fortran order (fastest-changing axis first).
- *
- * Returns NULL if an error occurs.
- */
-NPY_NO_EXPORT npy_intp *
-NpyIter_GetAxisStrideArray(NpyIter *iter, int axis)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int idim, ndim = NIT_NDIM(iter);
- int nop = NIT_NOP(iter);
-
- npy_int8 *perm = NIT_PERM(iter);
- NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
- npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
-
- if (axis < 0 || axis >= ndim) {
- PyErr_SetString(PyExc_ValueError,
- "axis out of bounds in iterator GetStrideAxisArray");
- return NULL;
- }
-
- if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
- /* Reverse axis, since the iterator treats them that way */
- axis = ndim-1-axis;
-
- /* First find the axis in question */
- for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
- if (perm[idim] == axis || -1 - perm[idim] == axis) {
- return NAD_STRIDES(axisdata);
- }
- }
- }
- else {
- return NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, axis));
- }
-
- PyErr_SetString(PyExc_RuntimeError,
- "internal error in iterator perm");
- return NULL;
-}
-
-/*NUMPY_API
- * Get an array of strides which are fixed. Any strides which may
- * change during iteration receive the value NPY_MAX_INTP. Once
- * the iterator is ready to iterate, call this to get the strides
- * which will always be fixed in the inner loop, then choose optimized
- * inner loop functions which take advantage of those fixed strides.
- *
- * This function may be safely called without holding the Python GIL.
- */
-NPY_NO_EXPORT void
-NpyIter_GetInnerFixedStrideArray(NpyIter *iter, npy_intp *out_strides)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int ndim = NIT_NDIM(iter);
- int iop, nop = NIT_NOP(iter);
-
- NpyIter_AxisData *axisdata0 = NIT_AXISDATA(iter);
- npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
- char *op_itflags = NIT_OPITFLAGS(iter);
- npy_intp stride, *strides = NBF_STRIDES(data),
- *ad_strides = NAD_STRIDES(axisdata0);
- PyArray_Descr **dtypes = NIT_DTYPES(iter);
-
- for (iop = 0; iop < nop; ++iop) {
- stride = strides[iop];
- /*
- * Operands which are always/never buffered have fixed strides,
- * and everything has fixed strides when ndim is 0 or 1
- */
- if (ndim <= 1 || (op_itflags[iop]&
- (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_BUFNEVER))) {
- out_strides[iop] = stride;
- }
- /* If it's a reduction, 0-stride inner loop may have fixed stride */
- else if (stride == 0 && (itflags&NPY_ITFLAG_REDUCE)) {
- /* If it's a reduction operand, definitely fixed stride */
- if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
- out_strides[iop] = stride;
- }
- /*
- * Otherwise it's a fixed stride if the stride is 0
- * for all inner dimensions of the reduction double loop
- */
- else {
- NpyIter_AxisData *axisdata = axisdata0;
- int idim,
- reduce_outerdim = NBF_REDUCE_OUTERDIM(data);
- for (idim = 0; idim < reduce_outerdim; ++idim) {
- if (NAD_STRIDES(axisdata)[iop] != 0) {
- break;
- }
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- }
- /* If all the strides were 0, the stride won't change */
- if (idim == reduce_outerdim) {
- out_strides[iop] = stride;
- }
- else {
- out_strides[iop] = NPY_MAX_INTP;
- }
- }
- }
- /*
- * Inner loop contiguous array means its stride won't change when
- * switching between buffering and not buffering
- */
- else if (ad_strides[iop] == dtypes[iop]->elsize) {
- out_strides[iop] = ad_strides[iop];
- }
- /*
- * Otherwise the strides can change if the operand is sometimes
- * buffered, sometimes not.
- */
- else {
- out_strides[iop] = NPY_MAX_INTP;
- }
- }
- }
- else {
- /* If there's no buffering, the strides are always fixed */
- memcpy(out_strides, NAD_STRIDES(axisdata0), nop*NPY_SIZEOF_INTP);
- }
-}
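
A sketch of using the fixed strides to select a specialized inner loop once,
before iteration starts (not part of this commit); the contiguity test on
operand 0 is purely illustrative:

    npy_intp fixed_strides[NPY_MAXARGS];

    NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
    /* NPY_MAX_INTP marks a stride that may change between buffers */
    if (fixed_strides[0] == sizeof(double)) {
        /* operand 0 is always contiguous: use a specialized loop */
    }
    else {
        /* fall back to a general strided inner loop */
    }
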
-
-
-/*NUMPY_API
- * Get a pointer to the size of the inner loop (when HasExternalLoop is true)
- *
- * This function may be safely called without holding the Python GIL.
- */
-NPY_NO_EXPORT npy_intp *
-NpyIter_GetInnerLoopSizePtr(NpyIter *iter)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- /*int ndim = NIT_NDIM(iter);*/
- int nop = NIT_NOP(iter);
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
- return &NBF_SIZE(data);
- }
- else {
- NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
- return &NAD_SHAPE(axisdata);
- }
-}
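
Together with NpyIter_GetDataPtrArray and NpyIter_GetInnerStrideArray above,
this pointer drives the standard EXTERNAL_LOOP iteration pattern. A minimal
single-operand sketch (not part of this commit), assuming `iter` was built
with NPY_ITER_EXTERNAL_LOOP over a double array:

    NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
    char **dataptr = NpyIter_GetDataPtrArray(iter);
    npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
    npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter);

    if (iternext == NULL) {
        NpyIter_Deallocate(iter);
        return NULL;
    }
    do {
        char *data = *dataptr;
        npy_intp stride = *strideptr, count = *sizeptr;
        while (count--) {
            /* ... process *(double *)data ... */
            data += stride;
        }
    } while (iternext(iter));
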
-
-NPY_NO_EXPORT void
-npyiter_coalesce_axes(NpyIter *iter)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int idim, ndim = NIT_NDIM(iter);
- int nop = NIT_NOP(iter);
-
- npy_intp istrides, nstrides = NAD_NSTRIDES();
- NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
- npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
- NpyIter_AxisData *ad_compress;
- npy_intp new_ndim = 1;
-
- /* The HASMULTIINDEX or IDENTPERM flags do not apply after coalescing */
- NIT_ITFLAGS(iter) &= ~(NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_HASMULTIINDEX);
-
- axisdata = NIT_AXISDATA(iter);
- ad_compress = axisdata;
-
- for (idim = 0; idim < ndim-1; ++idim) {
- int can_coalesce = 1;
- npy_intp shape0 = NAD_SHAPE(ad_compress);
- npy_intp shape1 = NAD_SHAPE(NIT_INDEX_AXISDATA(axisdata, 1));
- npy_intp *strides0 = NAD_STRIDES(ad_compress);
- npy_intp *strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, 1));
-
- /* Check that all the axes can be coalesced */
- for (istrides = 0; istrides < nstrides; ++istrides) {
- if (!((shape0 == 1 && strides0[istrides] == 0) ||
- (shape1 == 1 && strides1[istrides] == 0)) &&
- (strides0[istrides]*shape0 != strides1[istrides])) {
- can_coalesce = 0;
- break;
- }
- }
-
- if (can_coalesce) {
- npy_intp *strides = NAD_STRIDES(ad_compress);
-
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- NAD_SHAPE(ad_compress) *= NAD_SHAPE(axisdata);
- for (istrides = 0; istrides < nstrides; ++istrides) {
- if (strides[istrides] == 0) {
- strides[istrides] = NAD_STRIDES(axisdata)[istrides];
- }
- }
- }
- else {
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- NIT_ADVANCE_AXISDATA(ad_compress, 1);
- if (ad_compress != axisdata) {
- memcpy(ad_compress, axisdata, sizeof_axisdata);
- }
- ++new_ndim;
- }
- }
-
- /*
- * If the number of axes shrunk, reset the perm and
- * compress the data into the new layout.
- */
- if (new_ndim < ndim) {
- npy_int8 *perm = NIT_PERM(iter);
-
- /* Reset to an identity perm */
- for (idim = 0; idim < new_ndim; ++idim) {
- perm[idim] = (npy_int8)idim;
- }
- NIT_NDIM(iter) = new_ndim;
- }
-}
-
-/*
- *
- * If errmsg is non-NULL, it should point to a variable which will
- * receive the error message, and no Python exception will be set.
- * This is so that the function can be called from code not holding
- * the GIL.
- */
-NPY_NO_EXPORT int
-npyiter_allocate_buffers(NpyIter *iter, char **errmsg)
-{
- /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
- /*int ndim = NIT_NDIM(iter);*/
- int iop = 0, nop = NIT_NOP(iter);
-
- npy_intp i;
- char *op_itflags = NIT_OPITFLAGS(iter);
- NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
- PyArray_Descr **op_dtype = NIT_DTYPES(iter);
- npy_intp buffersize = NBF_BUFFERSIZE(bufferdata);
- char *buffer, **buffers = NBF_BUFFERS(bufferdata);
-
- for (iop = 0; iop < nop; ++iop) {
- char flags = op_itflags[iop];
-
- /*
- * If we have determined that a buffer may be needed,
- * allocate one.
- */
- if (!(flags&NPY_OP_ITFLAG_BUFNEVER)) {
- npy_intp itemsize = op_dtype[iop]->elsize;
- buffer = PyArray_malloc(itemsize*buffersize);
- if (buffer == NULL) {
- if (errmsg == NULL) {
- PyErr_NoMemory();
- }
- else {
- *errmsg = "out of memory";
- }
- goto fail;
- }
- buffers[iop] = buffer;
- }
- }
-
- return 1;
-
-fail:
- for (i = 0; i < iop; ++i) {
- if (buffers[i] != NULL) {
- PyArray_free(buffers[i]);
- buffers[i] = NULL;
- }
- }
- return 0;
-}
-
-/*
- * This sets the AXISDATA portion of the iterator to the specified
- * iterindex, updating the pointers as well. This function does
- * no error checking.
- */
-NPY_NO_EXPORT void
-npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int idim, ndim = NIT_NDIM(iter);
- int nop = NIT_NOP(iter);
-
- char **dataptr;
- NpyIter_AxisData *axisdata;
- npy_intp sizeof_axisdata;
- npy_intp istrides, nstrides, i, shape;
-
- axisdata = NIT_AXISDATA(iter);
- sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
- nstrides = NAD_NSTRIDES();
-
- NIT_ITERINDEX(iter) = iterindex;
-
- if (iterindex == 0) {
- dataptr = NIT_RESETDATAPTR(iter);
-
- for (idim = 0; idim < ndim; ++idim) {
- char **ptrs;
- NAD_INDEX(axisdata) = 0;
- ptrs = NAD_PTRS(axisdata);
- for (istrides = 0; istrides < nstrides; ++istrides) {
- ptrs[istrides] = dataptr[istrides];
- }
-
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- }
- }
- else {
- /*
- * Set the multi-index, from the fastest-changing to the
- * slowest-changing.
- */
- axisdata = NIT_AXISDATA(iter);
- shape = NAD_SHAPE(axisdata);
- i = iterindex;
- iterindex /= shape;
- NAD_INDEX(axisdata) = i - iterindex * shape;
- for (idim = 0; idim < ndim-1; ++idim) {
- NIT_ADVANCE_AXISDATA(axisdata, 1);
-
- shape = NAD_SHAPE(axisdata);
- i = iterindex;
- iterindex /= shape;
- NAD_INDEX(axisdata) = i - iterindex * shape;
- }
-
- dataptr = NIT_RESETDATAPTR(iter);
-
- /*
- * Accumulate the successive pointers with their
- * offsets in the opposite order, starting from the
- * original data pointers.
- */
- for (idim = 0; idim < ndim; ++idim) {
- npy_intp *strides;
- char **ptrs;
-
- strides = NAD_STRIDES(axisdata);
- ptrs = NAD_PTRS(axisdata);
-
- i = NAD_INDEX(axisdata);
-
- for (istrides = 0; istrides < nstrides; ++istrides) {
- ptrs[istrides] = dataptr[istrides] + i*strides[istrides];
- }
-
- dataptr = ptrs;
-
- NIT_ADVANCE_AXISDATA(axisdata, -1);
- }
- }
-}
-
-/*
- * This gets called after the buffers have been exhausted, and
- * their data needs to be written back to the arrays. The multi-index
- * must be positioned for the beginning of the buffer.
- */
-NPY_NO_EXPORT void
-npyiter_copy_from_buffers(NpyIter *iter)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int ndim = NIT_NDIM(iter);
- int iop, nop = NIT_NOP(iter);
-
- char *op_itflags = NIT_OPITFLAGS(iter);
- NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
- NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
- *reduce_outeraxisdata = NULL;
-
- PyArray_Descr **dtypes = NIT_DTYPES(iter);
- npy_intp transfersize = NBF_SIZE(bufferdata),
- buffersize = NBF_BUFFERSIZE(bufferdata);
- npy_intp *strides = NBF_STRIDES(bufferdata),
- *ad_strides = NAD_STRIDES(axisdata);
- npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
- char **ptrs = NBF_PTRS(bufferdata), **ad_ptrs = NAD_PTRS(axisdata);
- char **buffers = NBF_BUFFERS(bufferdata);
- char *buffer;
-
- npy_intp reduce_outerdim = 0;
- npy_intp *reduce_outerstrides = NULL;
-
- PyArray_StridedTransferFn *stransfer = NULL;
- NpyAuxData *transferdata = NULL;
-
- npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
- NPY_SIZEOF_INTP;
-
- /* If we're past the end, nothing to copy */
- if (NBF_SIZE(bufferdata) == 0) {
- return;
- }
-
- NPY_IT_DBG_PRINT("Iterator: Copying buffers to outputs\n");
-
- if (itflags&NPY_ITFLAG_REDUCE) {
- reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
- reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
- reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
- transfersize *= NBF_REDUCE_OUTERSIZE(bufferdata);
- }
-
- for (iop = 0; iop < nop; ++iop) {
- stransfer = NBF_WRITETRANSFERFN(bufferdata)[iop];
- transferdata = NBF_WRITETRANSFERDATA(bufferdata)[iop];
- buffer = buffers[iop];
- /*
- * Copy the data back to the arrays. If the type has refs,
- * this function moves them so the buffer's refs are released.
- */
- if ((stransfer != NULL) && (op_itflags[iop]&NPY_OP_ITFLAG_WRITE)) {
- /* Copy back only if the pointer was pointing to the buffer */
- npy_intp delta = (ptrs[iop] - buffer);
- if (0 <= delta && delta <= buffersize*dtypes[iop]->elsize) {
- npy_intp op_transfersize;
-
- npy_intp src_stride, *dst_strides, *dst_coords, *dst_shape;
- int ndim_transfer;
-
- NPY_IT_DBG_PRINT1("Iterator: Operand %d was buffered\n",
- (int)iop);
-
- /*
- * If this operand is being reduced in the inner loop,
- * its buffering stride was set to zero, and just
- * one element was copied.
- */
- if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
- if (strides[iop] == 0) {
- if (reduce_outerstrides[iop] == 0) {
- op_transfersize = 1;
- src_stride = 0;
- dst_strides = &src_stride;
- dst_coords = &NAD_INDEX(reduce_outeraxisdata);
- dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
- ndim_transfer = 1;
- }
- else {
- op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
- src_stride = reduce_outerstrides[iop];
- dst_strides =
- &NAD_STRIDES(reduce_outeraxisdata)[iop];
- dst_coords = &NAD_INDEX(reduce_outeraxisdata);
- dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
- ndim_transfer = ndim - reduce_outerdim;
- }
- }
- else {
- if (reduce_outerstrides[iop] == 0) {
- op_transfersize = NBF_SIZE(bufferdata);
- src_stride = strides[iop];
- dst_strides = &ad_strides[iop];
- dst_coords = &NAD_INDEX(axisdata);
- dst_shape = &NAD_SHAPE(axisdata);
- ndim_transfer = reduce_outerdim ?
- reduce_outerdim : 1;
- }
- else {
- op_transfersize = transfersize;
- src_stride = strides[iop];
- dst_strides = &ad_strides[iop];
- dst_coords = &NAD_INDEX(axisdata);
- dst_shape = &NAD_SHAPE(axisdata);
- ndim_transfer = ndim;
- }
- }
- }
- else {
- op_transfersize = transfersize;
- src_stride = strides[iop];
- dst_strides = &ad_strides[iop];
- dst_coords = &NAD_INDEX(axisdata);
- dst_shape = &NAD_SHAPE(axisdata);
- ndim_transfer = ndim;
- }
-
- NPY_IT_DBG_PRINT2("Iterator: Copying buffer to "
- "operand %d (%d items)\n",
- (int)iop, (int)op_transfersize);
-
- PyArray_TransferStridedToNDim(ndim_transfer,
- ad_ptrs[iop], dst_strides, axisdata_incr,
- buffer, src_stride,
- dst_coords, axisdata_incr,
- dst_shape, axisdata_incr,
- op_transfersize, dtypes[iop]->elsize,
- stransfer,
- transferdata);
- }
- }
- /* If there's no copy back, we may have to decrement refs. In
- * this case, the transfer function has a 'decsrcref' transfer
- * function, so we can use it to do the decrement.
- */
- else if (stransfer != NULL) {
- /* Decrement refs only if the pointer was pointing to the buffer */
- npy_intp delta = (ptrs[iop] - buffer);
- if (0 <= delta && delta <= transfersize*dtypes[iop]->elsize) {
- NPY_IT_DBG_PRINT1("Iterator: Freeing refs and zeroing buffer "
- "of operand %d\n", (int)iop);
- /* Decrement refs */
- stransfer(NULL, 0, buffer, dtypes[iop]->elsize,
- transfersize, dtypes[iop]->elsize,
- transferdata);
- /*
- * Zero out the memory for safety. For instance,
- * if during iteration some Python code copied an
- * array pointing into the buffer, it will get None
- * values for its references after this.
- */
- memset(buffer, 0, dtypes[iop]->elsize*transfersize);
- }
- }
- }
-
- NPY_IT_DBG_PRINT("Iterator: Finished copying buffers to outputs\n");
-}
-
-/*
- * This gets called after the iterator has been positioned to a multi-index
- * for the start of a buffer. It decides which operands need a buffer,
- * and copies the data into the buffers.
- */
-NPY_NO_EXPORT void
-npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int ndim = NIT_NDIM(iter);
- int iop, nop = NIT_NOP(iter);
-
- char *op_itflags = NIT_OPITFLAGS(iter);
- NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
- NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
- *reduce_outeraxisdata = NULL;
-
- PyArray_Descr **dtypes = NIT_DTYPES(iter);
- PyArrayObject **operands = NIT_OPERANDS(iter);
- npy_intp *strides = NBF_STRIDES(bufferdata),
- *ad_strides = NAD_STRIDES(axisdata);
- npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
- char **ptrs = NBF_PTRS(bufferdata), **ad_ptrs = NAD_PTRS(axisdata);
- char **buffers = NBF_BUFFERS(bufferdata);
- npy_intp iterindex, iterend, transfersize,
- singlestridesize, reduce_innersize = 0, reduce_outerdim = 0;
- int is_onestride = 0, any_buffered = 0;
-
- npy_intp *reduce_outerstrides = NULL;
- char **reduce_outerptrs = NULL;
-
- PyArray_StridedTransferFn *stransfer = NULL;
- NpyAuxData *transferdata = NULL;
-
- /*
- * Have to get this flag before npyiter_checkreducesize sets
- * it for the next iteration.
- */
- npy_bool reuse_reduce_loops = (prev_dataptrs != NULL) &&
- ((itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS) != 0);
-
- npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
- NPY_SIZEOF_INTP;
-
- NPY_IT_DBG_PRINT("Iterator: Copying inputs to buffers\n");
-
- /* Calculate the size if using any buffers */
- iterindex = NIT_ITERINDEX(iter);
- iterend = NIT_ITEREND(iter);
- transfersize = NBF_BUFFERSIZE(bufferdata);
- if (transfersize > iterend - iterindex) {
- transfersize = iterend - iterindex;
- }
-
- /* If last time around, the reduce loop structure was full, we reuse it */
- if (reuse_reduce_loops) {
- npy_intp full_transfersize;
-
- reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
- reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
- reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
- reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
- reduce_innersize = NBF_SIZE(bufferdata);
- NBF_REDUCE_POS(bufferdata) = 0;
- /*
- * Try to make the outersize as big as possible. This allows
- * it to shrink when processing the last bit of the outer reduce loop,
- * then grow again at the beginning of the next outer reduce loop.
- */
- NBF_REDUCE_OUTERSIZE(bufferdata) = (NAD_SHAPE(reduce_outeraxisdata)-
- NAD_INDEX(reduce_outeraxisdata));
- full_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
- /* If the full transfer size doesn't fit in the buffer, truncate it */
- if (full_transfersize > NBF_BUFFERSIZE(bufferdata)) {
- NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
- transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
- }
- else {
- transfersize = full_transfersize;
- }
- NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
-
- NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d "
- "itersize: %d\n",
- (int)transfersize,
- (int)reduce_innersize,
- (int)NpyIter_GetIterSize(iter));
- NPY_IT_DBG_PRINT1("Reduced reduce outersize: %d",
- (int)NBF_REDUCE_OUTERSIZE(bufferdata));
- }
- /*
- * If there are any reduction operands, we may have to make
- * the size smaller so we don't copy the same value into
- * a buffer twice, as the buffering does not have a mechanism
- * to combine values itself.
- */
- else if (itflags&NPY_ITFLAG_REDUCE) {
- NPY_IT_DBG_PRINT("Iterator: Calculating reduce loops\n");
- transfersize = npyiter_checkreducesize(iter, transfersize,
- &reduce_innersize,
- &reduce_outerdim);
- NPY_IT_DBG_PRINT3("Reduce transfersize: %d innersize: %d "
- "itersize: %d\n",
- (int)transfersize,
- (int)reduce_innersize,
- (int)NpyIter_GetIterSize(iter));
-
- reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
- reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
- reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
- NBF_SIZE(bufferdata) = reduce_innersize;
- NBF_REDUCE_POS(bufferdata) = 0;
- NBF_REDUCE_OUTERDIM(bufferdata) = reduce_outerdim;
- NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
- if (reduce_innersize == 0) {
- NBF_REDUCE_OUTERSIZE(bufferdata) = 0;
- return;
- }
- else {
- NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
- }
- }
- else {
- NBF_SIZE(bufferdata) = transfersize;
- NBF_BUFITEREND(bufferdata) = iterindex + transfersize;
- }
-
- /* Calculate the maximum size if using a single stride and no buffers */
- singlestridesize = NAD_SHAPE(axisdata)-NAD_INDEX(axisdata);
- if (singlestridesize > iterend - iterindex) {
- singlestridesize = iterend - iterindex;
- }
- if (singlestridesize >= transfersize) {
- is_onestride = 1;
- }
-
- for (iop = 0; iop < nop; ++iop) {
- /*
- * If the buffer is write-only, these two are NULL, and the buffer
- * pointers will be set up but the read copy won't be done
- */
- stransfer = NBF_READTRANSFERFN(bufferdata)[iop];
- transferdata = NBF_READTRANSFERDATA(bufferdata)[iop];
- switch (op_itflags[iop]&
- (NPY_OP_ITFLAG_BUFNEVER|
- NPY_OP_ITFLAG_CAST|
- NPY_OP_ITFLAG_REDUCE)) {
- /* Never need to buffer this operand */
- case NPY_OP_ITFLAG_BUFNEVER:
- ptrs[iop] = ad_ptrs[iop];
- if (itflags&NPY_ITFLAG_REDUCE) {
- reduce_outerstrides[iop] = reduce_innersize *
- strides[iop];
- reduce_outerptrs[iop] = ptrs[iop];
- }
- /*
- * Should not adjust the stride - ad_strides[iop]
- * could be zero, but strides[iop] was initialized
- * to the first non-trivial stride.
- */
- stransfer = NULL;
- break;
- /* Never need to buffer this operand */
- case NPY_OP_ITFLAG_BUFNEVER|NPY_OP_ITFLAG_REDUCE:
- ptrs[iop] = ad_ptrs[iop];
- reduce_outerptrs[iop] = ptrs[iop];
- reduce_outerstrides[iop] = 0;
- /*
- * Should not adjust the stride - ad_strides[iop]
- * could be zero, but strides[iop] was initialized
- * to the first non-trivial stride.
- */
- stransfer = NULL;
- break;
- /* Just a copy */
- case 0:
- /*
- * No copyswap or cast was requested, so all we're
- * doing is copying the data to fill the buffer and
- * produce a single stride. If the underlying data
- * already does that, no need to copy it.
- */
- if (is_onestride) {
- ptrs[iop] = ad_ptrs[iop];
- strides[iop] = ad_strides[iop];
- stransfer = NULL;
- }
- /* If some other op is reduced, we have a double reduce loop */
- else if ((itflags&NPY_ITFLAG_REDUCE) &&
- (reduce_outerdim == 1) &&
- (transfersize/reduce_innersize <=
- NAD_SHAPE(reduce_outeraxisdata) -
- NAD_INDEX(reduce_outeraxisdata))) {
- ptrs[iop] = ad_ptrs[iop];
- reduce_outerptrs[iop] = ptrs[iop];
- strides[iop] = ad_strides[iop];
- reduce_outerstrides[iop] =
- NAD_STRIDES(reduce_outeraxisdata)[iop];
- stransfer = NULL;
- }
- else {
- /* In this case, the buffer is being used */
- ptrs[iop] = buffers[iop];
- strides[iop] = dtypes[iop]->elsize;
- if (itflags&NPY_ITFLAG_REDUCE) {
- reduce_outerstrides[iop] = reduce_innersize *
- strides[iop];
- reduce_outerptrs[iop] = ptrs[iop];
- }
- }
- break;
- /* Just a copy, but with a reduction */
- case NPY_OP_ITFLAG_REDUCE:
- if (ad_strides[iop] == 0) {
- strides[iop] = 0;
- /* It's all in one stride in the inner loop dimension */
- if (is_onestride) {
- NPY_IT_DBG_PRINT1("reduce op %d all one stride\n", (int)iop);
- ptrs[iop] = ad_ptrs[iop];
- reduce_outerstrides[iop] = 0;
- stransfer = NULL;
- }
- /* It's all in one stride in the reduce outer loop */
- else if ((reduce_outerdim > 0) &&
- (transfersize/reduce_innersize <=
- NAD_SHAPE(reduce_outeraxisdata) -
- NAD_INDEX(reduce_outeraxisdata))) {
- NPY_IT_DBG_PRINT1("reduce op %d all one outer stride\n",
- (int)iop);
- ptrs[iop] = ad_ptrs[iop];
- /* Outer reduce loop advances by one item */
- reduce_outerstrides[iop] =
- NAD_STRIDES(reduce_outeraxisdata)[iop];
- stransfer = NULL;
- }
- /* In this case, the buffer is being used */
- else {
- NPY_IT_DBG_PRINT1("reduce op %d must buffer\n", (int)iop);
- ptrs[iop] = buffers[iop];
- /* Both outer and inner reduce loops have stride 0 */
- if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
- reduce_outerstrides[iop] = 0;
- }
- /* Outer reduce loop advances by one item */
- else {
- reduce_outerstrides[iop] = dtypes[iop]->elsize;
- }
- }
-
- }
- else if (is_onestride) {
- NPY_IT_DBG_PRINT1("reduce op %d all one stride in dim 0\n", (int)iop);
- ptrs[iop] = ad_ptrs[iop];
- strides[iop] = ad_strides[iop];
- reduce_outerstrides[iop] = 0;
- stransfer = NULL;
- }
- else {
- /* It's all in one stride in the reduce outer loop */
- if ((reduce_outerdim > 0) &&
- (transfersize/reduce_innersize <=
- NAD_SHAPE(reduce_outeraxisdata) -
- NAD_INDEX(reduce_outeraxisdata))) {
- ptrs[iop] = ad_ptrs[iop];
- strides[iop] = ad_strides[iop];
- /* Outer reduce loop advances by one item */
- reduce_outerstrides[iop] =
- NAD_STRIDES(reduce_outeraxisdata)[iop];
- stransfer = NULL;
- }
- /* In this case, the buffer is being used */
- else {
- ptrs[iop] = buffers[iop];
- strides[iop] = dtypes[iop]->elsize;
-
- if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
- /* Reduction in outer reduce loop */
- reduce_outerstrides[iop] = 0;
- }
- else {
- /* Advance to next items in outer reduce loop */
- reduce_outerstrides[iop] = reduce_innersize *
- dtypes[iop]->elsize;
- }
- }
- }
- reduce_outerptrs[iop] = ptrs[iop];
- break;
- default:
- /* In this case, the buffer is always being used */
- any_buffered = 1;
-
- if (!(op_itflags[iop]&NPY_OP_ITFLAG_REDUCE)) {
- ptrs[iop] = buffers[iop];
- strides[iop] = dtypes[iop]->elsize;
- if (itflags&NPY_ITFLAG_REDUCE) {
- reduce_outerstrides[iop] = reduce_innersize *
- strides[iop];
- reduce_outerptrs[iop] = ptrs[iop];
- }
- }
- /* The buffer is being used with reduction */
- else {
- ptrs[iop] = buffers[iop];
- if (ad_strides[iop] == 0) {
- NPY_IT_DBG_PRINT1("cast op %d has innermost stride 0\n", (int)iop);
- strides[iop] = 0;
- /* Both outer and inner reduce loops have stride 0 */
- if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
- NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
- reduce_outerstrides[iop] = 0;
- }
- /* Outer reduce loop advances by one item */
- else {
- NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
- reduce_outerstrides[iop] = dtypes[iop]->elsize;
- }
- }
- else {
- NPY_IT_DBG_PRINT1("cast op %d has innermost stride !=0\n", (int)iop);
- strides[iop] = dtypes[iop]->elsize;
-
- if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
- NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
- /* Reduction in outer reduce loop */
- reduce_outerstrides[iop] = 0;
- }
- else {
- NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
- /* Advance to next items in outer reduce loop */
- reduce_outerstrides[iop] = reduce_innersize *
- dtypes[iop]->elsize;
- }
- }
- reduce_outerptrs[iop] = ptrs[iop];
- }
- break;
- }
-
- if (stransfer != NULL) {
- npy_intp src_itemsize = PyArray_DESCR(operands[iop])->elsize;
- npy_intp op_transfersize;
-
- npy_intp dst_stride, *src_strides, *src_coords, *src_shape;
- int ndim_transfer;
-
- npy_bool skip_transfer = 0;
-
- /* If stransfer wasn't set to NULL, buffering is required */
- any_buffered = 1;
-
- /*
- * If this operand is being reduced in the inner loop,
- * set its buffering stride to zero, and just copy
- * one element.
- */
- if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
- if (ad_strides[iop] == 0) {
- strides[iop] = 0;
- if (reduce_outerstrides[iop] == 0) {
- op_transfersize = 1;
- dst_stride = 0;
- src_strides = &dst_stride;
- src_coords = &NAD_INDEX(reduce_outeraxisdata);
- src_shape = &NAD_SHAPE(reduce_outeraxisdata);
- ndim_transfer = 1;
-
- /*
- * When we're reducing a single element, and
-                         * it's still the same element, don't overwrite
-                         * it even when the reuse-reduce-loops flag is unset.
- * This preserves the precision of the
- * intermediate calculation.
- */
- if (prev_dataptrs &&
- prev_dataptrs[iop] == ad_ptrs[iop]) {
- NPY_IT_DBG_PRINT1("Iterator: skipping operand %d"
- " copy because it's a 1-element reduce\n",
- (int)iop);
-
- skip_transfer = 1;
- }
- }
- else {
- op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
- dst_stride = reduce_outerstrides[iop];
- src_strides = &NAD_STRIDES(reduce_outeraxisdata)[iop];
- src_coords = &NAD_INDEX(reduce_outeraxisdata);
- src_shape = &NAD_SHAPE(reduce_outeraxisdata);
- ndim_transfer = ndim - reduce_outerdim;
- }
- }
- else {
- if (reduce_outerstrides[iop] == 0) {
- op_transfersize = NBF_SIZE(bufferdata);
- dst_stride = strides[iop];
- src_strides = &ad_strides[iop];
- src_coords = &NAD_INDEX(axisdata);
- src_shape = &NAD_SHAPE(axisdata);
- ndim_transfer = reduce_outerdim ? reduce_outerdim : 1;
- }
- else {
- op_transfersize = transfersize;
- dst_stride = strides[iop];
- src_strides = &ad_strides[iop];
- src_coords = &NAD_INDEX(axisdata);
- src_shape = &NAD_SHAPE(axisdata);
- ndim_transfer = ndim;
- }
- }
- }
- else {
- op_transfersize = transfersize;
- dst_stride = strides[iop];
- src_strides = &ad_strides[iop];
- src_coords = &NAD_INDEX(axisdata);
- src_shape = &NAD_SHAPE(axisdata);
- ndim_transfer = ndim;
- }
-
- /*
- * If the whole buffered loop structure remains the same,
- * and the source pointer for this data didn't change,
- * we don't have to copy the data again.
- */
- if (reuse_reduce_loops && prev_dataptrs[iop] == ad_ptrs[iop]) {
-                NPY_IT_DBG_PRINT2("Iterator: skipping operand %d "
- "copy (%d items) because loops are reused and the data "
- "pointer didn't change\n",
- (int)iop, (int)op_transfersize);
- skip_transfer = 1;
- }
-
-            /* If the data type requires zero-initialization */
- if (PyDataType_FLAGCHK(dtypes[iop], NPY_NEEDS_INIT)) {
- NPY_IT_DBG_PRINT("Iterator: Buffer requires init, "
- "memsetting to 0\n");
- memset(ptrs[iop], 0, dtypes[iop]->elsize*op_transfersize);
- /* Can't skip the transfer in this case */
- skip_transfer = 0;
- }
-
- if (!skip_transfer) {
- NPY_IT_DBG_PRINT2("Iterator: Copying operand %d to "
- "buffer (%d items)\n",
- (int)iop, (int)op_transfersize);
-
- PyArray_TransferNDimToStrided(ndim_transfer,
- ptrs[iop], dst_stride,
- ad_ptrs[iop], src_strides, axisdata_incr,
- src_coords, axisdata_incr,
- src_shape, axisdata_incr,
- op_transfersize, src_itemsize,
- stransfer,
- transferdata);
- }
- }
- else if (ptrs[iop] == buffers[iop]) {
-            /* If the data type requires zero-initialization */
- if (PyDataType_FLAGCHK(dtypes[iop], NPY_NEEDS_INIT)) {
- NPY_IT_DBG_PRINT1("Iterator: Write-only buffer for "
- "operand %d requires init, "
- "memsetting to 0\n", (int)iop);
- memset(ptrs[iop], 0, dtypes[iop]->elsize*transfersize);
- }
- }
-
- }
-
- /*
- * If buffering wasn't needed, we can grow the inner
- * loop to as large as possible.
- *
- * TODO: Could grow REDUCE loop too with some more logic above.
- */
- if (!any_buffered && (itflags&NPY_ITFLAG_GROWINNER) &&
- !(itflags&NPY_ITFLAG_REDUCE)) {
- if (singlestridesize > transfersize) {
- NPY_IT_DBG_PRINT2("Iterator: Expanding inner loop size "
- "from %d to %d since buffering wasn't needed\n",
- (int)NBF_SIZE(bufferdata), (int)singlestridesize);
- NBF_SIZE(bufferdata) = singlestridesize;
- NBF_BUFITEREND(bufferdata) = iterindex + singlestridesize;
- }
- }
-
- NPY_IT_DBG_PRINT1("Any buffering needed: %d\n", any_buffered);
-
- NPY_IT_DBG_PRINT1("Iterator: Finished copying inputs to buffers "
- "(buffered size is %d)\n", (int)NBF_SIZE(bufferdata));
-}
-
-/*
- * This checks how much space can be buffered without encountering the
- * same value twice, or for operands whose innermost stride is zero,
- * without encountering a different value. By reducing the buffered
- * amount to this size, reductions can be safely buffered.
- *
- * Reductions are buffered with two levels of looping, to avoid
- * frequent copying to the buffers. The return value is the overall
- * buffer size, and when the flag NPY_ITFLAG_REDUCE is set, reduce_innersize
- * receives the size of the inner of the two levels of looping.
- *
- * The value placed in reduce_outerdim is the index into the AXISDATA
- * for where the second level of the double loop begins.
- *
- * The return value is always a multiple of the value placed in
- * reduce_innersize.
- */
-static npy_intp
-npyiter_checkreducesize(NpyIter *iter, npy_intp count,
- npy_intp *reduce_innersize,
- npy_intp *reduce_outerdim)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int idim, ndim = NIT_NDIM(iter);
- int iop, nop = NIT_NOP(iter);
-
- NpyIter_AxisData *axisdata;
- npy_intp sizeof_axisdata;
- npy_intp coord, shape, *strides;
- npy_intp reducespace = 1, factor;
- npy_bool nonzerocoord = 0;
-
- char *op_itflags = NIT_OPITFLAGS(iter);
- char stride0op[NPY_MAXARGS];
-
- /* Default to no outer axis */
- *reduce_outerdim = 0;
-
- /* If there's only one dimension, no need to calculate anything */
- if (ndim == 1) {
- *reduce_innersize = count;
- return count;
- }
-
- sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
- axisdata = NIT_AXISDATA(iter);
-
- /* Indicate which REDUCE operands have stride 0 in the inner loop */
- strides = NAD_STRIDES(axisdata);
- for (iop = 0; iop < nop; ++iop) {
- stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
- (strides[iop] == 0);
- NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
- "the inner loop? %d\n", iop, (int)stride0op[iop]);
- }
- shape = NAD_SHAPE(axisdata);
- coord = NAD_INDEX(axisdata);
- reducespace += (shape-coord-1);
- factor = shape;
- NIT_ADVANCE_AXISDATA(axisdata, 1);
-
- /* Go forward through axisdata, calculating the space available */
- for (idim = 1; idim < ndim && reducespace < count;
- ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
- NPY_IT_DBG_PRINT2("Iterator: inner loop reducespace %d, count %d\n",
- (int)reducespace, (int)count);
-
- strides = NAD_STRIDES(axisdata);
- for (iop = 0; iop < nop; ++iop) {
-            /*
-             * If a reduce stride switched from zero to non-zero, or
-             * vice versa, that's the point where the data will stop
-             * being the same element or will repeat. If the buffer
-             * starts with an all-zero multi-index up to this point,
-             * this gives us the reduce_innersize.
-             */
- if((stride0op[iop] && (strides[iop] != 0)) ||
- (!stride0op[iop] &&
- (strides[iop] == 0) &&
- (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
- NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
- "buffer to %d\n", (int)reducespace);
- /*
- * If we already found more elements than count, or
- * the starting coordinate wasn't zero, the two-level
- * looping is unnecessary/can't be done, so return.
- */
- if (count <= reducespace) {
- *reduce_innersize = count;
- return count;
- }
- else if (nonzerocoord) {
- if (reducespace < count) {
- count = reducespace;
- }
- *reduce_innersize = count;
- return count;
- }
- else {
- *reduce_innersize = reducespace;
- break;
- }
- }
- }
- /* If we broke out of the loop early, we found reduce_innersize */
- if (iop != nop) {
- NPY_IT_DBG_PRINT2("Iterator: Found first dim not "
- "reduce (%d of %d)\n", iop, nop);
- break;
- }
-
- shape = NAD_SHAPE(axisdata);
- coord = NAD_INDEX(axisdata);
- if (coord != 0) {
- nonzerocoord = 1;
- }
- reducespace += (shape-coord-1) * factor;
- factor *= shape;
- }
-
- /*
- * If there was any non-zero coordinate, the reduction inner
- * loop doesn't fit in the buffersize, or the reduction inner loop
- * covered the entire iteration size, can't do the double loop.
- */
- if (nonzerocoord || count < reducespace || idim == ndim) {
- if (reducespace < count) {
- count = reducespace;
- }
- *reduce_innersize = count;
- /* In this case, we can't reuse the reduce loops */
- NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
- return count;
- }
-
- /* In this case, we can reuse the reduce loops */
- NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
-
- *reduce_innersize = reducespace;
- count /= reducespace;
-
-    NPY_IT_DBG_PRINT2("Iterator: reduce_innersize %d, divided count %d\n",
- (int)reducespace, (int)count);
-
- /*
- * Continue through the rest of the dimensions. If there are
- * two separated reduction axes, we may have to cut the buffer
- * short again.
- */
- *reduce_outerdim = idim;
- reducespace = 1;
- factor = 1;
- /* Indicate which REDUCE operands have stride 0 at the current level */
- strides = NAD_STRIDES(axisdata);
- for (iop = 0; iop < nop; ++iop) {
- stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
- (strides[iop] == 0);
- NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
- "the outer loop? %d\n", iop, (int)stride0op[iop]);
- }
- shape = NAD_SHAPE(axisdata);
- coord = NAD_INDEX(axisdata);
- reducespace += (shape-coord-1) * factor;
- factor *= shape;
- NIT_ADVANCE_AXISDATA(axisdata, 1);
- ++idim;
-
- for (; idim < ndim && reducespace < count;
- ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
- NPY_IT_DBG_PRINT2("Iterator: outer loop reducespace %d, count %d\n",
- (int)reducespace, (int)count);
- strides = NAD_STRIDES(axisdata);
- for (iop = 0; iop < nop; ++iop) {
-            /*
-             * If a reduce stride switched from zero to non-zero, or
-             * vice versa, that's the point where the data will stop
-             * being the same element or will repeat. Here in the
-             * outer scan, this bounds the outer level of the
-             * double loop.
-             */
- if((stride0op[iop] && (strides[iop] != 0)) ||
- (!stride0op[iop] &&
- (strides[iop] == 0) &&
- (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
- NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
- "buffer to %d\n", (int)reducespace);
- /*
- * This terminates the outer level of our double loop.
- */
- if (count <= reducespace) {
- return count * (*reduce_innersize);
- }
- else {
- return reducespace * (*reduce_innersize);
- }
- }
- }
-
- shape = NAD_SHAPE(axisdata);
- coord = NAD_INDEX(axisdata);
- if (coord != 0) {
- nonzerocoord = 1;
- }
- reducespace += (shape-coord-1) * factor;
- factor *= shape;
- }
-
- if (reducespace < count) {
- count = reducespace;
- }
- return count * (*reduce_innersize);
-}
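As an illustrative walk-through of the sizing logic above (the numbers are
hypothetical, not from this patch): suppose ndim = 3 with inner-to-outer
shapes (4, 3, 2), all coordinates starting at zero, and a single reduce
operand whose strides across those dimensions are (0, 0, s) with s != 0.
The forward scan accumulates reducespace = 4 after dim 0 and 12 after
dim 1; at dim 2 the reduce stride flips from zero to non-zero, so
reduce_innersize becomes 12 and reduce_outerdim becomes 2. For a
requested count of 100, count is divided down to 100/12 = 8, but the
outer scan only finds reducespace = 2 (the shape of dim 2), so the
function returns 2 * 12 = 24, a multiple of reduce_innersize as promised.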
-
-
-
-/*NUMPY_API
- * For debugging
- */
-NPY_NO_EXPORT void
-NpyIter_DebugPrint(NpyIter *iter)
-{
- npy_uint32 itflags = NIT_ITFLAGS(iter);
- int idim, ndim = NIT_NDIM(iter);
- int iop, nop = NIT_NOP(iter);
-
- NpyIter_AxisData *axisdata;
- npy_intp sizeof_axisdata;
-
- PyGILState_STATE gilstate = PyGILState_Ensure();
-
- printf("\n------ BEGIN ITERATOR DUMP ------\n");
- printf("| Iterator Address: %p\n", (void *)iter);
- printf("| ItFlags: ");
- if (itflags&NPY_ITFLAG_IDENTPERM)
- printf("IDENTPERM ");
- if (itflags&NPY_ITFLAG_NEGPERM)
- printf("NEGPERM ");
- if (itflags&NPY_ITFLAG_HASINDEX)
- printf("HASINDEX ");
- if (itflags&NPY_ITFLAG_HASMULTIINDEX)
- printf("HASMULTIINDEX ");
- if (itflags&NPY_ITFLAG_FORCEDORDER)
- printf("FORCEDORDER ");
- if (itflags&NPY_ITFLAG_EXLOOP)
- printf("EXLOOP ");
- if (itflags&NPY_ITFLAG_RANGE)
- printf("RANGE ");
- if (itflags&NPY_ITFLAG_BUFFER)
- printf("BUFFER ");
- if (itflags&NPY_ITFLAG_GROWINNER)
- printf("GROWINNER ");
- if (itflags&NPY_ITFLAG_ONEITERATION)
- printf("ONEITERATION ");
- if (itflags&NPY_ITFLAG_DELAYBUF)
- printf("DELAYBUF ");
- if (itflags&NPY_ITFLAG_NEEDSAPI)
- printf("NEEDSAPI ");
- if (itflags&NPY_ITFLAG_REDUCE)
- printf("REDUCE ");
- if (itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS)
- printf("REUSE_REDUCE_LOOPS ");
- printf("\n");
- printf("| NDim: %d\n", (int)ndim);
- printf("| NOp: %d\n", (int)nop);
- printf("| IterSize: %d\n", (int)NIT_ITERSIZE(iter));
- printf("| IterStart: %d\n", (int)NIT_ITERSTART(iter));
- printf("| IterEnd: %d\n", (int)NIT_ITEREND(iter));
- printf("| IterIndex: %d\n", (int)NIT_ITERINDEX(iter));
- printf("| Iterator SizeOf: %d\n",
- (int)NIT_SIZEOF_ITERATOR(itflags, ndim, nop));
- printf("| BufferData SizeOf: %d\n",
- (int)NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop));
- printf("| AxisData SizeOf: %d\n",
- (int)NIT_AXISDATA_SIZEOF(itflags, ndim, nop));
- printf("|\n");
-
- printf("| Perm: ");
- for (idim = 0; idim < ndim; ++idim) {
- printf("%d ", (int)NIT_PERM(iter)[idim]);
- }
- printf("\n");
- printf("| DTypes: ");
- for (iop = 0; iop < nop; ++iop) {
- printf("%p ", (void *)NIT_DTYPES(iter)[iop]);
- }
- printf("\n");
- printf("| DTypes: ");
- for (iop = 0; iop < nop; ++iop) {
- if (NIT_DTYPES(iter)[iop] != NULL)
- PyObject_Print((PyObject*)NIT_DTYPES(iter)[iop], stdout, 0);
- else
- printf("(nil) ");
- printf(" ");
- }
- printf("\n");
- printf("| InitDataPtrs: ");
- for (iop = 0; iop < nop; ++iop) {
- printf("%p ", (void *)NIT_RESETDATAPTR(iter)[iop]);
- }
- printf("\n");
- printf("| BaseOffsets: ");
- for (iop = 0; iop < nop; ++iop) {
- printf("%i ", (int)NIT_BASEOFFSETS(iter)[iop]);
- }
- printf("\n");
- if (itflags&NPY_ITFLAG_HASINDEX) {
- printf("| InitIndex: %d\n",
- (int)(npy_intp)NIT_RESETDATAPTR(iter)[nop]);
- }
- printf("| Operands: ");
- for (iop = 0; iop < nop; ++iop) {
- printf("%p ", (void *)NIT_OPERANDS(iter)[iop]);
- }
- printf("\n");
- printf("| Operand DTypes: ");
- for (iop = 0; iop < nop; ++iop) {
- PyArray_Descr *dtype;
- if (NIT_OPERANDS(iter)[iop] != NULL) {
- dtype = PyArray_DESCR(NIT_OPERANDS(iter)[iop]);
- if (dtype != NULL)
- PyObject_Print((PyObject *)dtype, stdout, 0);
- else
- printf("(nil) ");
- }
- else {
- printf("(op nil) ");
- }
- printf(" ");
- }
- printf("\n");
- printf("| OpItFlags:\n");
- for (iop = 0; iop < nop; ++iop) {
- printf("| Flags[%d]: ", (int)iop);
- if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_READ)
- printf("READ ");
- if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITE)
- printf("WRITE ");
- if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_CAST)
- printf("CAST ");
- if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_BUFNEVER)
- printf("BUFNEVER ");
- if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_ALIGNED)
- printf("ALIGNED ");
- if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_REDUCE)
- printf("REDUCE ");
- printf("\n");
- }
- printf("|\n");
-
- if (itflags&NPY_ITFLAG_BUFFER) {
- NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
- printf("| BufferData:\n");
- printf("| BufferSize: %d\n", (int)NBF_BUFFERSIZE(bufferdata));
- printf("| Size: %d\n", (int)NBF_SIZE(bufferdata));
- printf("| BufIterEnd: %d\n", (int)NBF_BUFITEREND(bufferdata));
- if (itflags&NPY_ITFLAG_REDUCE) {
- printf("| REDUCE Pos: %d\n",
- (int)NBF_REDUCE_POS(bufferdata));
- printf("| REDUCE OuterSize: %d\n",
- (int)NBF_REDUCE_OUTERSIZE(bufferdata));
- printf("| REDUCE OuterDim: %d\n",
- (int)NBF_REDUCE_OUTERDIM(bufferdata));
- }
- printf("| Strides: ");
- for (iop = 0; iop < nop; ++iop)
- printf("%d ", (int)NBF_STRIDES(bufferdata)[iop]);
- printf("\n");
- /* Print the fixed strides when there's no inner loop */
- if (itflags&NPY_ITFLAG_EXLOOP) {
- npy_intp fixedstrides[NPY_MAXDIMS];
- printf("| Fixed Strides: ");
- NpyIter_GetInnerFixedStrideArray(iter, fixedstrides);
- for (iop = 0; iop < nop; ++iop)
- printf("%d ", (int)fixedstrides[iop]);
- printf("\n");
- }
- printf("| Ptrs: ");
- for (iop = 0; iop < nop; ++iop)
- printf("%p ", (void *)NBF_PTRS(bufferdata)[iop]);
- printf("\n");
- if (itflags&NPY_ITFLAG_REDUCE) {
- printf("| REDUCE Outer Strides: ");
- for (iop = 0; iop < nop; ++iop)
- printf("%d ", (int)NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop]);
- printf("\n");
- printf("| REDUCE Outer Ptrs: ");
- for (iop = 0; iop < nop; ++iop)
- printf("%p ", (void *)NBF_REDUCE_OUTERPTRS(bufferdata)[iop]);
- printf("\n");
- }
- printf("| ReadTransferFn: ");
- for (iop = 0; iop < nop; ++iop)
- printf("%p ", (void *)NBF_READTRANSFERFN(bufferdata)[iop]);
- printf("\n");
- printf("| ReadTransferData: ");
- for (iop = 0; iop < nop; ++iop)
- printf("%p ", (void *)NBF_READTRANSFERDATA(bufferdata)[iop]);
- printf("\n");
- printf("| WriteTransferFn: ");
- for (iop = 0; iop < nop; ++iop)
- printf("%p ", (void *)NBF_WRITETRANSFERFN(bufferdata)[iop]);
- printf("\n");
- printf("| WriteTransferData: ");
- for (iop = 0; iop < nop; ++iop)
- printf("%p ", (void *)NBF_WRITETRANSFERDATA(bufferdata)[iop]);
- printf("\n");
- printf("| Buffers: ");
- for (iop = 0; iop < nop; ++iop)
- printf("%p ", (void *)NBF_BUFFERS(bufferdata)[iop]);
- printf("\n");
- printf("|\n");
- }
-
- axisdata = NIT_AXISDATA(iter);
- sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
- for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
- printf("| AxisData[%d]:\n", (int)idim);
- printf("| Shape: %d\n", (int)NAD_SHAPE(axisdata));
- printf("| Index: %d\n", (int)NAD_INDEX(axisdata));
- printf("| Strides: ");
- for (iop = 0; iop < nop; ++iop) {
- printf("%d ", (int)NAD_STRIDES(axisdata)[iop]);
- }
- printf("\n");
- if (itflags&NPY_ITFLAG_HASINDEX) {
- printf("| Index Stride: %d\n", (int)NAD_STRIDES(axisdata)[nop]);
- }
- printf("| Ptrs: ");
- for (iop = 0; iop < nop; ++iop) {
- printf("%p ", (void *)NAD_PTRS(axisdata)[iop]);
- }
- printf("\n");
- if (itflags&NPY_ITFLAG_HASINDEX) {
- printf("| Index Value: %d\n",
- (int)((npy_intp*)NAD_PTRS(axisdata))[nop]);
- }
- }
-
- printf("------- END ITERATOR DUMP -------\n");
-
- PyGILState_Release(gilstate);
-}
-
#undef NPY_ITERATOR_IMPLEMENTATION_CODE
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
new file mode 100644
index 000000000..10f4afd8b
--- /dev/null
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -0,0 +1,2601 @@
+/*
+ * This file implements most of the main API functions of NumPy's nditer.
+ * This excludes functions specialized using the templating system.
+ *
+ * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
+ * The University of British Columbia
+ *
+ * Copyright (c) 2011 Enthought, Inc
+ *
+ * See LICENSE.txt for the license.
+ */
+
+/* Indicate that this .c file is allowed to include the header */
+#define NPY_ITERATOR_IMPLEMENTATION_CODE
+#include "nditer_impl.h"
+
+/* Internal helper functions private to this file */
+static npy_intp
+npyiter_checkreducesize(NpyIter *iter, npy_intp count,
+ npy_intp *reduce_innersize,
+ npy_intp *reduce_outerdim);
+
+/*NUMPY_API
+ * Removes an axis from iteration. This requires that NPY_ITER_MULTI_INDEX
+ * was set for iterator creation, and does not work if buffering is
+ * enabled. This function also resets the iterator to its initial state.
+ *
+ * Returns NPY_SUCCEED or NPY_FAIL.
+ */
+NPY_NO_EXPORT int
+NpyIter_RemoveAxis(NpyIter *iter, int axis)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int iop, nop = NIT_NOP(iter);
+
+ int xdim = 0;
+ npy_int8 *perm = NIT_PERM(iter);
+ NpyIter_AxisData *axisdata_del = NIT_AXISDATA(iter), *axisdata;
+ npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+
+ npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);
+ char **resetdataptr = NIT_RESETDATAPTR(iter);
+
+ if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Iterator RemoveAxis may only be called "
+ "if a multi-index is being tracked");
+ return NPY_FAIL;
+ }
+ else if (itflags&NPY_ITFLAG_HASINDEX) {
+ PyErr_SetString(PyExc_RuntimeError,
+                "Iterator RemoveAxis may not be called "
+                "if an index is being tracked");
+ return NPY_FAIL;
+ }
+ else if (itflags&NPY_ITFLAG_BUFFER) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Iterator RemoveAxis may not be called on "
+ "a buffered iterator");
+ return NPY_FAIL;
+ }
+ else if (axis < 0 || axis >= ndim) {
+ PyErr_SetString(PyExc_ValueError,
+ "axis out of bounds in iterator RemoveAxis");
+ return NPY_FAIL;
+ }
+
+    /* Reverse the axis, since the iterator treats axes that way */
+ axis = ndim - 1 - axis;
+
+ /* First find the axis in question */
+ for (idim = 0; idim < ndim; ++idim) {
+ /* If this is it, and it's iterated forward, done */
+ if (perm[idim] == axis) {
+ xdim = idim;
+ break;
+ }
+ /* If this is it, but it's iterated backward, must reverse the axis */
+ else if (-1 - perm[idim] == axis) {
+ npy_intp *strides = NAD_STRIDES(axisdata_del);
+ npy_intp shape = NAD_SHAPE(axisdata_del), offset;
+
+ xdim = idim;
+
+ /*
+ * Adjust baseoffsets and resetbaseptr back to the start of
+ * this axis.
+ */
+ for (iop = 0; iop < nop; ++iop) {
+ offset = (shape-1)*strides[iop];
+ baseoffsets[iop] += offset;
+ resetdataptr[iop] += offset;
+ }
+ break;
+ }
+
+ NIT_ADVANCE_AXISDATA(axisdata_del, 1);
+ }
+
+ if (idim == ndim) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "internal error in iterator perm");
+ return NPY_FAIL;
+ }
+
+ if (NAD_SHAPE(axisdata_del) == 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "cannot remove a zero-sized axis from an iterator");
+ return NPY_FAIL;
+ }
+
+ /* Adjust the permutation */
+ for (idim = 0; idim < ndim-1; ++idim) {
+ npy_int8 p = (idim < xdim) ? perm[idim] : perm[idim+1];
+ if (p >= 0) {
+ if (p > axis) {
+ --p;
+ }
+ }
+ else if (p <= 0) {
+ if (p < -1-axis) {
+ ++p;
+ }
+ }
+ perm[idim] = p;
+ }
+
+ /* Adjust the iteration size */
+ NIT_ITERSIZE(iter) /= NAD_SHAPE(axisdata_del);
+
+ /* Shift all the axisdata structures by one */
+ axisdata = NIT_INDEX_AXISDATA(axisdata_del, 1);
+ memmove(axisdata_del, axisdata, (ndim-1-xdim)*sizeof_axisdata);
+
+ /* If there is more than one dimension, shrink the iterator */
+ if (ndim > 1) {
+ NIT_NDIM(iter) = ndim-1;
+ }
+ /* Otherwise convert it to a singleton dimension */
+ else {
+ npy_intp *strides = NAD_STRIDES(axisdata_del);
+ NAD_SHAPE(axisdata_del) = 1;
+ for (iop = 0; iop < nop; ++iop) {
+ strides[iop] = 0;
+ }
+ NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
+ }
+
+ return NpyIter_Reset(iter, NULL);
+}
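For illustration, a minimal sketch of the intended call pattern (the
function name and error handling are hypothetical, not part of this
patch): iterate an array with a multi-index, then collapse axis 0 so it
can be handled manually.

    #include <Python.h>
    #include <numpy/arrayobject.h>

    /* Sketch: iterate 'arr', removing axis 0 from the iteration. */
    static int
    remove_axis0_sketch(PyArrayObject *arr)
    {
        NpyIter *iter = NpyIter_New(arr,
                NPY_ITER_READONLY | NPY_ITER_MULTI_INDEX,
                NPY_KEEPORDER, NPY_NO_CASTING, NULL);

        if (iter == NULL) {
            return -1;
        }
        /* Requires a multi-index, no index, and no buffering */
        if (NpyIter_RemoveAxis(iter, 0) != NPY_SUCCEED) {
            NpyIter_Deallocate(iter);
            return -1;
        }
        /* ... iterate over the remaining axes ... */
        NpyIter_Deallocate(iter);
        return 0;
    }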
+
+/*NUMPY_API
+ * Removes multi-index support from an iterator.
+ *
+ * Returns NPY_SUCCEED or NPY_FAIL.
+ */
+NPY_NO_EXPORT int
+NpyIter_RemoveMultiIndex(NpyIter *iter)
+{
+ npy_uint32 itflags;
+
+ /* Make sure the iterator is reset */
+ if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) {
+ return NPY_FAIL;
+ }
+
+ itflags = NIT_ITFLAGS(iter);
+ if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
+ NIT_ITFLAGS(iter) = itflags & ~NPY_ITFLAG_HASMULTIINDEX;
+ npyiter_coalesce_axes(iter);
+ }
+
+ return NPY_SUCCEED;
+}
+
+/*NUMPY_API
+ * Removes the inner loop handling (so HasExternalLoop returns true).
+ *
+ * Returns NPY_SUCCEED or NPY_FAIL.
+ */
+NPY_NO_EXPORT int
+NpyIter_EnableExternalLoop(NpyIter *iter)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ /*int ndim = NIT_NDIM(iter);*/
+ int nop = NIT_NOP(iter);
+
+ /* Check conditions under which this can be done */
+ if (itflags&(NPY_ITFLAG_HASINDEX|NPY_ITFLAG_HASMULTIINDEX)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Iterator flag EXTERNAL_LOOP cannot be used "
+ "if an index or multi-index is being tracked");
+ return NPY_FAIL;
+ }
+ if ((itflags&(NPY_ITFLAG_BUFFER|NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP))
+ == (NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Iterator flag EXTERNAL_LOOP cannot be used "
+ "with ranged iteration unless buffering is also enabled");
+ return NPY_FAIL;
+ }
+ /* Set the flag */
+ if (!(itflags&NPY_ITFLAG_EXLOOP)) {
+ itflags |= NPY_ITFLAG_EXLOOP;
+ NIT_ITFLAGS(iter) = itflags;
+
+ /*
+ * Check whether we can apply the single iteration
+ * optimization to the iternext function.
+ */
+ if (!(itflags&NPY_ITFLAG_BUFFER)) {
+ NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
+ if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) {
+ NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
+ }
+ }
+ }
+
+ /* Reset the iterator */
+ return NpyIter_Reset(iter, NULL);
+}
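A common pattern, sketched here as a fragment (error handling reduced to
the essentials; 'iter' is assumed to be in scope), is to construct the
iterator with NPY_ITER_MULTI_INDEX for set-up work such as output
allocation, then drop the multi-index and enable the external loop for
fast iteration:

    /* After the set-up work that needed the multi-index is done: */
    if (NpyIter_RemoveMultiIndex(iter) != NPY_SUCCEED) {
        NpyIter_Deallocate(iter);
        return -1;
    }
    if (NpyIter_EnableExternalLoop(iter) != NPY_SUCCEED) {
        NpyIter_Deallocate(iter);
        return -1;
    }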
+
+/*NUMPY_API
+ * Resets the iterator to its initial state
+ *
+ * If errmsg is non-NULL, it should point to a variable which will
+ * receive the error message, and no Python exception will be set.
+ * This is so that the function can be called from code not holding
+ * the GIL.
+ */
+NPY_NO_EXPORT int
+NpyIter_Reset(NpyIter *iter, char **errmsg)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ /*int ndim = NIT_NDIM(iter);*/
+ int nop = NIT_NOP(iter);
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ NpyIter_BufferData *bufferdata;
+
+ /* If buffer allocation was delayed, do it now */
+ if (itflags&NPY_ITFLAG_DELAYBUF) {
+ if (!npyiter_allocate_buffers(iter, errmsg)) {
+ return NPY_FAIL;
+ }
+ NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
+ }
+ else {
+ /*
+ * If the iterindex is already right, no need to
+ * do anything
+ */
+ bufferdata = NIT_BUFFERDATA(iter);
+ if (NIT_ITERINDEX(iter) == NIT_ITERSTART(iter) &&
+ NBF_BUFITEREND(bufferdata) <= NIT_ITEREND(iter) &&
+ NBF_SIZE(bufferdata) > 0) {
+ return NPY_SUCCEED;
+ }
+
+ /* Copy any data from the buffers back to the arrays */
+ npyiter_copy_from_buffers(iter);
+ }
+ }
+
+ npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ /* Prepare the next buffers and set iterend/size */
+ npyiter_copy_to_buffers(iter, NULL);
+ }
+
+ return NPY_SUCCEED;
+}
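Because of the errmsg parameter, Reset can be called with the GIL
released; a minimal sketch (the threading macros come from the NumPy
headers, the surrounding function is hypothetical):

    char *errmsg = NULL;
    NPY_BEGIN_THREADS_DEF;

    NPY_BEGIN_THREADS;
    if (NpyIter_Reset(iter, &errmsg) != NPY_SUCCEED) {
        NPY_END_THREADS;
        /* Now that the GIL is held again, raise the error */
        PyErr_SetString(PyExc_RuntimeError, errmsg);
        return -1;
    }
    NPY_END_THREADS;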
+
+/*NUMPY_API
+ * Resets the iterator to its initial state, with new base data pointers
+ *
+ * If errmsg is non-NULL, it should point to a variable which will
+ * receive the error message, and no Python exception will be set.
+ * This is so that the function can be called from code not holding
+ * the GIL.
+ */
+NPY_NO_EXPORT int
+NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs, char **errmsg)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ /*int ndim = NIT_NDIM(iter);*/
+ int iop, nop = NIT_NOP(iter);
+
+ char **resetdataptr = NIT_RESETDATAPTR(iter);
+ npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ /* If buffer allocation was delayed, do it now */
+ if (itflags&NPY_ITFLAG_DELAYBUF) {
+ if (!npyiter_allocate_buffers(iter, errmsg)) {
+ return NPY_FAIL;
+ }
+ NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
+ }
+ else {
+ /* Copy any data from the buffers back to the arrays */
+ npyiter_copy_from_buffers(iter);
+ }
+ }
+
+ /* The new data pointers for resetting */
+ for (iop = 0; iop < nop; ++iop) {
+ resetdataptr[iop] = baseptrs[iop] + baseoffsets[iop];
+ }
+
+ npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ /* Prepare the next buffers and set iterend/size */
+ npyiter_copy_to_buffers(iter, NULL);
+ }
+
+ return NPY_SUCCEED;
+}
+
+/*NUMPY_API
+ * Resets the iterator to a new iterator index range
+ *
+ * If errmsg is non-NULL, it should point to a variable which will
+ * receive the error message, and no Python exception will be set.
+ * This is so that the function can be called from code not holding
+ * the GIL.
+ */
+NPY_NO_EXPORT int
+NpyIter_ResetToIterIndexRange(NpyIter *iter,
+ npy_intp istart, npy_intp iend, char **errmsg)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ /*int ndim = NIT_NDIM(iter);*/
+ /*int nop = NIT_NOP(iter);*/
+
+ if (!(itflags&NPY_ITFLAG_RANGE)) {
+ if (errmsg == NULL) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot call ResetToIterIndexRange on an iterator without "
+ "requesting ranged iteration support in the constructor");
+ }
+ else {
+ *errmsg = "Cannot call ResetToIterIndexRange on an iterator "
+ "without requesting ranged iteration support in the "
+ "constructor";
+ }
+ return NPY_FAIL;
+ }
+
+ if (istart < 0 || iend > NIT_ITERSIZE(iter)) {
+ if (errmsg == NULL) {
+ PyErr_Format(PyExc_ValueError,
+ "Out-of-bounds range [%d, %d) passed to "
+ "ResetToIterIndexRange", (int)istart, (int)iend);
+ }
+ else {
+ *errmsg = "Out-of-bounds range passed to ResetToIterIndexRange";
+ }
+ return NPY_FAIL;
+ }
+ else if (iend < istart) {
+ if (errmsg == NULL) {
+ PyErr_Format(PyExc_ValueError,
+ "Invalid range [%d, %d) passed to ResetToIterIndexRange",
+ (int)istart, (int)iend);
+ }
+ else {
+ *errmsg = "Invalid range passed to ResetToIterIndexRange";
+ }
+ return NPY_FAIL;
+ }
+
+ NIT_ITERSTART(iter) = istart;
+ NIT_ITEREND(iter) = iend;
+
+ return NpyIter_Reset(iter, errmsg);
+}
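A fragment sketching chunked iteration over an iterator constructed with
NPY_ITER_RANGED ('chunk' is a hypothetical chunk size, and the inner
iteration loop is elided):

    npy_intp istart, itersize = NpyIter_GetIterSize(iter);

    for (istart = 0; istart < itersize; istart += chunk) {
        npy_intp iend = istart + chunk;
        if (iend > itersize) {
            iend = itersize;
        }
        if (NpyIter_ResetToIterIndexRange(iter, istart, iend,
                                          NULL) != NPY_SUCCEED) {
            return -1;
        }
        /* ... run the iteration loop over [istart, iend) ... */
    }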
+
+/*NUMPY_API
+ * Sets the iterator to the specified multi-index, which must have the
+ * correct number of entries for 'ndim'. It is only valid
+ * when NPY_ITER_MULTI_INDEX was passed to the constructor. This operation
+ * fails if the multi-index is out of bounds.
+ *
+ * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
+ */
+NPY_NO_EXPORT int
+NpyIter_GotoMultiIndex(NpyIter *iter, npy_intp *multi_index)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int nop = NIT_NOP(iter);
+
+ npy_intp iterindex, factor;
+ NpyIter_AxisData *axisdata;
+ npy_intp sizeof_axisdata;
+ npy_int8 *perm;
+
+ if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot call GotoMultiIndex on an iterator without "
+ "requesting a multi-index in the constructor");
+ return NPY_FAIL;
+ }
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot call GotoMultiIndex on an iterator which "
+ "is buffered");
+ return NPY_FAIL;
+ }
+
+ if (itflags&NPY_ITFLAG_EXLOOP) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot call GotoMultiIndex on an iterator which "
+ "has the flag EXTERNAL_LOOP");
+ return NPY_FAIL;
+ }
+
+ perm = NIT_PERM(iter);
+ axisdata = NIT_AXISDATA(iter);
+ sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+
+ /* Compute the iterindex corresponding to the multi-index */
+ iterindex = 0;
+ factor = 1;
+ for (idim = 0; idim < ndim; ++idim) {
+ npy_int8 p = perm[idim];
+ npy_intp i, shape;
+
+ shape = NAD_SHAPE(axisdata);
+ if (p < 0) {
+ /* If the perm entry is negative, reverse the index */
+ i = shape - multi_index[ndim+p] - 1;
+ }
+ else {
+ i = multi_index[ndim-p-1];
+ }
+
+ /* Bounds-check this index */
+ if (i >= 0 && i < shape) {
+ iterindex += factor * i;
+ factor *= shape;
+ }
+ else {
+ PyErr_SetString(PyExc_IndexError,
+ "Iterator GotoMultiIndex called with an out-of-bounds "
+ "multi-index");
+ return NPY_FAIL;
+ }
+
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ }
+
+ if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
+ PyErr_SetString(PyExc_IndexError,
+ "Iterator GotoMultiIndex called with a multi-index outside the "
+ "restricted iteration range");
+ return NPY_FAIL;
+ }
+
+ npyiter_goto_iterindex(iter, iterindex);
+
+ return NPY_SUCCEED;
+}
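A fragment sketching random access (it assumes a 2-d iterator created
with NPY_ITER_MULTI_INDEX and without buffering or an external loop;
GotoIndex and GotoIterIndex below follow the same call pattern):

    npy_intp multi_index[2] = {1, 2};
    char *data;

    if (NpyIter_GotoMultiIndex(iter, multi_index) != NPY_SUCCEED) {
        return -1;
    }
    /* The data pointers now point at element (1, 2) */
    data = NpyIter_GetDataPtrArray(iter)[0];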
+
+/*NUMPY_API
+ * If the iterator is tracking an index, sets the iterator
+ * to the specified index.
+ *
+ * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
+ */
+NPY_NO_EXPORT int
+NpyIter_GotoIndex(NpyIter *iter, npy_intp flat_index)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int nop = NIT_NOP(iter);
+
+ npy_intp iterindex, factor;
+ NpyIter_AxisData *axisdata;
+ npy_intp sizeof_axisdata;
+
+ if (!(itflags&NPY_ITFLAG_HASINDEX)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot call GotoIndex on an iterator without "
+ "requesting a C or Fortran index in the constructor");
+ return NPY_FAIL;
+ }
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot call GotoIndex on an iterator which "
+ "is buffered");
+ return NPY_FAIL;
+ }
+
+ if (itflags&NPY_ITFLAG_EXLOOP) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot call GotoIndex on an iterator which "
+ "has the flag EXTERNAL_LOOP");
+ return NPY_FAIL;
+ }
+
+ if (flat_index < 0 || flat_index >= NIT_ITERSIZE(iter)) {
+ PyErr_SetString(PyExc_IndexError,
+ "Iterator GotoIndex called with an out-of-bounds "
+ "index");
+ return NPY_FAIL;
+ }
+
+ axisdata = NIT_AXISDATA(iter);
+ sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+
+ /* Compute the iterindex corresponding to the flat_index */
+ iterindex = 0;
+ factor = 1;
+ for (idim = 0; idim < ndim; ++idim) {
+ npy_intp i, shape, iterstride;
+
+ iterstride = NAD_STRIDES(axisdata)[nop];
+ shape = NAD_SHAPE(axisdata);
+
+ /* Extract the index from the flat_index */
+ if (iterstride == 0) {
+ i = 0;
+ }
+ else if (iterstride < 0) {
+ i = shape - (flat_index/(-iterstride))%shape - 1;
+ }
+ else {
+ i = (flat_index/iterstride)%shape;
+ }
+
+ /* Add its contribution to iterindex */
+ iterindex += factor * i;
+ factor *= shape;
+
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ }
+
+ if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
+ PyErr_SetString(PyExc_IndexError,
+ "Iterator GotoIndex called with an index outside the "
+ "restricted iteration range.");
+ return NPY_FAIL;
+ }
+
+ npyiter_goto_iterindex(iter, iterindex);
+
+ return NPY_SUCCEED;
+}
+
+/*NUMPY_API
+ * Sets the iterator position to the specified iterindex,
+ * which matches the iteration order of the iterator.
+ *
+ * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
+ */
+NPY_NO_EXPORT int
+NpyIter_GotoIterIndex(NpyIter *iter, npy_intp iterindex)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ /*int ndim = NIT_NDIM(iter);*/
+ int iop, nop = NIT_NOP(iter);
+
+ if (itflags&NPY_ITFLAG_EXLOOP) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot call GotoIterIndex on an iterator which "
+ "has the flag EXTERNAL_LOOP");
+ return NPY_FAIL;
+ }
+
+ if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
+ PyErr_SetString(PyExc_IndexError,
+ "Iterator GotoIterIndex called with an iterindex outside the "
+ "iteration range.");
+ return NPY_FAIL;
+ }
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
+ npy_intp bufiterend, size;
+
+ size = NBF_SIZE(bufferdata);
+ bufiterend = NBF_BUFITEREND(bufferdata);
+ /* Check if the new iterindex is already within the buffer */
+ if (!(itflags&NPY_ITFLAG_REDUCE) && iterindex < bufiterend &&
+ iterindex >= bufiterend - size) {
+ npy_intp *strides, delta;
+ char **ptrs;
+
+ strides = NBF_STRIDES(bufferdata);
+ ptrs = NBF_PTRS(bufferdata);
+ delta = iterindex - NIT_ITERINDEX(iter);
+
+ for (iop = 0; iop < nop; ++iop) {
+ ptrs[iop] += delta * strides[iop];
+ }
+
+ NIT_ITERINDEX(iter) = iterindex;
+ }
+ /* Start the buffer at the provided iterindex */
+ else {
+ /* Write back to the arrays */
+ npyiter_copy_from_buffers(iter);
+
+ npyiter_goto_iterindex(iter, iterindex);
+
+ /* Prepare the next buffers and set iterend/size */
+ npyiter_copy_to_buffers(iter, NULL);
+ }
+ }
+ else {
+ npyiter_goto_iterindex(iter, iterindex);
+ }
+
+ return NPY_SUCCEED;
+}
+
+/*NUMPY_API
+ * Gets the current iteration index
+ */
+NPY_NO_EXPORT npy_intp
+NpyIter_GetIterIndex(NpyIter *iter)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int nop = NIT_NOP(iter);
+
+ /* iterindex is only used if NPY_ITER_RANGED or NPY_ITER_BUFFERED was set */
+ if (itflags&(NPY_ITFLAG_RANGE|NPY_ITFLAG_BUFFER)) {
+ return NIT_ITERINDEX(iter);
+ }
+ else {
+ npy_intp iterindex;
+ NpyIter_AxisData *axisdata;
+ npy_intp sizeof_axisdata;
+
+ iterindex = 0;
+ sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+ axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1);
+
+ for (idim = ndim-2; idim >= 0; --idim) {
+ iterindex += NAD_INDEX(axisdata);
+ NIT_ADVANCE_AXISDATA(axisdata, -1);
+ iterindex *= NAD_SHAPE(axisdata);
+ }
+ iterindex += NAD_INDEX(axisdata);
+
+ return iterindex;
+ }
+}
+
+/*NUMPY_API
+ * Whether the buffer allocation is being delayed
+ */
+NPY_NO_EXPORT npy_bool
+NpyIter_HasDelayedBufAlloc(NpyIter *iter)
+{
+ return (NIT_ITFLAGS(iter)&NPY_ITFLAG_DELAYBUF) != 0;
+}
+
+/*NUMPY_API
+ * Whether the iterator handles the inner loop
+ */
+NPY_NO_EXPORT npy_bool
+NpyIter_HasExternalLoop(NpyIter *iter)
+{
+ return (NIT_ITFLAGS(iter)&NPY_ITFLAG_EXLOOP) != 0;
+}
+
+/*NUMPY_API
+ * Whether the iterator is tracking a multi-index
+ */
+NPY_NO_EXPORT npy_bool
+NpyIter_HasMultiIndex(NpyIter *iter)
+{
+ return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASMULTIINDEX) != 0;
+}
+
+/*NUMPY_API
+ * Whether the iterator is tracking an index
+ */
+NPY_NO_EXPORT npy_bool
+NpyIter_HasIndex(NpyIter *iter)
+{
+ return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASINDEX) != 0;
+}
+
+/*NUMPY_API
+ * Whether the iteration requires buffering, for example because
+ * an operand needs a cast.
+ */
+NPY_NO_EXPORT npy_bool
+NpyIter_RequiresBuffering(NpyIter *iter)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ /*int ndim = NIT_NDIM(iter);*/
+ int iop, nop = NIT_NOP(iter);
+
+ char *op_itflags;
+
+ if (!(itflags&NPY_ITFLAG_BUFFER)) {
+ return 0;
+ }
+
+ op_itflags = NIT_OPITFLAGS(iter);
+
+ /* If any operand requires a cast, buffering is mandatory */
+ for (iop = 0; iop < nop; ++iop) {
+ if (op_itflags[iop]&NPY_OP_ITFLAG_CAST) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*NUMPY_API
+ * Whether the iteration loop, and in particular the iternext()
+ * function, needs API access. If this is true, the GIL must
+ * be retained while iterating.
+ */
+NPY_NO_EXPORT npy_bool
+NpyIter_IterationNeedsAPI(NpyIter *iter)
+{
+ return (NIT_ITFLAGS(iter)&NPY_ITFLAG_NEEDSAPI) != 0;
+}
+
+/*NUMPY_API
+ * Gets the number of dimensions being iterated
+ */
+NPY_NO_EXPORT int
+NpyIter_GetNDim(NpyIter *iter)
+{
+ return NIT_NDIM(iter);
+}
+
+/*NUMPY_API
+ * Gets the number of operands being iterated
+ */
+NPY_NO_EXPORT int
+NpyIter_GetNOp(NpyIter *iter)
+{
+ return NIT_NOP(iter);
+}
+
+/*NUMPY_API
+ * Gets the number of elements being iterated
+ */
+NPY_NO_EXPORT npy_intp
+NpyIter_GetIterSize(NpyIter *iter)
+{
+ return NIT_ITERSIZE(iter);
+}
+
+/*NUMPY_API
+ * Whether the iterator is buffered
+ */
+NPY_NO_EXPORT npy_bool
+NpyIter_IsBuffered(NpyIter *iter)
+{
+ return (NIT_ITFLAGS(iter)&NPY_ITFLAG_BUFFER) != 0;
+}
+
+/*NUMPY_API
+ * Whether the inner loop can grow if buffering is unneeded
+ */
+NPY_NO_EXPORT npy_bool
+NpyIter_IsGrowInner(NpyIter *iter)
+{
+ return (NIT_ITFLAGS(iter)&NPY_ITFLAG_GROWINNER) != 0;
+}
+
+/*NUMPY_API
+ * Gets the size of the buffer, or 0 if buffering is not enabled
+ */
+NPY_NO_EXPORT npy_intp
+NpyIter_GetBufferSize(NpyIter *iter)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ /*int ndim = NIT_NDIM(iter);*/
+ int nop = NIT_NOP(iter);
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
+ return NBF_BUFFERSIZE(bufferdata);
+ }
+ else {
+ return 0;
+ }
+}
+
+/*NUMPY_API
+ * Gets the range of iteration indices being iterated
+ */
+NPY_NO_EXPORT void
+NpyIter_GetIterIndexRange(NpyIter *iter,
+ npy_intp *istart, npy_intp *iend)
+{
+ *istart = NIT_ITERSTART(iter);
+ *iend = NIT_ITEREND(iter);
+}
+
+/*NUMPY_API
+ * Gets the broadcast shape if a multi-index is being tracked by the iterator,
+ * otherwise gets the shape of the iteration as Fortran-order
+ * (fastest-changing index first).
+ *
+ * The reason Fortran-order is returned when a multi-index
+ * is not enabled is that this is providing a direct view into how
+ * the iterator traverses the n-dimensional space. The iterator organizes
+ * its memory from fastest index to slowest index, and when
+ * a multi-index is enabled, it uses a permutation to recover the original
+ * order.
+ *
+ * Returns NPY_SUCCEED or NPY_FAIL.
+ */
+NPY_NO_EXPORT int
+NpyIter_GetShape(NpyIter *iter, npy_intp *outshape)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int ndim = NIT_NDIM(iter);
+ int nop = NIT_NOP(iter);
+
+ int idim, sizeof_axisdata;
+ NpyIter_AxisData *axisdata;
+ npy_int8 *perm;
+
+ axisdata = NIT_AXISDATA(iter);
+ sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+
+ if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
+ perm = NIT_PERM(iter);
+ for(idim = 0; idim < ndim; ++idim) {
+ npy_int8 p = perm[idim];
+ if (p < 0) {
+ outshape[ndim+p] = NAD_SHAPE(axisdata);
+ }
+ else {
+ outshape[ndim-p-1] = NAD_SHAPE(axisdata);
+ }
+
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ }
+ }
+ else {
+ for(idim = 0; idim < ndim; ++idim) {
+ outshape[idim] = NAD_SHAPE(axisdata);
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ }
+ }
+
+ return NPY_SUCCEED;
+}
+
+/*NUMPY_API
+ * Builds a set of strides which are the same as the strides of an
+ * output array created using the NPY_ITER_ALLOCATE flag, where NULL
+ * was passed for op_axes. This is for data packed contiguously,
+ * but not necessarily in C or Fortran order. This should be used
+ * together with NpyIter_GetShape and NpyIter_GetNDim.
+ *
+ * A use case for this function is to match the shape and layout of
+ * the iterator and tack on one or more dimensions. For example,
+ * in order to generate a vector per input value for a numerical gradient,
+ * you pass in ndim*itemsize for itemsize, then add another dimension to
+ * the end with size ndim and stride itemsize. To do the Hessian matrix,
+ * you do the same thing but add two dimensions, or take advantage of
+ * the symmetry and pack it into 1 dimension with a particular encoding.
+ *
+ * This function may only be called if the iterator is tracking a multi-index
+ * and if NPY_ITER_DONT_NEGATE_STRIDES was used to prevent an axis from
+ * being iterated in reverse order.
+ *
+ * If an array is created with this method, simply adding 'itemsize'
+ * for each iteration will traverse the new array matching the
+ * iterator.
+ *
+ * Returns NPY_SUCCEED or NPY_FAIL.
+ */
+NPY_NO_EXPORT int
+NpyIter_CreateCompatibleStrides(NpyIter *iter,
+ npy_intp itemsize, npy_intp *outstrides)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int nop = NIT_NOP(iter);
+
+ npy_intp sizeof_axisdata;
+ NpyIter_AxisData *axisdata;
+ npy_int8 *perm;
+
+ if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Iterator CreateCompatibleStrides may only be called "
+ "if a multi-index is being tracked");
+ return NPY_FAIL;
+ }
+
+ axisdata = NIT_AXISDATA(iter);
+ sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+
+ perm = NIT_PERM(iter);
+ for(idim = 0; idim < ndim; ++idim) {
+ npy_int8 p = perm[idim];
+ if (p < 0) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Iterator CreateCompatibleStrides may only be called "
+ "if DONT_NEGATE_STRIDES was used to prevent reverse "
+ "iteration of an axis");
+ return NPY_FAIL;
+ }
+ else {
+ outstrides[ndim-p-1] = itemsize;
+ }
+
+ itemsize *= NAD_SHAPE(axisdata);
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ }
+
+ return NPY_SUCCEED;
+}
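A fragment sketching the gradient use case described above (the
allocation call is elided; it assumes the iterator tracks a multi-index
and was created with DONT_NEGATE_STRIDES):

    npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
    int ndim = NpyIter_GetNDim(iter);
    npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize;

    if (NpyIter_GetShape(iter, shape) != NPY_SUCCEED ||
            NpyIter_CreateCompatibleStrides(iter,
                        ndim*itemsize, strides) != NPY_SUCCEED) {
        return -1;
    }
    /* Tack a gradient dimension of size ndim onto the end */
    shape[ndim] = ndim;
    strides[ndim] = itemsize;
    /* ... allocate the output with PyArray_NewFromDescr(...,
       ndim+1, shape, strides, NULL, 0, NULL) ... */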
+
+/*NUMPY_API
+ * Get the array of data pointers (1 per object being iterated)
+ *
+ * This function may be safely called without holding the Python GIL.
+ */
+NPY_NO_EXPORT char **
+NpyIter_GetDataPtrArray(NpyIter *iter)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ /*int ndim = NIT_NDIM(iter);*/
+ int nop = NIT_NOP(iter);
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
+ return NBF_PTRS(bufferdata);
+ }
+ else {
+ NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
+ return NAD_PTRS(axisdata);
+ }
+}
+
+/*NUMPY_API
+ * Get the array of data pointers (1 per object being iterated),
+ * directly into the arrays (never pointing to a buffer), for starting
+ * unbuffered iteration. This always returns the addresses for the
+ * iterator position as reset to iterator index 0.
+ *
+ * These pointers are different from the pointers accepted by
+ * NpyIter_ResetBasePointers, because the direction along some
+ * axes may have been reversed, requiring base offsets.
+ *
+ * This function may be safely called without holding the Python GIL.
+ */
+NPY_NO_EXPORT char **
+NpyIter_GetInitialDataPtrArray(NpyIter *iter)
+{
+ /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
+ /*int ndim = NIT_NDIM(iter);*/
+ int nop = NIT_NOP(iter);
+
+ return NIT_RESETDATAPTR(iter);
+}
+
+/*NUMPY_API
+ * Get the array of data type pointers (1 per object being iterated)
+ */
+NPY_NO_EXPORT PyArray_Descr **
+NpyIter_GetDescrArray(NpyIter *iter)
+{
+ /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
+ /*int ndim = NIT_NDIM(iter);*/
+ /*int nop = NIT_NOP(iter);*/
+
+ return NIT_DTYPES(iter);
+}
+
+/*NUMPY_API
+ * Get the array of objects being iterated
+ */
+NPY_NO_EXPORT PyArrayObject **
+NpyIter_GetOperandArray(NpyIter *iter)
+{
+ /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
+ /*int ndim = NIT_NDIM(iter);*/
+ int nop = NIT_NOP(iter);
+
+ return NIT_OPERANDS(iter);
+}
+
+/*NUMPY_API
+ * Returns a view to the i-th object with the iterator's internal axes
+ */
+NPY_NO_EXPORT PyArrayObject *
+NpyIter_GetIterView(NpyIter *iter, npy_intp i)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int nop = NIT_NOP(iter);
+
+ npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
+ PyArrayObject *obj, *view;
+ PyArray_Descr *dtype;
+ char *dataptr;
+ NpyIter_AxisData *axisdata;
+ npy_intp sizeof_axisdata;
+ int writeable;
+
+ if (i < 0 || i >= nop) {
+ PyErr_SetString(PyExc_IndexError,
+ "index provided for an iterator view was out of bounds");
+ return NULL;
+ }
+
+ /* Don't provide views if buffering is enabled */
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ PyErr_SetString(PyExc_ValueError,
+ "cannot provide an iterator view when buffering is enabled");
+ return NULL;
+ }
+
+ obj = NIT_OPERANDS(iter)[i];
+ dtype = PyArray_DESCR(obj);
+ writeable = NIT_OPITFLAGS(iter)[i]&NPY_OP_ITFLAG_WRITE;
+ dataptr = NIT_RESETDATAPTR(iter)[i];
+ axisdata = NIT_AXISDATA(iter);
+ sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+
+ /* Retrieve the shape and strides from the axisdata */
+ for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
+ shape[ndim-idim-1] = NAD_SHAPE(axisdata);
+ strides[ndim-idim-1] = NAD_STRIDES(axisdata)[i];
+ }
+
+ Py_INCREF(dtype);
+ view = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, ndim,
+ shape, strides, dataptr,
+ writeable ? NPY_ARRAY_WRITEABLE : 0,
+ NULL);
+ if (view == NULL) {
+ return NULL;
+ }
+ /* Tell the view who owns the data */
+ Py_INCREF(obj);
+ view->base = (PyObject *)obj;
+ /* Make sure all the flags are good */
+ PyArray_UpdateFlags(view, NPY_ARRAY_UPDATE_ALL);
+
+ return view;
+}
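A short sketch of retrieving and releasing such a view ('iter' is
assumed to be an unbuffered iterator):

    /* Get operand 0 as an array whose axes follow the iterator's
       internal (permuted) axis order. */
    PyArrayObject *view = NpyIter_GetIterView(iter, 0);
    if (view == NULL) {
        return -1;
    }
    /* ... inspect or modify the view ... */
    Py_DECREF(view);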
+
+/*NUMPY_API
+ * Get a pointer to the index, if it is being tracked
+ */
+NPY_NO_EXPORT npy_intp *
+NpyIter_GetIndexPtr(NpyIter *iter)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ /*int ndim = NIT_NDIM(iter);*/
+ int nop = NIT_NOP(iter);
+
+ NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
+
+ if (itflags&NPY_ITFLAG_HASINDEX) {
+ /* The index is just after the data pointers */
+ return (npy_intp*)NAD_PTRS(axisdata) + nop;
+ }
+ else {
+ return NULL;
+ }
+}
+
+/*NUMPY_API
+ * Gets an array of read flags (1 per object being iterated)
+ */
+NPY_NO_EXPORT void
+NpyIter_GetReadFlags(NpyIter *iter, char *outreadflags)
+{
+ /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
+ /*int ndim = NIT_NDIM(iter);*/
+ int iop, nop = NIT_NOP(iter);
+
+ char *op_itflags = NIT_OPITFLAGS(iter);
+
+ for (iop = 0; iop < nop; ++iop) {
+ outreadflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_READ) != 0;
+ }
+}
+
+/*NUMPY_API
+ * Gets an array of write flags (1 per object being iterated)
+ */
+NPY_NO_EXPORT void
+NpyIter_GetWriteFlags(NpyIter *iter, char *outwriteflags)
+{
+ /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
+ /*int ndim = NIT_NDIM(iter);*/
+ int iop, nop = NIT_NOP(iter);
+
+ char *op_itflags = NIT_OPITFLAGS(iter);
+
+ for (iop = 0; iop < nop; ++iop) {
+ outwriteflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) != 0;
+ }
+}
+
+
+/*NUMPY_API
+ * Get the array of strides for the inner loop (when HasExternalLoop is true)
+ *
+ * This function may be safely called without holding the Python GIL.
+ */
+NPY_NO_EXPORT npy_intp *
+NpyIter_GetInnerStrideArray(NpyIter *iter)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ /*int ndim = NIT_NDIM(iter);*/
+ int nop = NIT_NOP(iter);
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
+ return NBF_STRIDES(data);
+ }
+ else {
+ NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
+ return NAD_STRIDES(axisdata);
+ }
+}
+
+/*NUMPY_API
+ * Gets the array of strides for the specified axis.
+ * If the iterator is tracking a multi-index, gets the strides
+ * for the axis specified, otherwise gets the strides for
+ * the iteration axis as Fortran order (fastest-changing axis first).
+ *
+ * Returns NULL if an error occurs.
+ */
+NPY_NO_EXPORT npy_intp *
+NpyIter_GetAxisStrideArray(NpyIter *iter, int axis)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int nop = NIT_NOP(iter);
+
+ npy_int8 *perm = NIT_PERM(iter);
+ NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
+ npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+
+ if (axis < 0 || axis >= ndim) {
+ PyErr_SetString(PyExc_ValueError,
+            "axis out of bounds in iterator GetAxisStrideArray");
+ return NULL;
+ }
+
+ if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
+        /* Reverse the axis, since the iterator treats axes that way */
+ axis = ndim-1-axis;
+
+ /* First find the axis in question */
+ for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
+ if (perm[idim] == axis || -1 - perm[idim] == axis) {
+ return NAD_STRIDES(axisdata);
+ }
+ }
+ }
+ else {
+ return NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, axis));
+ }
+
+ PyErr_SetString(PyExc_RuntimeError,
+ "internal error in iterator perm");
+ return NULL;
+}
+
+/*NUMPY_API
+ * Get an array of strides which are fixed. Any strides which may
+ * change during iteration receive the value NPY_MAX_INTP. Once
+ * the iterator is ready to iterate, call this to get the strides
+ * which will always be fixed in the inner loop, then choose optimized
+ * inner loop functions which take advantage of those fixed strides.
+ *
+ * This function may be safely called without holding the Python GIL.
+ */
+NPY_NO_EXPORT void
+NpyIter_GetInnerFixedStrideArray(NpyIter *iter, npy_intp *out_strides)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int ndim = NIT_NDIM(iter);
+ int iop, nop = NIT_NOP(iter);
+
+ NpyIter_AxisData *axisdata0 = NIT_AXISDATA(iter);
+ npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
+ char *op_itflags = NIT_OPITFLAGS(iter);
+ npy_intp stride, *strides = NBF_STRIDES(data),
+ *ad_strides = NAD_STRIDES(axisdata0);
+ PyArray_Descr **dtypes = NIT_DTYPES(iter);
+
+ for (iop = 0; iop < nop; ++iop) {
+ stride = strides[iop];
+ /*
+ * Operands which are always/never buffered have fixed strides,
+ * and everything has fixed strides when ndim is 0 or 1
+ */
+ if (ndim <= 1 || (op_itflags[iop]&
+ (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_BUFNEVER))) {
+ out_strides[iop] = stride;
+ }
+ /* If it's a reduction, 0-stride inner loop may have fixed stride */
+ else if (stride == 0 && (itflags&NPY_ITFLAG_REDUCE)) {
+ /* If it's a reduction operand, definitely fixed stride */
+ if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
+ out_strides[iop] = stride;
+ }
+ /*
+ * Otherwise it's a fixed stride if the stride is 0
+ * for all inner dimensions of the reduction double loop
+ */
+ else {
+ NpyIter_AxisData *axisdata = axisdata0;
+ int idim,
+ reduce_outerdim = NBF_REDUCE_OUTERDIM(data);
+ for (idim = 0; idim < reduce_outerdim; ++idim) {
+ if (NAD_STRIDES(axisdata)[iop] != 0) {
+ break;
+ }
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ }
+ /* If all the strides were 0, the stride won't change */
+ if (idim == reduce_outerdim) {
+ out_strides[iop] = stride;
+ }
+ else {
+ out_strides[iop] = NPY_MAX_INTP;
+ }
+ }
+ }
+ /*
+ * Inner loop contiguous array means its stride won't change when
+ * switching between buffering and not buffering
+ */
+ else if (ad_strides[iop] == dtypes[iop]->elsize) {
+ out_strides[iop] = ad_strides[iop];
+ }
+ /*
+ * Otherwise the strides can change if the operand is sometimes
+ * buffered, sometimes not.
+ */
+ else {
+ out_strides[iop] = NPY_MAX_INTP;
+ }
+ }
+ }
+ else {
+ /* If there's no buffering, the strides are always fixed */
+ memcpy(out_strides, NAD_STRIDES(axisdata0), nop*NPY_SIZEOF_INTP);
+ }
+}
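As a sketch of the intended use (the contiguous fast path shown is a
hypothetical example of "choosing an optimized inner loop"):

    npy_intp fixed_strides[NPY_MAXARGS];
    npy_intp itemsize = NpyIter_GetDescrArray(iter)[0]->elsize;

    NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
    if (fixed_strides[0] == itemsize) {
        /* Operand 0 is contiguous for every inner loop, so a
           specialized contiguous inner loop can be selected. */
    }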
+
+/*NUMPY_API
+ * Get a pointer to the size of the inner loop (when HasExternalLoop is true)
+ *
+ * This function may be safely called without holding the Python GIL.
+ */
+NPY_NO_EXPORT npy_intp *
+NpyIter_GetInnerLoopSizePtr(NpyIter *iter)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ /*int ndim = NIT_NDIM(iter);*/
+ int nop = NIT_NOP(iter);
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
+ return &NBF_SIZE(data);
+ }
+ else {
+ NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
+ return &NAD_SHAPE(axisdata);
+ }
+}
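Together with NpyIter_GetDataPtrArray and NpyIter_GetInnerStrideArray,
this pointer enables the standard external-loop pattern; a minimal
sketch, where 'process' is a hypothetical inner-loop function:

    NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
    char **dataptr = NpyIter_GetDataPtrArray(iter);
    npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
    npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter);

    if (iternext == NULL) {
        return -1;
    }
    do {
        /* Re-read the size and stride on each pass: with buffering
           or GROWINNER they may change between outer iterations. */
        process(dataptr[0], strideptr[0], sizeptr[0]);
    } while (iternext(iter));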
+
+/*NUMPY_API
+ * For debugging
+ */
+NPY_NO_EXPORT void
+NpyIter_DebugPrint(NpyIter *iter)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int iop, nop = NIT_NOP(iter);
+
+ NpyIter_AxisData *axisdata;
+ npy_intp sizeof_axisdata;
+
+ PyGILState_STATE gilstate = PyGILState_Ensure();
+
+ printf("\n------ BEGIN ITERATOR DUMP ------\n");
+ printf("| Iterator Address: %p\n", (void *)iter);
+ printf("| ItFlags: ");
+ if (itflags&NPY_ITFLAG_IDENTPERM)
+ printf("IDENTPERM ");
+ if (itflags&NPY_ITFLAG_NEGPERM)
+ printf("NEGPERM ");
+ if (itflags&NPY_ITFLAG_HASINDEX)
+ printf("HASINDEX ");
+ if (itflags&NPY_ITFLAG_HASMULTIINDEX)
+ printf("HASMULTIINDEX ");
+ if (itflags&NPY_ITFLAG_FORCEDORDER)
+ printf("FORCEDORDER ");
+ if (itflags&NPY_ITFLAG_EXLOOP)
+ printf("EXLOOP ");
+ if (itflags&NPY_ITFLAG_RANGE)
+ printf("RANGE ");
+ if (itflags&NPY_ITFLAG_BUFFER)
+ printf("BUFFER ");
+ if (itflags&NPY_ITFLAG_GROWINNER)
+ printf("GROWINNER ");
+ if (itflags&NPY_ITFLAG_ONEITERATION)
+ printf("ONEITERATION ");
+ if (itflags&NPY_ITFLAG_DELAYBUF)
+ printf("DELAYBUF ");
+ if (itflags&NPY_ITFLAG_NEEDSAPI)
+ printf("NEEDSAPI ");
+ if (itflags&NPY_ITFLAG_REDUCE)
+ printf("REDUCE ");
+ if (itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS)
+ printf("REUSE_REDUCE_LOOPS ");
+ printf("\n");
+ printf("| NDim: %d\n", (int)ndim);
+ printf("| NOp: %d\n", (int)nop);
+ printf("| IterSize: %d\n", (int)NIT_ITERSIZE(iter));
+ printf("| IterStart: %d\n", (int)NIT_ITERSTART(iter));
+ printf("| IterEnd: %d\n", (int)NIT_ITEREND(iter));
+ printf("| IterIndex: %d\n", (int)NIT_ITERINDEX(iter));
+ printf("| Iterator SizeOf: %d\n",
+ (int)NIT_SIZEOF_ITERATOR(itflags, ndim, nop));
+ printf("| BufferData SizeOf: %d\n",
+ (int)NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop));
+ printf("| AxisData SizeOf: %d\n",
+ (int)NIT_AXISDATA_SIZEOF(itflags, ndim, nop));
+ printf("|\n");
+
+ printf("| Perm: ");
+ for (idim = 0; idim < ndim; ++idim) {
+ printf("%d ", (int)NIT_PERM(iter)[idim]);
+ }
+ printf("\n");
+ printf("| DTypes: ");
+ for (iop = 0; iop < nop; ++iop) {
+ printf("%p ", (void *)NIT_DTYPES(iter)[iop]);
+ }
+ printf("\n");
+ printf("| DTypes: ");
+ for (iop = 0; iop < nop; ++iop) {
+ if (NIT_DTYPES(iter)[iop] != NULL)
+ PyObject_Print((PyObject*)NIT_DTYPES(iter)[iop], stdout, 0);
+ else
+ printf("(nil) ");
+ printf(" ");
+ }
+ printf("\n");
+ printf("| InitDataPtrs: ");
+ for (iop = 0; iop < nop; ++iop) {
+ printf("%p ", (void *)NIT_RESETDATAPTR(iter)[iop]);
+ }
+ printf("\n");
+ printf("| BaseOffsets: ");
+ for (iop = 0; iop < nop; ++iop) {
+ printf("%i ", (int)NIT_BASEOFFSETS(iter)[iop]);
+ }
+ printf("\n");
+ if (itflags&NPY_ITFLAG_HASINDEX) {
+ printf("| InitIndex: %d\n",
+ (int)(npy_intp)NIT_RESETDATAPTR(iter)[nop]);
+ }
+ printf("| Operands: ");
+ for (iop = 0; iop < nop; ++iop) {
+ printf("%p ", (void *)NIT_OPERANDS(iter)[iop]);
+ }
+ printf("\n");
+ printf("| Operand DTypes: ");
+ for (iop = 0; iop < nop; ++iop) {
+ PyArray_Descr *dtype;
+ if (NIT_OPERANDS(iter)[iop] != NULL) {
+ dtype = PyArray_DESCR(NIT_OPERANDS(iter)[iop]);
+ if (dtype != NULL)
+ PyObject_Print((PyObject *)dtype, stdout, 0);
+ else
+ printf("(nil) ");
+ }
+ else {
+ printf("(op nil) ");
+ }
+ printf(" ");
+ }
+ printf("\n");
+ printf("| OpItFlags:\n");
+ for (iop = 0; iop < nop; ++iop) {
+ printf("| Flags[%d]: ", (int)iop);
+ if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_READ)
+ printf("READ ");
+ if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITE)
+ printf("WRITE ");
+ if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_CAST)
+ printf("CAST ");
+ if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_BUFNEVER)
+ printf("BUFNEVER ");
+ if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_ALIGNED)
+ printf("ALIGNED ");
+ if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_REDUCE)
+ printf("REDUCE ");
+ printf("\n");
+ }
+ printf("|\n");
+
+ if (itflags&NPY_ITFLAG_BUFFER) {
+ NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
+ printf("| BufferData:\n");
+ printf("| BufferSize: %d\n", (int)NBF_BUFFERSIZE(bufferdata));
+ printf("| Size: %d\n", (int)NBF_SIZE(bufferdata));
+ printf("| BufIterEnd: %d\n", (int)NBF_BUFITEREND(bufferdata));
+ if (itflags&NPY_ITFLAG_REDUCE) {
+ printf("| REDUCE Pos: %d\n",
+ (int)NBF_REDUCE_POS(bufferdata));
+ printf("| REDUCE OuterSize: %d\n",
+ (int)NBF_REDUCE_OUTERSIZE(bufferdata));
+ printf("| REDUCE OuterDim: %d\n",
+ (int)NBF_REDUCE_OUTERDIM(bufferdata));
+ }
+ printf("| Strides: ");
+ for (iop = 0; iop < nop; ++iop)
+ printf("%d ", (int)NBF_STRIDES(bufferdata)[iop]);
+ printf("\n");
+        /* Print the fixed strides when the caller handles the inner loop */
+ if (itflags&NPY_ITFLAG_EXLOOP) {
+ npy_intp fixedstrides[NPY_MAXDIMS];
+ printf("| Fixed Strides: ");
+ NpyIter_GetInnerFixedStrideArray(iter, fixedstrides);
+ for (iop = 0; iop < nop; ++iop)
+ printf("%d ", (int)fixedstrides[iop]);
+ printf("\n");
+ }
+ printf("| Ptrs: ");
+ for (iop = 0; iop < nop; ++iop)
+ printf("%p ", (void *)NBF_PTRS(bufferdata)[iop]);
+ printf("\n");
+ if (itflags&NPY_ITFLAG_REDUCE) {
+ printf("| REDUCE Outer Strides: ");
+ for (iop = 0; iop < nop; ++iop)
+ printf("%d ", (int)NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop]);
+ printf("\n");
+ printf("| REDUCE Outer Ptrs: ");
+ for (iop = 0; iop < nop; ++iop)
+ printf("%p ", (void *)NBF_REDUCE_OUTERPTRS(bufferdata)[iop]);
+ printf("\n");
+ }
+ printf("| ReadTransferFn: ");
+ for (iop = 0; iop < nop; ++iop)
+ printf("%p ", (void *)NBF_READTRANSFERFN(bufferdata)[iop]);
+ printf("\n");
+ printf("| ReadTransferData: ");
+ for (iop = 0; iop < nop; ++iop)
+ printf("%p ", (void *)NBF_READTRANSFERDATA(bufferdata)[iop]);
+ printf("\n");
+ printf("| WriteTransferFn: ");
+ for (iop = 0; iop < nop; ++iop)
+ printf("%p ", (void *)NBF_WRITETRANSFERFN(bufferdata)[iop]);
+ printf("\n");
+ printf("| WriteTransferData: ");
+ for (iop = 0; iop < nop; ++iop)
+ printf("%p ", (void *)NBF_WRITETRANSFERDATA(bufferdata)[iop]);
+ printf("\n");
+ printf("| Buffers: ");
+ for (iop = 0; iop < nop; ++iop)
+ printf("%p ", (void *)NBF_BUFFERS(bufferdata)[iop]);
+ printf("\n");
+ printf("|\n");
+ }
+
+ axisdata = NIT_AXISDATA(iter);
+ sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+ for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
+ printf("| AxisData[%d]:\n", (int)idim);
+ printf("| Shape: %d\n", (int)NAD_SHAPE(axisdata));
+ printf("| Index: %d\n", (int)NAD_INDEX(axisdata));
+ printf("| Strides: ");
+ for (iop = 0; iop < nop; ++iop) {
+ printf("%d ", (int)NAD_STRIDES(axisdata)[iop]);
+ }
+ printf("\n");
+ if (itflags&NPY_ITFLAG_HASINDEX) {
+ printf("| Index Stride: %d\n", (int)NAD_STRIDES(axisdata)[nop]);
+ }
+ printf("| Ptrs: ");
+ for (iop = 0; iop < nop; ++iop) {
+ printf("%p ", (void *)NAD_PTRS(axisdata)[iop]);
+ }
+ printf("\n");
+ if (itflags&NPY_ITFLAG_HASINDEX) {
+ printf("| Index Value: %d\n",
+ (int)((npy_intp*)NAD_PTRS(axisdata))[nop]);
+ }
+ }
+
+ printf("------- END ITERATOR DUMP -------\n");
+
+ PyGILState_Release(gilstate);
+}
+
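+/*
+ * Coalesces adjacent axes whose strides allow them to be iterated
+ * as a single larger axis, shrinking the iterator's ndim.
+ *
+ * Worked example (illustrative): for a C-contiguous double array of
+ * shape (3, 4), the axisdata is ordered fastest axis first, giving
+ * shapes (4, 3) and strides (8, 32). Since 8*4 == 32, the check below
+ * succeeds and the two axes coalesce into one axis of shape 12 with
+ * stride 8.
+ */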
+NPY_NO_EXPORT void
+npyiter_coalesce_axes(NpyIter *iter)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int nop = NIT_NOP(iter);
+
+ npy_intp istrides, nstrides = NAD_NSTRIDES();
+ NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
+ npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+ NpyIter_AxisData *ad_compress;
+ npy_intp new_ndim = 1;
+
+ /* The HASMULTIINDEX or IDENTPERM flags do not apply after coalescing */
+ NIT_ITFLAGS(iter) &= ~(NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_HASMULTIINDEX);
+
+ axisdata = NIT_AXISDATA(iter);
+ ad_compress = axisdata;
+
+ for (idim = 0; idim < ndim-1; ++idim) {
+ int can_coalesce = 1;
+ npy_intp shape0 = NAD_SHAPE(ad_compress);
+ npy_intp shape1 = NAD_SHAPE(NIT_INDEX_AXISDATA(axisdata, 1));
+ npy_intp *strides0 = NAD_STRIDES(ad_compress);
+ npy_intp *strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, 1));
+
+ /* Check that all the axes can be coalesced */
+ for (istrides = 0; istrides < nstrides; ++istrides) {
+ if (!((shape0 == 1 && strides0[istrides] == 0) ||
+ (shape1 == 1 && strides1[istrides] == 0)) &&
+ (strides0[istrides]*shape0 != strides1[istrides])) {
+ can_coalesce = 0;
+ break;
+ }
+ }
+
+ if (can_coalesce) {
+ npy_intp *strides = NAD_STRIDES(ad_compress);
+
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ NAD_SHAPE(ad_compress) *= NAD_SHAPE(axisdata);
+ for (istrides = 0; istrides < nstrides; ++istrides) {
+ if (strides[istrides] == 0) {
+ strides[istrides] = NAD_STRIDES(axisdata)[istrides];
+ }
+ }
+ }
+ else {
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ NIT_ADVANCE_AXISDATA(ad_compress, 1);
+ if (ad_compress != axisdata) {
+ memcpy(ad_compress, axisdata, sizeof_axisdata);
+ }
+ ++new_ndim;
+ }
+ }
+
+ /*
+ * If the number of axes shrunk, reset the perm and
+ * compress the data into the new layout.
+ */
+ if (new_ndim < ndim) {
+ npy_int8 *perm = NIT_PERM(iter);
+
+ /* Reset to an identity perm */
+ for (idim = 0; idim < new_ndim; ++idim) {
+ perm[idim] = (npy_int8)idim;
+ }
+ NIT_NDIM(iter) = new_ndim;
+ }
+}
+
+/*
+ * Allocates the data buffers for any operands that may need buffering.
+ *
+ * If errmsg is non-NULL, it should point to a variable which will
+ * receive the error message, and no Python exception will be set.
+ * This is so that the function can be called from code not holding
+ * the GIL.
+ */
+NPY_NO_EXPORT int
+npyiter_allocate_buffers(NpyIter *iter, char **errmsg)
+{
+ /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
+ /*int ndim = NIT_NDIM(iter);*/
+ int iop = 0, nop = NIT_NOP(iter);
+
+ npy_intp i;
+ char *op_itflags = NIT_OPITFLAGS(iter);
+ NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
+ PyArray_Descr **op_dtype = NIT_DTYPES(iter);
+ npy_intp buffersize = NBF_BUFFERSIZE(bufferdata);
+ char *buffer, **buffers = NBF_BUFFERS(bufferdata);
+
+ for (iop = 0; iop < nop; ++iop) {
+ char flags = op_itflags[iop];
+
+ /*
+ * If we have determined that a buffer may be needed,
+ * allocate one.
+ */
+ if (!(flags&NPY_OP_ITFLAG_BUFNEVER)) {
+ npy_intp itemsize = op_dtype[iop]->elsize;
+ buffer = PyArray_malloc(itemsize*buffersize);
+ if (buffer == NULL) {
+ if (errmsg == NULL) {
+ PyErr_NoMemory();
+ }
+ else {
+ *errmsg = "out of memory";
+ }
+ goto fail;
+ }
+ buffers[iop] = buffer;
+ }
+ }
+
+ return 1;
+
+fail:
+ for (i = 0; i < iop; ++i) {
+ if (buffers[i] != NULL) {
+ PyArray_free(buffers[i]);
+ buffers[i] = NULL;
+ }
+ }
+ return 0;
+}
+
+/*
+ * This sets the AXISDATA portion of the iterator to the specified
+ * iterindex, updating the pointers as well. This function does
+ * no error checking.
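+ *
+ * Worked example (illustrative): with axisdata shapes (4, 3), fastest
+ * axis first, iterindex 7 decomposes as 7 == 3 + 1*4, so the fastest
+ * axis gets index 3 and the next axis index 1, and the data pointers
+ * end up offset by 3*stride0 + 1*stride1 from the reset pointers.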
+ */
+NPY_NO_EXPORT void
+npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int nop = NIT_NOP(iter);
+
+ char **dataptr;
+ NpyIter_AxisData *axisdata;
+ npy_intp sizeof_axisdata;
+ npy_intp istrides, nstrides, i, shape;
+
+ axisdata = NIT_AXISDATA(iter);
+ sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+ nstrides = NAD_NSTRIDES();
+
+ NIT_ITERINDEX(iter) = iterindex;
+
+ if (iterindex == 0) {
+ dataptr = NIT_RESETDATAPTR(iter);
+
+ for (idim = 0; idim < ndim; ++idim) {
+ char **ptrs;
+ NAD_INDEX(axisdata) = 0;
+ ptrs = NAD_PTRS(axisdata);
+ for (istrides = 0; istrides < nstrides; ++istrides) {
+ ptrs[istrides] = dataptr[istrides];
+ }
+
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ }
+ }
+ else {
+ /*
+ * Set the multi-index, from the fastest-changing to the
+ * slowest-changing.
+ */
+ axisdata = NIT_AXISDATA(iter);
+ shape = NAD_SHAPE(axisdata);
+ i = iterindex;
+ iterindex /= shape;
+ NAD_INDEX(axisdata) = i - iterindex * shape;
+ for (idim = 0; idim < ndim-1; ++idim) {
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+
+ shape = NAD_SHAPE(axisdata);
+ i = iterindex;
+ iterindex /= shape;
+ NAD_INDEX(axisdata) = i - iterindex * shape;
+ }
+
+ dataptr = NIT_RESETDATAPTR(iter);
+
+ /*
+ * Accumulate the successive pointers with their
+ * offsets in the opposite order, starting from the
+ * original data pointers.
+ */
+ for (idim = 0; idim < ndim; ++idim) {
+ npy_intp *strides;
+ char **ptrs;
+
+ strides = NAD_STRIDES(axisdata);
+ ptrs = NAD_PTRS(axisdata);
+
+ i = NAD_INDEX(axisdata);
+
+ for (istrides = 0; istrides < nstrides; ++istrides) {
+ ptrs[istrides] = dataptr[istrides] + i*strides[istrides];
+ }
+
+ dataptr = ptrs;
+
+ NIT_ADVANCE_AXISDATA(axisdata, -1);
+ }
+ }
+}
+
+/*
+ * This gets called after the buffers have been exhausted, and
+ * their data needs to be written back to the arrays. The multi-index
+ * must be positioned for the beginning of the buffer.
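+ *
+ * Sketch of the buffered cycle as these helpers are used together
+ * (a summary, based on how this file's buffered iternext uses them):
+ * data is staged in via npyiter_copy_to_buffers, the caller processes
+ * the buffers, then this function writes any writeable buffer contents
+ * back to the operand arrays and releases buffered references.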
+ */
+NPY_NO_EXPORT void
+npyiter_copy_from_buffers(NpyIter *iter)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int ndim = NIT_NDIM(iter);
+ int iop, nop = NIT_NOP(iter);
+
+ char *op_itflags = NIT_OPITFLAGS(iter);
+ NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
+ NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
+ *reduce_outeraxisdata = NULL;
+
+ PyArray_Descr **dtypes = NIT_DTYPES(iter);
+ npy_intp transfersize = NBF_SIZE(bufferdata),
+ buffersize = NBF_BUFFERSIZE(bufferdata);
+ npy_intp *strides = NBF_STRIDES(bufferdata),
+ *ad_strides = NAD_STRIDES(axisdata);
+ npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+ char **ptrs = NBF_PTRS(bufferdata), **ad_ptrs = NAD_PTRS(axisdata);
+ char **buffers = NBF_BUFFERS(bufferdata);
+ char *buffer;
+
+ npy_intp reduce_outerdim = 0;
+ npy_intp *reduce_outerstrides = NULL;
+
+ PyArray_StridedTransferFn *stransfer = NULL;
+ NpyAuxData *transferdata = NULL;
+
+ npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
+ NPY_SIZEOF_INTP;
+
+ /* If we're past the end, nothing to copy */
+ if (NBF_SIZE(bufferdata) == 0) {
+ return;
+ }
+
+ NPY_IT_DBG_PRINT("Iterator: Copying buffers to outputs\n");
+
+ if (itflags&NPY_ITFLAG_REDUCE) {
+ reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
+ reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
+ reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
+ transfersize *= NBF_REDUCE_OUTERSIZE(bufferdata);
+ }
+
+ for (iop = 0; iop < nop; ++iop) {
+ stransfer = NBF_WRITETRANSFERFN(bufferdata)[iop];
+ transferdata = NBF_WRITETRANSFERDATA(bufferdata)[iop];
+ buffer = buffers[iop];
+ /*
+ * Copy the data back to the arrays. If the type has refs,
+ * this function moves them so the buffer's refs are released.
+ */
+ if ((stransfer != NULL) && (op_itflags[iop]&NPY_OP_ITFLAG_WRITE)) {
+ /* Copy back only if the pointer was pointing to the buffer */
+ npy_intp delta = (ptrs[iop] - buffer);
+ if (0 <= delta && delta <= buffersize*dtypes[iop]->elsize) {
+ npy_intp op_transfersize;
+
+ npy_intp src_stride, *dst_strides, *dst_coords, *dst_shape;
+ int ndim_transfer;
+
+ NPY_IT_DBG_PRINT1("Iterator: Operand %d was buffered\n",
+ (int)iop);
+
+ /*
+ * If this operand is being reduced in the inner loop,
+ * its buffering stride was set to zero, and just
+ * one element was copied.
+ */
+ if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
+ if (strides[iop] == 0) {
+ if (reduce_outerstrides[iop] == 0) {
+ op_transfersize = 1;
+ src_stride = 0;
+ dst_strides = &src_stride;
+ dst_coords = &NAD_INDEX(reduce_outeraxisdata);
+ dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
+ ndim_transfer = 1;
+ }
+ else {
+ op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
+ src_stride = reduce_outerstrides[iop];
+ dst_strides =
+ &NAD_STRIDES(reduce_outeraxisdata)[iop];
+ dst_coords = &NAD_INDEX(reduce_outeraxisdata);
+ dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
+ ndim_transfer = ndim - reduce_outerdim;
+ }
+ }
+ else {
+ if (reduce_outerstrides[iop] == 0) {
+ op_transfersize = NBF_SIZE(bufferdata);
+ src_stride = strides[iop];
+ dst_strides = &ad_strides[iop];
+ dst_coords = &NAD_INDEX(axisdata);
+ dst_shape = &NAD_SHAPE(axisdata);
+ ndim_transfer = reduce_outerdim ?
+ reduce_outerdim : 1;
+ }
+ else {
+ op_transfersize = transfersize;
+ src_stride = strides[iop];
+ dst_strides = &ad_strides[iop];
+ dst_coords = &NAD_INDEX(axisdata);
+ dst_shape = &NAD_SHAPE(axisdata);
+ ndim_transfer = ndim;
+ }
+ }
+ }
+ else {
+ op_transfersize = transfersize;
+ src_stride = strides[iop];
+ dst_strides = &ad_strides[iop];
+ dst_coords = &NAD_INDEX(axisdata);
+ dst_shape = &NAD_SHAPE(axisdata);
+ ndim_transfer = ndim;
+ }
+
+ NPY_IT_DBG_PRINT2("Iterator: Copying buffer to "
+ "operand %d (%d items)\n",
+ (int)iop, (int)op_transfersize);
+
+ PyArray_TransferStridedToNDim(ndim_transfer,
+ ad_ptrs[iop], dst_strides, axisdata_incr,
+ buffer, src_stride,
+ dst_coords, axisdata_incr,
+ dst_shape, axisdata_incr,
+ op_transfersize, dtypes[iop]->elsize,
+ stransfer,
+ transferdata);
+ }
+ }
+        /*
+         * If there's no copy back, we may have to decrement refs. In
+         * this case, the transfer function is a 'decsrcref' transfer
+         * function, so calling it performs the decrement.
+ */
+ else if (stransfer != NULL) {
+ /* Decrement refs only if the pointer was pointing to the buffer */
+ npy_intp delta = (ptrs[iop] - buffer);
+ if (0 <= delta && delta <= transfersize*dtypes[iop]->elsize) {
+ NPY_IT_DBG_PRINT1("Iterator: Freeing refs and zeroing buffer "
+ "of operand %d\n", (int)iop);
+ /* Decrement refs */
+ stransfer(NULL, 0, buffer, dtypes[iop]->elsize,
+ transfersize, dtypes[iop]->elsize,
+ transferdata);
+ /*
+ * Zero out the memory for safety. For instance,
+ * if during iteration some Python code copied an
+ * array pointing into the buffer, it will get None
+ * values for its references after this.
+ */
+ memset(buffer, 0, dtypes[iop]->elsize*transfersize);
+ }
+ }
+ }
+
+ NPY_IT_DBG_PRINT("Iterator: Finished copying buffers to outputs\n");
+}
+
+/*
+ * This gets called after the iterator has been positioned to a multi-index
+ * for the start of a buffer. It decides which operands need a buffer,
+ * and copies the data into the buffers.
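+ *
+ * Summary of the cases in the switch below: BUFNEVER operands point
+ * directly at the array data; uncast data reachable through a single
+ * stride (or through the outer reduce loop) is likewise used in place;
+ * only the remaining operands have their data copied into the buffers
+ * by the read transfer functions.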
+ */
+NPY_NO_EXPORT void
+npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int ndim = NIT_NDIM(iter);
+ int iop, nop = NIT_NOP(iter);
+
+ char *op_itflags = NIT_OPITFLAGS(iter);
+ NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
+ NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
+ *reduce_outeraxisdata = NULL;
+
+ PyArray_Descr **dtypes = NIT_DTYPES(iter);
+ PyArrayObject **operands = NIT_OPERANDS(iter);
+ npy_intp *strides = NBF_STRIDES(bufferdata),
+ *ad_strides = NAD_STRIDES(axisdata);
+ npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+ char **ptrs = NBF_PTRS(bufferdata), **ad_ptrs = NAD_PTRS(axisdata);
+ char **buffers = NBF_BUFFERS(bufferdata);
+ npy_intp iterindex, iterend, transfersize,
+ singlestridesize, reduce_innersize = 0, reduce_outerdim = 0;
+ int is_onestride = 0, any_buffered = 0;
+
+ npy_intp *reduce_outerstrides = NULL;
+ char **reduce_outerptrs = NULL;
+
+ PyArray_StridedTransferFn *stransfer = NULL;
+ NpyAuxData *transferdata = NULL;
+
+ /*
+ * Have to get this flag before npyiter_checkreducesize sets
+ * it for the next iteration.
+ */
+ npy_bool reuse_reduce_loops = (prev_dataptrs != NULL) &&
+ ((itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS) != 0);
+
+ npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
+ NPY_SIZEOF_INTP;
+
+ NPY_IT_DBG_PRINT("Iterator: Copying inputs to buffers\n");
+
+ /* Calculate the size if using any buffers */
+ iterindex = NIT_ITERINDEX(iter);
+ iterend = NIT_ITEREND(iter);
+ transfersize = NBF_BUFFERSIZE(bufferdata);
+ if (transfersize > iterend - iterindex) {
+ transfersize = iterend - iterindex;
+ }
+
+ /* If last time around, the reduce loop structure was full, we reuse it */
+ if (reuse_reduce_loops) {
+ npy_intp full_transfersize;
+
+ reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
+ reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
+ reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
+ reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
+ reduce_innersize = NBF_SIZE(bufferdata);
+ NBF_REDUCE_POS(bufferdata) = 0;
+ /*
+         * Try to make the outer size as big as possible. This allows
+         * it to shrink when processing the last bit of the outer reduce loop,
+         * then grow again at the beginning of the next outer reduce loop.
+ */
+ NBF_REDUCE_OUTERSIZE(bufferdata) = (NAD_SHAPE(reduce_outeraxisdata)-
+ NAD_INDEX(reduce_outeraxisdata));
+ full_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
+ /* If the full transfer size doesn't fit in the buffer, truncate it */
+ if (full_transfersize > NBF_BUFFERSIZE(bufferdata)) {
+ NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
+ transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
+ }
+ else {
+ transfersize = full_transfersize;
+ }
+ NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
+
+ NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d "
+ "itersize: %d\n",
+ (int)transfersize,
+ (int)reduce_innersize,
+ (int)NpyIter_GetIterSize(iter));
+ NPY_IT_DBG_PRINT1("Reduced reduce outersize: %d",
+ (int)NBF_REDUCE_OUTERSIZE(bufferdata));
+ }
+ /*
+ * If there are any reduction operands, we may have to make
+ * the size smaller so we don't copy the same value into
+ * a buffer twice, as the buffering does not have a mechanism
+ * to combine values itself.
+ */
+ else if (itflags&NPY_ITFLAG_REDUCE) {
+ NPY_IT_DBG_PRINT("Iterator: Calculating reduce loops\n");
+ transfersize = npyiter_checkreducesize(iter, transfersize,
+ &reduce_innersize,
+ &reduce_outerdim);
+ NPY_IT_DBG_PRINT3("Reduce transfersize: %d innersize: %d "
+ "itersize: %d\n",
+ (int)transfersize,
+ (int)reduce_innersize,
+ (int)NpyIter_GetIterSize(iter));
+
+ reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
+ reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
+ reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
+ NBF_SIZE(bufferdata) = reduce_innersize;
+ NBF_REDUCE_POS(bufferdata) = 0;
+ NBF_REDUCE_OUTERDIM(bufferdata) = reduce_outerdim;
+ NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
+ if (reduce_innersize == 0) {
+ NBF_REDUCE_OUTERSIZE(bufferdata) = 0;
+ return;
+ }
+ else {
+ NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
+ }
+ }
+ else {
+ NBF_SIZE(bufferdata) = transfersize;
+ NBF_BUFITEREND(bufferdata) = iterindex + transfersize;
+ }
+
+ /* Calculate the maximum size if using a single stride and no buffers */
+ singlestridesize = NAD_SHAPE(axisdata)-NAD_INDEX(axisdata);
+ if (singlestridesize > iterend - iterindex) {
+ singlestridesize = iterend - iterindex;
+ }
+ if (singlestridesize >= transfersize) {
+ is_onestride = 1;
+ }
+
+ for (iop = 0; iop < nop; ++iop) {
+ /*
+ * If the buffer is write-only, these two are NULL, and the buffer
+ * pointers will be set up but the read copy won't be done
+ */
+ stransfer = NBF_READTRANSFERFN(bufferdata)[iop];
+ transferdata = NBF_READTRANSFERDATA(bufferdata)[iop];
+ switch (op_itflags[iop]&
+ (NPY_OP_ITFLAG_BUFNEVER|
+ NPY_OP_ITFLAG_CAST|
+ NPY_OP_ITFLAG_REDUCE)) {
+ /* Never need to buffer this operand */
+ case NPY_OP_ITFLAG_BUFNEVER:
+ ptrs[iop] = ad_ptrs[iop];
+ if (itflags&NPY_ITFLAG_REDUCE) {
+ reduce_outerstrides[iop] = reduce_innersize *
+ strides[iop];
+ reduce_outerptrs[iop] = ptrs[iop];
+ }
+ /*
+ * Should not adjust the stride - ad_strides[iop]
+ * could be zero, but strides[iop] was initialized
+ * to the first non-trivial stride.
+ */
+ stransfer = NULL;
+ break;
+ /* Never need to buffer this operand */
+ case NPY_OP_ITFLAG_BUFNEVER|NPY_OP_ITFLAG_REDUCE:
+ ptrs[iop] = ad_ptrs[iop];
+ reduce_outerptrs[iop] = ptrs[iop];
+ reduce_outerstrides[iop] = 0;
+ /*
+ * Should not adjust the stride - ad_strides[iop]
+ * could be zero, but strides[iop] was initialized
+ * to the first non-trivial stride.
+ */
+ stransfer = NULL;
+ break;
+ /* Just a copy */
+ case 0:
+ /*
+ * No copyswap or cast was requested, so all we're
+ * doing is copying the data to fill the buffer and
+ * produce a single stride. If the underlying data
+ * already does that, no need to copy it.
+ */
+ if (is_onestride) {
+ ptrs[iop] = ad_ptrs[iop];
+ strides[iop] = ad_strides[iop];
+ stransfer = NULL;
+ }
+ /* If some other op is reduced, we have a double reduce loop */
+ else if ((itflags&NPY_ITFLAG_REDUCE) &&
+ (reduce_outerdim == 1) &&
+ (transfersize/reduce_innersize <=
+ NAD_SHAPE(reduce_outeraxisdata) -
+ NAD_INDEX(reduce_outeraxisdata))) {
+ ptrs[iop] = ad_ptrs[iop];
+ reduce_outerptrs[iop] = ptrs[iop];
+ strides[iop] = ad_strides[iop];
+ reduce_outerstrides[iop] =
+ NAD_STRIDES(reduce_outeraxisdata)[iop];
+ stransfer = NULL;
+ }
+ else {
+ /* In this case, the buffer is being used */
+ ptrs[iop] = buffers[iop];
+ strides[iop] = dtypes[iop]->elsize;
+ if (itflags&NPY_ITFLAG_REDUCE) {
+ reduce_outerstrides[iop] = reduce_innersize *
+ strides[iop];
+ reduce_outerptrs[iop] = ptrs[iop];
+ }
+ }
+ break;
+ /* Just a copy, but with a reduction */
+ case NPY_OP_ITFLAG_REDUCE:
+ if (ad_strides[iop] == 0) {
+ strides[iop] = 0;
+ /* It's all in one stride in the inner loop dimension */
+ if (is_onestride) {
+ NPY_IT_DBG_PRINT1("reduce op %d all one stride\n", (int)iop);
+ ptrs[iop] = ad_ptrs[iop];
+ reduce_outerstrides[iop] = 0;
+ stransfer = NULL;
+ }
+ /* It's all in one stride in the reduce outer loop */
+ else if ((reduce_outerdim > 0) &&
+ (transfersize/reduce_innersize <=
+ NAD_SHAPE(reduce_outeraxisdata) -
+ NAD_INDEX(reduce_outeraxisdata))) {
+ NPY_IT_DBG_PRINT1("reduce op %d all one outer stride\n",
+ (int)iop);
+ ptrs[iop] = ad_ptrs[iop];
+ /* Outer reduce loop advances by one item */
+ reduce_outerstrides[iop] =
+ NAD_STRIDES(reduce_outeraxisdata)[iop];
+ stransfer = NULL;
+ }
+ /* In this case, the buffer is being used */
+ else {
+ NPY_IT_DBG_PRINT1("reduce op %d must buffer\n", (int)iop);
+ ptrs[iop] = buffers[iop];
+ /* Both outer and inner reduce loops have stride 0 */
+ if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
+ reduce_outerstrides[iop] = 0;
+ }
+ /* Outer reduce loop advances by one item */
+ else {
+ reduce_outerstrides[iop] = dtypes[iop]->elsize;
+ }
+ }
+
+ }
+ else if (is_onestride) {
+ NPY_IT_DBG_PRINT1("reduce op %d all one stride in dim 0\n", (int)iop);
+ ptrs[iop] = ad_ptrs[iop];
+ strides[iop] = ad_strides[iop];
+ reduce_outerstrides[iop] = 0;
+ stransfer = NULL;
+ }
+ else {
+ /* It's all in one stride in the reduce outer loop */
+ if ((reduce_outerdim > 0) &&
+ (transfersize/reduce_innersize <=
+ NAD_SHAPE(reduce_outeraxisdata) -
+ NAD_INDEX(reduce_outeraxisdata))) {
+ ptrs[iop] = ad_ptrs[iop];
+ strides[iop] = ad_strides[iop];
+ /* Outer reduce loop advances by one item */
+ reduce_outerstrides[iop] =
+ NAD_STRIDES(reduce_outeraxisdata)[iop];
+ stransfer = NULL;
+ }
+ /* In this case, the buffer is being used */
+ else {
+ ptrs[iop] = buffers[iop];
+ strides[iop] = dtypes[iop]->elsize;
+
+ if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
+ /* Reduction in outer reduce loop */
+ reduce_outerstrides[iop] = 0;
+ }
+ else {
+ /* Advance to next items in outer reduce loop */
+ reduce_outerstrides[iop] = reduce_innersize *
+ dtypes[iop]->elsize;
+ }
+ }
+ }
+ reduce_outerptrs[iop] = ptrs[iop];
+ break;
+ default:
+ /* In this case, the buffer is always being used */
+ any_buffered = 1;
+
+ if (!(op_itflags[iop]&NPY_OP_ITFLAG_REDUCE)) {
+ ptrs[iop] = buffers[iop];
+ strides[iop] = dtypes[iop]->elsize;
+ if (itflags&NPY_ITFLAG_REDUCE) {
+ reduce_outerstrides[iop] = reduce_innersize *
+ strides[iop];
+ reduce_outerptrs[iop] = ptrs[iop];
+ }
+ }
+ /* The buffer is being used with reduction */
+ else {
+ ptrs[iop] = buffers[iop];
+ if (ad_strides[iop] == 0) {
+ NPY_IT_DBG_PRINT1("cast op %d has innermost stride 0\n", (int)iop);
+ strides[iop] = 0;
+ /* Both outer and inner reduce loops have stride 0 */
+ if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
+ NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
+ reduce_outerstrides[iop] = 0;
+ }
+ /* Outer reduce loop advances by one item */
+ else {
+ NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
+ reduce_outerstrides[iop] = dtypes[iop]->elsize;
+ }
+ }
+ else {
+ NPY_IT_DBG_PRINT1("cast op %d has innermost stride !=0\n", (int)iop);
+ strides[iop] = dtypes[iop]->elsize;
+
+ if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
+ NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
+ /* Reduction in outer reduce loop */
+ reduce_outerstrides[iop] = 0;
+ }
+ else {
+ NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
+ /* Advance to next items in outer reduce loop */
+ reduce_outerstrides[iop] = reduce_innersize *
+ dtypes[iop]->elsize;
+ }
+ }
+ reduce_outerptrs[iop] = ptrs[iop];
+ }
+ break;
+ }
+
+ if (stransfer != NULL) {
+ npy_intp src_itemsize = PyArray_DESCR(operands[iop])->elsize;
+ npy_intp op_transfersize;
+
+ npy_intp dst_stride, *src_strides, *src_coords, *src_shape;
+ int ndim_transfer;
+
+ npy_bool skip_transfer = 0;
+
+ /* If stransfer wasn't set to NULL, buffering is required */
+ any_buffered = 1;
+
+ /*
+ * If this operand is being reduced in the inner loop,
+ * set its buffering stride to zero, and just copy
+ * one element.
+ */
+ if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
+ if (ad_strides[iop] == 0) {
+ strides[iop] = 0;
+ if (reduce_outerstrides[iop] == 0) {
+ op_transfersize = 1;
+ dst_stride = 0;
+ src_strides = &dst_stride;
+ src_coords = &NAD_INDEX(reduce_outeraxisdata);
+ src_shape = &NAD_SHAPE(reduce_outeraxisdata);
+ ndim_transfer = 1;
+
+ /*
+ * When we're reducing a single element, and
+ * it's still the same element, don't overwrite
+ * it even when reuse reduce loops is unset.
+ * This preserves the precision of the
+ * intermediate calculation.
+ */
+ if (prev_dataptrs &&
+ prev_dataptrs[iop] == ad_ptrs[iop]) {
+ NPY_IT_DBG_PRINT1("Iterator: skipping operand %d"
+ " copy because it's a 1-element reduce\n",
+ (int)iop);
+
+ skip_transfer = 1;
+ }
+ }
+ else {
+ op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
+ dst_stride = reduce_outerstrides[iop];
+ src_strides = &NAD_STRIDES(reduce_outeraxisdata)[iop];
+ src_coords = &NAD_INDEX(reduce_outeraxisdata);
+ src_shape = &NAD_SHAPE(reduce_outeraxisdata);
+ ndim_transfer = ndim - reduce_outerdim;
+ }
+ }
+ else {
+ if (reduce_outerstrides[iop] == 0) {
+ op_transfersize = NBF_SIZE(bufferdata);
+ dst_stride = strides[iop];
+ src_strides = &ad_strides[iop];
+ src_coords = &NAD_INDEX(axisdata);
+ src_shape = &NAD_SHAPE(axisdata);
+ ndim_transfer = reduce_outerdim ? reduce_outerdim : 1;
+ }
+ else {
+ op_transfersize = transfersize;
+ dst_stride = strides[iop];
+ src_strides = &ad_strides[iop];
+ src_coords = &NAD_INDEX(axisdata);
+ src_shape = &NAD_SHAPE(axisdata);
+ ndim_transfer = ndim;
+ }
+ }
+ }
+ else {
+ op_transfersize = transfersize;
+ dst_stride = strides[iop];
+ src_strides = &ad_strides[iop];
+ src_coords = &NAD_INDEX(axisdata);
+ src_shape = &NAD_SHAPE(axisdata);
+ ndim_transfer = ndim;
+ }
+
+ /*
+ * If the whole buffered loop structure remains the same,
+ * and the source pointer for this data didn't change,
+ * we don't have to copy the data again.
+ */
+ if (reuse_reduce_loops && prev_dataptrs[iop] == ad_ptrs[iop]) {
+ NPY_IT_DBG_PRINT2("Iterator: skipping operands %d "
+ "copy (%d items) because loops are reused and the data "
+ "pointer didn't change\n",
+ (int)iop, (int)op_transfersize);
+ skip_transfer = 1;
+ }
+
+            /* If the data type requires zero-initialization */
+ if (PyDataType_FLAGCHK(dtypes[iop], NPY_NEEDS_INIT)) {
+ NPY_IT_DBG_PRINT("Iterator: Buffer requires init, "
+ "memsetting to 0\n");
+ memset(ptrs[iop], 0, dtypes[iop]->elsize*op_transfersize);
+ /* Can't skip the transfer in this case */
+ skip_transfer = 0;
+ }
+
+ if (!skip_transfer) {
+ NPY_IT_DBG_PRINT2("Iterator: Copying operand %d to "
+ "buffer (%d items)\n",
+ (int)iop, (int)op_transfersize);
+
+ PyArray_TransferNDimToStrided(ndim_transfer,
+ ptrs[iop], dst_stride,
+ ad_ptrs[iop], src_strides, axisdata_incr,
+ src_coords, axisdata_incr,
+ src_shape, axisdata_incr,
+ op_transfersize, src_itemsize,
+ stransfer,
+ transferdata);
+ }
+ }
+ else if (ptrs[iop] == buffers[iop]) {
+            /* If the data type requires zero-initialization */
+ if (PyDataType_FLAGCHK(dtypes[iop], NPY_NEEDS_INIT)) {
+ NPY_IT_DBG_PRINT1("Iterator: Write-only buffer for "
+ "operand %d requires init, "
+ "memsetting to 0\n", (int)iop);
+ memset(ptrs[iop], 0, dtypes[iop]->elsize*transfersize);
+ }
+ }
+
+ }
+
+ /*
+ * If buffering wasn't needed, we can grow the inner
+     * loop as large as possible.
+ *
+ * TODO: Could grow REDUCE loop too with some more logic above.
+ */
+ if (!any_buffered && (itflags&NPY_ITFLAG_GROWINNER) &&
+ !(itflags&NPY_ITFLAG_REDUCE)) {
+ if (singlestridesize > transfersize) {
+ NPY_IT_DBG_PRINT2("Iterator: Expanding inner loop size "
+ "from %d to %d since buffering wasn't needed\n",
+ (int)NBF_SIZE(bufferdata), (int)singlestridesize);
+ NBF_SIZE(bufferdata) = singlestridesize;
+ NBF_BUFITEREND(bufferdata) = iterindex + singlestridesize;
+ }
+ }
+
+ NPY_IT_DBG_PRINT1("Any buffering needed: %d\n", any_buffered);
+
+ NPY_IT_DBG_PRINT1("Iterator: Finished copying inputs to buffers "
+ "(buffered size is %d)\n", (int)NBF_SIZE(bufferdata));
+}
+
+/*
+ * This checks how much space can be buffered without encountering the
+ * same value twice, or for operands whose innermost stride is zero,
+ * without encountering a different value. By reducing the buffered
+ * amount to this size, reductions can be safely buffered.
+ *
+ * Reductions are buffered with two levels of looping, to avoid
+ * frequent copying to the buffers. The return value is the overall
+ * buffer size, and when the flag NPY_ITFLAG_REDUCE is set, reduce_innersize
+ * receives the size of the inner of the two levels of looping.
+ *
+ * The value placed in reduce_outerdim is the index into the AXISDATA
+ * for where the second level of the double loop begins.
+ *
+ * The return value is always a multiple of the value placed in
+ * reduce_innersize.
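+ *
+ * Worked example (illustrative): with axisdata shapes (4, 3), fastest
+ * axis first, and a reduce operand whose stride is 0 on the fastest
+ * axis but nonzero on the second, a count of 12 starting from an
+ * all-zero multi-index yields reduce_innersize == 4 and
+ * reduce_outerdim == 1, and the function returns 12 (an outer size of
+ * 3 times the inner size of 4).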
+ */
+static npy_intp
+npyiter_checkreducesize(NpyIter *iter, npy_intp count,
+ npy_intp *reduce_innersize,
+ npy_intp *reduce_outerdim)
+{
+ npy_uint32 itflags = NIT_ITFLAGS(iter);
+ int idim, ndim = NIT_NDIM(iter);
+ int iop, nop = NIT_NOP(iter);
+
+ NpyIter_AxisData *axisdata;
+ npy_intp sizeof_axisdata;
+ npy_intp coord, shape, *strides;
+ npy_intp reducespace = 1, factor;
+ npy_bool nonzerocoord = 0;
+
+ char *op_itflags = NIT_OPITFLAGS(iter);
+ char stride0op[NPY_MAXARGS];
+
+ /* Default to no outer axis */
+ *reduce_outerdim = 0;
+
+ /* If there's only one dimension, no need to calculate anything */
+ if (ndim == 1) {
+ *reduce_innersize = count;
+ return count;
+ }
+
+ sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
+ axisdata = NIT_AXISDATA(iter);
+
+ /* Indicate which REDUCE operands have stride 0 in the inner loop */
+ strides = NAD_STRIDES(axisdata);
+ for (iop = 0; iop < nop; ++iop) {
+ stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
+ (strides[iop] == 0);
+ NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
+ "the inner loop? %d\n", iop, (int)stride0op[iop]);
+ }
+ shape = NAD_SHAPE(axisdata);
+ coord = NAD_INDEX(axisdata);
+ reducespace += (shape-coord-1);
+ factor = shape;
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+
+ /* Go forward through axisdata, calculating the space available */
+ for (idim = 1; idim < ndim && reducespace < count;
+ ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
+ NPY_IT_DBG_PRINT2("Iterator: inner loop reducespace %d, count %d\n",
+ (int)reducespace, (int)count);
+
+ strides = NAD_STRIDES(axisdata);
+ for (iop = 0; iop < nop; ++iop) {
+ /*
+ * If a reduce stride switched from zero to non-zero, or
+ * vice versa, that's the point where the data will stop
+ * being the same element or will repeat, and if the
+             * buffer starts with an all-zero multi-index up to this
+             * point, this gives us the reduce_innersize.
+ */
+            if ((stride0op[iop] && (strides[iop] != 0)) ||
+ (!stride0op[iop] &&
+ (strides[iop] == 0) &&
+ (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
+ NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
+ "buffer to %d\n", (int)reducespace);
+ /*
+ * If we already found more elements than count, or
+ * the starting coordinate wasn't zero, the two-level
+ * looping is unnecessary/can't be done, so return.
+ */
+ if (count <= reducespace) {
+ *reduce_innersize = count;
+ return count;
+ }
+ else if (nonzerocoord) {
+ if (reducespace < count) {
+ count = reducespace;
+ }
+ *reduce_innersize = count;
+ return count;
+ }
+ else {
+ *reduce_innersize = reducespace;
+ break;
+ }
+ }
+ }
+ /* If we broke out of the loop early, we found reduce_innersize */
+ if (iop != nop) {
+ NPY_IT_DBG_PRINT2("Iterator: Found first dim not "
+ "reduce (%d of %d)\n", iop, nop);
+ break;
+ }
+
+ shape = NAD_SHAPE(axisdata);
+ coord = NAD_INDEX(axisdata);
+ if (coord != 0) {
+ nonzerocoord = 1;
+ }
+ reducespace += (shape-coord-1) * factor;
+ factor *= shape;
+ }
+
+ /*
+ * If there was any non-zero coordinate, the reduction inner
+ * loop doesn't fit in the buffersize, or the reduction inner loop
+ * covered the entire iteration size, can't do the double loop.
+ */
+ if (nonzerocoord || count < reducespace || idim == ndim) {
+ if (reducespace < count) {
+ count = reducespace;
+ }
+ *reduce_innersize = count;
+ /* In this case, we can't reuse the reduce loops */
+ NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
+ return count;
+ }
+
+ /* In this case, we can reuse the reduce loops */
+ NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
+
+ *reduce_innersize = reducespace;
+ count /= reducespace;
+
+ NPY_IT_DBG_PRINT2("Iterator: reduce_innersize %d count /ed %d\n",
+ (int)reducespace, (int)count);
+
+ /*
+ * Continue through the rest of the dimensions. If there are
+ * two separated reduction axes, we may have to cut the buffer
+ * short again.
+ */
+ *reduce_outerdim = idim;
+ reducespace = 1;
+ factor = 1;
+ /* Indicate which REDUCE operands have stride 0 at the current level */
+ strides = NAD_STRIDES(axisdata);
+ for (iop = 0; iop < nop; ++iop) {
+ stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
+ (strides[iop] == 0);
+ NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
+ "the outer loop? %d\n", iop, (int)stride0op[iop]);
+ }
+ shape = NAD_SHAPE(axisdata);
+ coord = NAD_INDEX(axisdata);
+ reducespace += (shape-coord-1) * factor;
+ factor *= shape;
+ NIT_ADVANCE_AXISDATA(axisdata, 1);
+ ++idim;
+
+ for (; idim < ndim && reducespace < count;
+ ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
+ NPY_IT_DBG_PRINT2("Iterator: outer loop reducespace %d, count %d\n",
+ (int)reducespace, (int)count);
+ strides = NAD_STRIDES(axisdata);
+ for (iop = 0; iop < nop; ++iop) {
+ /*
+ * If a reduce stride switched from zero to non-zero, or
+ * vice versa, that's the point where the data will stop
+             * being the same element or will repeat, which here
+             * terminates the outer level of the double loop.
+ */
+        if ((stride0op[iop] && (strides[iop] != 0)) ||
+ (!stride0op[iop] &&
+ (strides[iop] == 0) &&
+ (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
+ NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
+ "buffer to %d\n", (int)reducespace);
+ /*
+ * This terminates the outer level of our double loop.
+ */
+ if (count <= reducespace) {
+ return count * (*reduce_innersize);
+ }
+ else {
+ return reducespace * (*reduce_innersize);
+ }
+ }
+ }
+
+ shape = NAD_SHAPE(axisdata);
+ coord = NAD_INDEX(axisdata);
+ if (coord != 0) {
+ nonzerocoord = 1;
+ }
+ reducespace += (shape-coord-1) * factor;
+ factor *= shape;
+ }
+
+ if (reducespace < count) {
+ count = reducespace;
+ }
+ return count * (*reduce_innersize);
+}
+
+#undef NPY_ITERATOR_IMPLEMENTATION_CODE
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index bc5d9e9dc..774ed65e4 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -2951,6 +2951,4 @@ fail:
return 0;
}
-
-
#undef NPY_ITERATOR_IMPLEMENTATION_CODE