/* * This file implements most of the main API functions of NumPy's nditer. * This excludes functions specialized using the templating system. * * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com) * The University of British Columbia * * Copyright (c) 2011 Enthought, Inc * * See LICENSE.txt for the license. */ #define NPY_NO_DEPRECATED_API NPY_API_VERSION /* Allow this .c file to include nditer_impl.h */ #define NPY_ITERATOR_IMPLEMENTATION_CODE #include "nditer_impl.h" #include "templ_common.h" #include "ctors.h" #include "refcount.h" /* Internal helper functions private to this file */ static npy_intp npyiter_checkreducesize(NpyIter *iter, npy_intp count, npy_intp *reduce_innersize, npy_intp *reduce_outerdim); /*NUMPY_API * Removes an axis from iteration. This requires that NPY_ITER_MULTI_INDEX * was set for iterator creation, and does not work if buffering is * enabled. This function also resets the iterator to its initial state. * * Returns NPY_SUCCEED or NPY_FAIL. */ NPY_NO_EXPORT int NpyIter_RemoveAxis(NpyIter *iter, int axis) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int iop, nop = NIT_NOP(iter); int xdim = 0; npy_int8 *perm = NIT_PERM(iter); NpyIter_AxisData *axisdata_del = NIT_AXISDATA(iter), *axisdata; npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); npy_intp *baseoffsets = NIT_BASEOFFSETS(iter); char **resetdataptr = NIT_RESETDATAPTR(iter); if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) { PyErr_SetString(PyExc_RuntimeError, "Iterator RemoveAxis may only be called " "if a multi-index is being tracked"); return NPY_FAIL; } else if (itflags&NPY_ITFLAG_HASINDEX) { PyErr_SetString(PyExc_RuntimeError, "Iterator RemoveAxis may not be called " "if an index is being tracked"); return NPY_FAIL; } else if (itflags&NPY_ITFLAG_BUFFER) { PyErr_SetString(PyExc_RuntimeError, "Iterator RemoveAxis may not be called on " "a buffered iterator"); return NPY_FAIL; } else if (axis < 0 || axis >= ndim) { PyErr_SetString(PyExc_ValueError, "axis out of bounds in iterator RemoveAxis"); return NPY_FAIL; } /* Reverse axis, since the iterator treats them that way */ axis = ndim - 1 - axis; /* First find the axis in question */ for (idim = 0; idim < ndim; ++idim) { /* If this is it, and it's iterated forward, done */ if (perm[idim] == axis) { xdim = idim; break; } /* If this is it, but it's iterated backward, must reverse the axis */ else if (-1 - perm[idim] == axis) { npy_intp *strides = NAD_STRIDES(axisdata_del); npy_intp shape = NAD_SHAPE(axisdata_del), offset; xdim = idim; /* * Adjust baseoffsets and resetbaseptr back to the start of * this axis. */ for (iop = 0; iop < nop; ++iop) { offset = (shape-1)*strides[iop]; baseoffsets[iop] += offset; resetdataptr[iop] += offset; } break; } NIT_ADVANCE_AXISDATA(axisdata_del, 1); } if (idim == ndim) { PyErr_SetString(PyExc_RuntimeError, "internal error in iterator perm"); return NPY_FAIL; } /* Adjust the permutation */ for (idim = 0; idim < ndim-1; ++idim) { npy_int8 p = (idim < xdim) ?
perm[idim] : perm[idim+1]; if (p >= 0) { if (p > axis) { --p; } } else { if (p < -1-axis) { ++p; } } perm[idim] = p; } /* Shift all the axisdata structures by one */ axisdata = NIT_INDEX_AXISDATA(axisdata_del, 1); memmove(axisdata_del, axisdata, (ndim-1-xdim)*sizeof_axisdata); /* Adjust the iteration size and reset iterend */ NIT_ITERSIZE(iter) = 1; axisdata = NIT_AXISDATA(iter); for (idim = 0; idim < ndim-1; ++idim) { if (npy_mul_sizes_with_overflow(&NIT_ITERSIZE(iter), NIT_ITERSIZE(iter), NAD_SHAPE(axisdata))) { NIT_ITERSIZE(iter) = -1; break; } NIT_ADVANCE_AXISDATA(axisdata, 1); } NIT_ITEREND(iter) = NIT_ITERSIZE(iter); /* Shrink the iterator */ NIT_NDIM(iter) = ndim - 1; /* If it is now 0-d fill the singleton dimension */ if (ndim == 1) { npy_intp *strides = NAD_STRIDES(axisdata_del); NAD_SHAPE(axisdata_del) = 1; for (iop = 0; iop < nop; ++iop) { strides[iop] = 0; } NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION; } return NpyIter_Reset(iter, NULL); } /*NUMPY_API * Removes multi-index support from an iterator. * * Returns NPY_SUCCEED or NPY_FAIL. */ NPY_NO_EXPORT int NpyIter_RemoveMultiIndex(NpyIter *iter) { npy_uint32 itflags; /* Make sure the iterator is reset */ if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) { return NPY_FAIL; } itflags = NIT_ITFLAGS(iter); if (itflags&NPY_ITFLAG_HASMULTIINDEX) { if (NIT_ITERSIZE(iter) < 0) { PyErr_SetString(PyExc_ValueError, "iterator is too large"); return NPY_FAIL; } NIT_ITFLAGS(iter) = itflags & ~NPY_ITFLAG_HASMULTIINDEX; npyiter_coalesce_axes(iter); } return NPY_SUCCEED; } /*NUMPY_API * Removes the inner loop handling (so HasExternalLoop returns true) */ NPY_NO_EXPORT int NpyIter_EnableExternalLoop(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ int nop = NIT_NOP(iter); /* Check conditions under which this can be done */ if (itflags&(NPY_ITFLAG_HASINDEX|NPY_ITFLAG_HASMULTIINDEX)) { PyErr_SetString(PyExc_ValueError, "Iterator flag EXTERNAL_LOOP cannot be used " "if an index or multi-index is being tracked"); return NPY_FAIL; } if ((itflags&(NPY_ITFLAG_BUFFER|NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP)) == (NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP)) { PyErr_SetString(PyExc_ValueError, "Iterator flag EXTERNAL_LOOP cannot be used " "with ranged iteration unless buffering is also enabled"); return NPY_FAIL; } /* Set the flag */ if (!(itflags&NPY_ITFLAG_EXLOOP)) { itflags |= NPY_ITFLAG_EXLOOP; NIT_ITFLAGS(iter) = itflags; /* * Check whether we can apply the single iteration * optimization to the iternext function. */ if (!(itflags&NPY_ITFLAG_BUFFER)) { NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) { NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION; } } } /* Reset the iterator */ return NpyIter_Reset(iter, NULL); } static char *_reset_cast_error = ( "Iterator reset failed due to a casting failure. " "This error is set as a Python error."); /*NUMPY_API * Resets the iterator to its initial state * * The use of errmsg is discouraged, it cannot be guaranteed that the GIL * will not be grabbed on casting errors even when this is passed. * * If errmsg is non-NULL, it should point to a variable which will * receive the error message, and no Python exception will be set. * This is so that the function can be called from code not holding * the GIL. Note that cast errors may still lead to the GIL being * grabbed temporarily. 
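 *
 * A minimal usage sketch (illustrative only, assuming 'iter' is a valid
 * iterator and the GIL is held, so errmsg can simply be NULL):
 *
 *     if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) {
 *         // a Python exception has been set; propagate the failure
 *         NpyIter_Deallocate(iter);
 *         return NULL;
 *     }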
*/ NPY_NO_EXPORT int NpyIter_Reset(NpyIter *iter, char **errmsg) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ int nop = NIT_NOP(iter); if (itflags&NPY_ITFLAG_BUFFER) { NpyIter_BufferData *bufferdata; /* If buffer allocation was delayed, do it now */ if (itflags&NPY_ITFLAG_DELAYBUF) { if (!npyiter_allocate_buffers(iter, errmsg)) { if (errmsg != NULL) { *errmsg = _reset_cast_error; } return NPY_FAIL; } NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF; } else { /* * If the iterindex is already right, no need to * do anything (and no cast error has previously occurred). */ bufferdata = NIT_BUFFERDATA(iter); if (NIT_ITERINDEX(iter) == NIT_ITERSTART(iter) && NBF_BUFITEREND(bufferdata) <= NIT_ITEREND(iter) && NBF_SIZE(bufferdata) > 0) { return NPY_SUCCEED; } if (npyiter_copy_from_buffers(iter) < 0) { if (errmsg != NULL) { *errmsg = _reset_cast_error; } return NPY_FAIL; } } } npyiter_goto_iterindex(iter, NIT_ITERSTART(iter)); if (itflags&NPY_ITFLAG_BUFFER) { /* Prepare the next buffers and set iterend/size */ if (npyiter_copy_to_buffers(iter, NULL) < 0) { if (errmsg != NULL) { *errmsg = _reset_cast_error; } return NPY_FAIL; } } return NPY_SUCCEED; } /*NUMPY_API * Resets the iterator to its initial state, with new base data pointers. * This function requires great caution. * * If errmsg is non-NULL, it should point to a variable which will * receive the error message, and no Python exception will be set. * This is so that the function can be called from code not holding * the GIL. Note that cast errors may still lead to the GIL being * grabbed temporarily. */ NPY_NO_EXPORT int NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs, char **errmsg) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ int iop, nop = NIT_NOP(iter); char **resetdataptr = NIT_RESETDATAPTR(iter); npy_intp *baseoffsets = NIT_BASEOFFSETS(iter); if (itflags&NPY_ITFLAG_BUFFER) { /* If buffer allocation was delayed, do it now */ if (itflags&NPY_ITFLAG_DELAYBUF) { if (!npyiter_allocate_buffers(iter, errmsg)) { return NPY_FAIL; } NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF; } else { if (npyiter_copy_from_buffers(iter) < 0) { if (errmsg != NULL) { *errmsg = _reset_cast_error; } return NPY_FAIL; } } } /* The new data pointers for resetting */ for (iop = 0; iop < nop; ++iop) { resetdataptr[iop] = baseptrs[iop] + baseoffsets[iop]; } npyiter_goto_iterindex(iter, NIT_ITERSTART(iter)); if (itflags&NPY_ITFLAG_BUFFER) { /* Prepare the next buffers and set iterend/size */ if (npyiter_copy_to_buffers(iter, NULL) < 0) { if (errmsg != NULL) { *errmsg = _reset_cast_error; } return NPY_FAIL; } } return NPY_SUCCEED; } /*NUMPY_API * Resets the iterator to a new iterator index range * * If errmsg is non-NULL, it should point to a variable which will * receive the error message, and no Python exception will be set. * This is so that the function can be called from code not holding * the GIL. Note that cast errors may still lead to the GIL being * grabbed temporarily. 
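 *
 * Illustrative sketch (not code from this file): the typical use is to
 * split the iteration range between workers after constructing the
 * iterator with the NPY_ITER_RANGED flag.  'iter' and 'errmsg' are
 * assumed to be set up by the caller.
 *
 *     npy_intp size = NpyIter_GetIterSize(iter);
 *     // first worker handles [0, size/2), second handles [size/2, size)
 *     if (NpyIter_ResetToIterIndexRange(iter, 0, size/2, errmsg)
 *                                                     != NPY_SUCCEED) {
 *         return NPY_FAIL;
 *     }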
*/ NPY_NO_EXPORT int NpyIter_ResetToIterIndexRange(NpyIter *iter, npy_intp istart, npy_intp iend, char **errmsg) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ /*int nop = NIT_NOP(iter);*/ if (!(itflags&NPY_ITFLAG_RANGE)) { if (errmsg == NULL) { PyErr_SetString(PyExc_ValueError, "Cannot call ResetToIterIndexRange on an iterator without " "requesting ranged iteration support in the constructor"); } else { *errmsg = "Cannot call ResetToIterIndexRange on an iterator " "without requesting ranged iteration support in the " "constructor"; } return NPY_FAIL; } if (istart < 0 || iend > NIT_ITERSIZE(iter)) { if (NIT_ITERSIZE(iter) < 0) { if (errmsg == NULL) { PyErr_SetString(PyExc_ValueError, "iterator is too large"); } else { *errmsg = "iterator is too large"; } return NPY_FAIL; } if (errmsg == NULL) { PyErr_Format(PyExc_ValueError, "Out-of-bounds range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to " "ResetToIterIndexRange", istart, iend); } else { *errmsg = "Out-of-bounds range passed to ResetToIterIndexRange"; } return NPY_FAIL; } else if (iend < istart) { if (errmsg == NULL) { PyErr_Format(PyExc_ValueError, "Invalid range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to ResetToIterIndexRange", istart, iend); } else { *errmsg = "Invalid range passed to ResetToIterIndexRange"; } return NPY_FAIL; } NIT_ITERSTART(iter) = istart; NIT_ITEREND(iter) = iend; return NpyIter_Reset(iter, errmsg); } /*NUMPY_API * Sets the iterator to the specified multi-index, which must have the * correct number of entries for 'ndim'. It is only valid * when NPY_ITER_MULTI_INDEX was passed to the constructor. This operation * fails if the multi-index is out of bounds. * * Returns NPY_SUCCEED on success, NPY_FAIL on failure. */ NPY_NO_EXPORT int NpyIter_GotoMultiIndex(NpyIter *iter, npy_intp const *multi_index) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); npy_intp iterindex, factor; NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; npy_int8 *perm; if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) { PyErr_SetString(PyExc_ValueError, "Cannot call GotoMultiIndex on an iterator without " "requesting a multi-index in the constructor"); return NPY_FAIL; } if (itflags&NPY_ITFLAG_BUFFER) { PyErr_SetString(PyExc_ValueError, "Cannot call GotoMultiIndex on an iterator which " "is buffered"); return NPY_FAIL; } if (itflags&NPY_ITFLAG_EXLOOP) { PyErr_SetString(PyExc_ValueError, "Cannot call GotoMultiIndex on an iterator which " "has the flag EXTERNAL_LOOP"); return NPY_FAIL; } perm = NIT_PERM(iter); axisdata = NIT_AXISDATA(iter); sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); /* Compute the iterindex corresponding to the multi-index */ iterindex = 0; factor = 1; for (idim = 0; idim < ndim; ++idim) { npy_int8 p = perm[idim]; npy_intp i, shape; shape = NAD_SHAPE(axisdata); if (p < 0) { /* If the perm entry is negative, reverse the index */ i = shape - multi_index[ndim+p] - 1; } else { i = multi_index[ndim-p-1]; } /* Bounds-check this index */ if (i >= 0 && i < shape) { iterindex += factor * i; factor *= shape; } else { PyErr_SetString(PyExc_IndexError, "Iterator GotoMultiIndex called with an out-of-bounds " "multi-index"); return NPY_FAIL; } NIT_ADVANCE_AXISDATA(axisdata, 1); } if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) { if (NIT_ITERSIZE(iter) < 0) { PyErr_SetString(PyExc_ValueError, "iterator is too large"); return NPY_FAIL; } PyErr_SetString(PyExc_IndexError, "Iterator GotoMultiIndex called with a multi-index outside 
the " "restricted iteration range"); return NPY_FAIL; } npyiter_goto_iterindex(iter, iterindex); return NPY_SUCCEED; } /*NUMPY_API * If the iterator is tracking an index, sets the iterator * to the specified index. * * Returns NPY_SUCCEED on success, NPY_FAIL on failure. */ NPY_NO_EXPORT int NpyIter_GotoIndex(NpyIter *iter, npy_intp flat_index) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); npy_intp iterindex, factor; NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; if (!(itflags&NPY_ITFLAG_HASINDEX)) { PyErr_SetString(PyExc_ValueError, "Cannot call GotoIndex on an iterator without " "requesting a C or Fortran index in the constructor"); return NPY_FAIL; } if (itflags&NPY_ITFLAG_BUFFER) { PyErr_SetString(PyExc_ValueError, "Cannot call GotoIndex on an iterator which " "is buffered"); return NPY_FAIL; } if (itflags&NPY_ITFLAG_EXLOOP) { PyErr_SetString(PyExc_ValueError, "Cannot call GotoIndex on an iterator which " "has the flag EXTERNAL_LOOP"); return NPY_FAIL; } if (flat_index < 0 || flat_index >= NIT_ITERSIZE(iter)) { PyErr_SetString(PyExc_IndexError, "Iterator GotoIndex called with an out-of-bounds " "index"); return NPY_FAIL; } axisdata = NIT_AXISDATA(iter); sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); /* Compute the iterindex corresponding to the flat_index */ iterindex = 0; factor = 1; for (idim = 0; idim < ndim; ++idim) { npy_intp i, shape, iterstride; iterstride = NAD_STRIDES(axisdata)[nop]; shape = NAD_SHAPE(axisdata); /* Extract the index from the flat_index */ if (iterstride == 0) { i = 0; } else if (iterstride < 0) { i = shape - (flat_index/(-iterstride))%shape - 1; } else { i = (flat_index/iterstride)%shape; } /* Add its contribution to iterindex */ iterindex += factor * i; factor *= shape; NIT_ADVANCE_AXISDATA(axisdata, 1); } if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) { PyErr_SetString(PyExc_IndexError, "Iterator GotoIndex called with an index outside the " "restricted iteration range."); return NPY_FAIL; } npyiter_goto_iterindex(iter, iterindex); return NPY_SUCCEED; } /*NUMPY_API * Sets the iterator position to the specified iterindex, * which matches the iteration order of the iterator. * * Returns NPY_SUCCEED on success, NPY_FAIL on failure. 
*/ NPY_NO_EXPORT int NpyIter_GotoIterIndex(NpyIter *iter, npy_intp iterindex) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ int iop, nop = NIT_NOP(iter); if (itflags&NPY_ITFLAG_EXLOOP) { PyErr_SetString(PyExc_ValueError, "Cannot call GotoIterIndex on an iterator which " "has the flag EXTERNAL_LOOP"); return NPY_FAIL; } if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) { if (NIT_ITERSIZE(iter) < 0) { PyErr_SetString(PyExc_ValueError, "iterator is too large"); return NPY_FAIL; } PyErr_SetString(PyExc_IndexError, "Iterator GotoIterIndex called with an iterindex outside the " "iteration range."); return NPY_FAIL; } if (itflags&NPY_ITFLAG_BUFFER) { NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); npy_intp bufiterend, size; size = NBF_SIZE(bufferdata); bufiterend = NBF_BUFITEREND(bufferdata); /* Check if the new iterindex is already within the buffer */ if (!(itflags&NPY_ITFLAG_REDUCE) && iterindex < bufiterend && iterindex >= bufiterend - size) { npy_intp *strides, delta; char **ptrs; strides = NBF_STRIDES(bufferdata); ptrs = NBF_PTRS(bufferdata); delta = iterindex - NIT_ITERINDEX(iter); for (iop = 0; iop < nop; ++iop) { ptrs[iop] += delta * strides[iop]; } NIT_ITERINDEX(iter) = iterindex; } /* Start the buffer at the provided iterindex */ else { /* Write back to the arrays */ if (npyiter_copy_from_buffers(iter) < 0) { return NPY_FAIL; } npyiter_goto_iterindex(iter, iterindex); /* Prepare the next buffers and set iterend/size */ if (npyiter_copy_to_buffers(iter, NULL) < 0) { return NPY_FAIL; } } } else { npyiter_goto_iterindex(iter, iterindex); } return NPY_SUCCEED; } /*NUMPY_API * Gets the current iteration index */ NPY_NO_EXPORT npy_intp NpyIter_GetIterIndex(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); /* iterindex is only used if NPY_ITER_RANGED or NPY_ITER_BUFFERED was set */ if (itflags&(NPY_ITFLAG_RANGE|NPY_ITFLAG_BUFFER)) { return NIT_ITERINDEX(iter); } else { npy_intp iterindex; NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; iterindex = 0; if (ndim == 0) { return 0; } sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1); for (idim = ndim-2; idim >= 0; --idim) { iterindex += NAD_INDEX(axisdata); NIT_ADVANCE_AXISDATA(axisdata, -1); iterindex *= NAD_SHAPE(axisdata); } iterindex += NAD_INDEX(axisdata); return iterindex; } } /*NUMPY_API * Whether the buffer allocation is being delayed */ NPY_NO_EXPORT npy_bool NpyIter_HasDelayedBufAlloc(NpyIter *iter) { return (NIT_ITFLAGS(iter)&NPY_ITFLAG_DELAYBUF) != 0; } /*NUMPY_API * Whether the iterator handles the inner loop */ NPY_NO_EXPORT npy_bool NpyIter_HasExternalLoop(NpyIter *iter) { return (NIT_ITFLAGS(iter)&NPY_ITFLAG_EXLOOP) != 0; } /*NUMPY_API * Whether the iterator is tracking a multi-index */ NPY_NO_EXPORT npy_bool NpyIter_HasMultiIndex(NpyIter *iter) { return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASMULTIINDEX) != 0; } /*NUMPY_API * Whether the iterator is tracking an index */ NPY_NO_EXPORT npy_bool NpyIter_HasIndex(NpyIter *iter) { return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASINDEX) != 0; } /*NUMPY_API * Checks to see whether this is the first time the elements * of the specified reduction operand which the iterator points at are * being seen for the first time. The function returns * a reasonable answer for reduction operands and when buffering is * disabled. The answer may be incorrect for buffered non-reduction * operands. 
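 *
 * For example (an illustrative sketch, not code from this file), a caller
 * accumulating into reduction operand 0 might use it to decide when the
 * output element still needs initializing:
 *
 *     if (NpyIter_IsFirstVisit(iter, 0)) {
 *         // first visit to this output element: initialize it
 *     }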
* * This function is intended to be used in EXTERNAL_LOOP mode only, * and will produce some wrong answers when that mode is not enabled. * * If this function returns true, the caller should also * check the inner loop stride of the operand, because if * that stride is 0, then only the first element of the innermost * external loop is being visited for the first time. * * WARNING: For performance reasons, 'iop' is not bounds-checked, * it is not confirmed that 'iop' is actually a reduction * operand, and it is not confirmed that EXTERNAL_LOOP * mode is enabled. These checks are the responsibility of * the caller, and should be done outside of any inner loops. */ NPY_NO_EXPORT npy_bool NpyIter_IsFirstVisit(NpyIter *iter, int iop) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); axisdata = NIT_AXISDATA(iter); for (idim = 0; idim < ndim; ++idim) { npy_intp coord = NAD_INDEX(axisdata); npy_intp stride = NAD_STRIDES(axisdata)[iop]; /* * If this is a reduction dimension and the coordinate * is not at the start, it's definitely not the first visit */ if (stride == 0 && coord != 0) { return 0; } NIT_ADVANCE_AXISDATA(axisdata, 1); } /* * In reduction buffering mode, there's a double loop being * tracked in the buffer part of the iterator data structure. * We only need to check the outer level of this two-level loop, * because of the requirement that EXTERNAL_LOOP be enabled. */ if (itflags&NPY_ITFLAG_BUFFER) { NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); /* The outer reduce loop */ if (NBF_REDUCE_POS(bufferdata) != 0 && NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop] == 0) { return 0; } } return 1; } /*NUMPY_API * Whether the iteration requires buffering (i.e. it could not be done * with buffering disabled). */ NPY_NO_EXPORT npy_bool NpyIter_RequiresBuffering(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ int iop, nop = NIT_NOP(iter); npyiter_opitflags *op_itflags; if (!(itflags&NPY_ITFLAG_BUFFER)) { return 0; } op_itflags = NIT_OPITFLAGS(iter); /* If any operand requires a cast, buffering is mandatory */ for (iop = 0; iop < nop; ++iop) { if (op_itflags[iop]&NPY_OP_ITFLAG_CAST) { return 1; } } return 0; } /*NUMPY_API * Whether the iteration loop, and in particular the iternext() * function, needs API access. If this is true, the GIL must * be retained while iterating. * * NOTE: Internally (currently), `NpyIter_GetTransferFlags` will * additionally provide information on whether floating point errors * may be given during casts. The flags only require the API use * necessary for buffering though. So an iterator which does not require * buffering may indicate `NpyIter_IterationNeedsAPI`, but not include * the flag in `NpyIter_GetTransferFlags`. */ NPY_NO_EXPORT npy_bool NpyIter_IterationNeedsAPI(NpyIter *iter) { return (NIT_ITFLAGS(iter)&NPY_ITFLAG_NEEDSAPI) != 0; } /* * Fetch the ArrayMethod (runtime) flags for all "transfer functions" (i.e. * copy to buffer/casts). * * TODO: This should be public API, but that only makes sense when the * ArrayMethod API is made public.
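 *
 * Illustrative sketch (assuming NumPy's ArrayMethod flag names; this is
 * not code from this file): a caller can test the combined flags of all
 * transfer functions, e.g. to decide whether floating point errors need
 * checking after buffered copies:
 *
 *     int flags = NpyIter_GetTransferFlags(iter);
 *     if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
 *         // casts may set FPU error flags; check them after the loop
 *     }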
*/ NPY_NO_EXPORT int NpyIter_GetTransferFlags(NpyIter *iter) { return NIT_ITFLAGS(iter) >> NPY_ITFLAG_TRANSFERFLAGS_SHIFT; } /*NUMPY_API * Gets the number of dimensions being iterated */ NPY_NO_EXPORT int NpyIter_GetNDim(NpyIter *iter) { return NIT_NDIM(iter); } /*NUMPY_API * Gets the number of operands being iterated */ NPY_NO_EXPORT int NpyIter_GetNOp(NpyIter *iter) { return NIT_NOP(iter); } /*NUMPY_API * Gets the number of elements being iterated */ NPY_NO_EXPORT npy_intp NpyIter_GetIterSize(NpyIter *iter) { return NIT_ITERSIZE(iter); } /*NUMPY_API * Whether the iterator is buffered */ NPY_NO_EXPORT npy_bool NpyIter_IsBuffered(NpyIter *iter) { return (NIT_ITFLAGS(iter)&NPY_ITFLAG_BUFFER) != 0; } /*NUMPY_API * Whether the inner loop can grow if buffering is unneeded */ NPY_NO_EXPORT npy_bool NpyIter_IsGrowInner(NpyIter *iter) { return (NIT_ITFLAGS(iter)&NPY_ITFLAG_GROWINNER) != 0; } /*NUMPY_API * Gets the size of the buffer, or 0 if buffering is not enabled */ NPY_NO_EXPORT npy_intp NpyIter_GetBufferSize(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ int nop = NIT_NOP(iter); if (itflags&NPY_ITFLAG_BUFFER) { NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); return NBF_BUFFERSIZE(bufferdata); } else { return 0; } } /*NUMPY_API * Gets the range of iteration indices being iterated */ NPY_NO_EXPORT void NpyIter_GetIterIndexRange(NpyIter *iter, npy_intp *istart, npy_intp *iend) { *istart = NIT_ITERSTART(iter); *iend = NIT_ITEREND(iter); } /*NUMPY_API * Gets the broadcast shape if a multi-index is being tracked by the iterator, * otherwise gets the shape of the iteration as Fortran-order * (fastest-changing index first). * * The reason Fortran-order is returned when a multi-index * is not enabled is that this is providing a direct view into how * the iterator traverses the n-dimensional space. The iterator organizes * its memory from fastest index to slowest index, and when * a multi-index is enabled, it uses a permutation to recover the original * order. * * Returns NPY_SUCCEED or NPY_FAIL. */ NPY_NO_EXPORT int NpyIter_GetShape(NpyIter *iter, npy_intp *outshape) { npy_uint32 itflags = NIT_ITFLAGS(iter); int ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); int idim, sizeof_axisdata; NpyIter_AxisData *axisdata; npy_int8 *perm; axisdata = NIT_AXISDATA(iter); sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); if (itflags&NPY_ITFLAG_HASMULTIINDEX) { perm = NIT_PERM(iter); for(idim = 0; idim < ndim; ++idim) { int axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL); outshape[axis] = NAD_SHAPE(axisdata); NIT_ADVANCE_AXISDATA(axisdata, 1); } } else { for(idim = 0; idim < ndim; ++idim) { outshape[idim] = NAD_SHAPE(axisdata); NIT_ADVANCE_AXISDATA(axisdata, 1); } } return NPY_SUCCEED; } /*NUMPY_API * Builds a set of strides which are the same as the strides of an * output array created using the NPY_ITER_ALLOCATE flag, where NULL * was passed for op_axes. This is for data packed contiguously, * but not necessarily in C or Fortran order. This should be used * together with NpyIter_GetShape and NpyIter_GetNDim. * * A use case for this function is to match the shape and layout of * the iterator and tack on one or more dimensions. For example, * in order to generate a vector per input value for a numerical gradient, * you pass in ndim*itemsize for itemsize, then add another dimension to * the end with size ndim and stride itemsize. 
To do the Hessian matrix, * you do the same thing but add two dimensions, or take advantage of * the symmetry and pack it into 1 dimension with a particular encoding. * * This function may only be called if the iterator is tracking a multi-index * and if NPY_ITER_DONT_NEGATE_STRIDES was used to prevent an axis from * being iterated in reverse order. * * If an array is created with this method, simply adding 'itemsize' * for each iteration will traverse the new array matching the * iterator. * * Returns NPY_SUCCEED or NPY_FAIL. */ NPY_NO_EXPORT int NpyIter_CreateCompatibleStrides(NpyIter *iter, npy_intp itemsize, npy_intp *outstrides) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); npy_intp sizeof_axisdata; NpyIter_AxisData *axisdata; npy_int8 *perm; if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) { PyErr_SetString(PyExc_RuntimeError, "Iterator CreateCompatibleStrides may only be called " "if a multi-index is being tracked"); return NPY_FAIL; } axisdata = NIT_AXISDATA(iter); sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); perm = NIT_PERM(iter); for(idim = 0; idim < ndim; ++idim) { npy_bool flipped; npy_int8 axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, &flipped); if (flipped) { PyErr_SetString(PyExc_RuntimeError, "Iterator CreateCompatibleStrides may only be called " "if DONT_NEGATE_STRIDES was used to prevent reverse " "iteration of an axis"); return NPY_FAIL; } else { outstrides[axis] = itemsize; } itemsize *= NAD_SHAPE(axisdata); NIT_ADVANCE_AXISDATA(axisdata, 1); } return NPY_SUCCEED; } /*NUMPY_API * Get the array of data pointers (1 per object being iterated) * * This function may be safely called without holding the Python GIL. */ NPY_NO_EXPORT char ** NpyIter_GetDataPtrArray(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ int nop = NIT_NOP(iter); if (itflags&NPY_ITFLAG_BUFFER) { NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); return NBF_PTRS(bufferdata); } else { NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); return NAD_PTRS(axisdata); } } /*NUMPY_API * Get the array of data pointers (1 per object being iterated), * directly into the arrays (never pointing to a buffer), for starting * unbuffered iteration. This always returns the addresses for the * iterator position as reset to iterator index 0. * * These pointers are different from the pointers accepted by * NpyIter_ResetBasePointers, because the direction along some * axes may have been reversed, requiring base offsets. * * This function may be safely called without holding the Python GIL. 
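 *
 * Illustrative sketch (the helper named below is hypothetical):
 *
 *     // pointers to the first element each operand visits at iterindex 0
 *     char **initial = NpyIter_GetInitialDataPtrArray(iter);
 *     // unlike NpyIter_GetDataPtrArray, these never point into buffers
 *     // and do not move as the iterator advances
 *     process_first_elements(initial, NpyIter_GetNOp(iter));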
*/ NPY_NO_EXPORT char ** NpyIter_GetInitialDataPtrArray(NpyIter *iter) { /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/ /*int ndim = NIT_NDIM(iter);*/ int nop = NIT_NOP(iter); return NIT_RESETDATAPTR(iter); } /*NUMPY_API * Get the array of data type pointers (1 per object being iterated) */ NPY_NO_EXPORT PyArray_Descr ** NpyIter_GetDescrArray(NpyIter *iter) { /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/ /*int ndim = NIT_NDIM(iter);*/ /*int nop = NIT_NOP(iter);*/ return NIT_DTYPES(iter); } /*NUMPY_API * Get the array of objects being iterated */ NPY_NO_EXPORT PyArrayObject ** NpyIter_GetOperandArray(NpyIter *iter) { /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/ /*int ndim = NIT_NDIM(iter);*/ int nop = NIT_NOP(iter); return NIT_OPERANDS(iter); } /*NUMPY_API * Returns a view to the i-th object with the iterator's internal axes */ NPY_NO_EXPORT PyArrayObject * NpyIter_GetIterView(NpyIter *iter, npy_intp i) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS]; PyArrayObject *obj, *view; PyArray_Descr *dtype; char *dataptr; NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; int writeable; if (i < 0) { PyErr_SetString(PyExc_IndexError, "index provided for an iterator view was out of bounds"); return NULL; } /* Don't provide views if buffering is enabled */ if (itflags&NPY_ITFLAG_BUFFER) { PyErr_SetString(PyExc_ValueError, "cannot provide an iterator view when buffering is enabled"); return NULL; } obj = NIT_OPERANDS(iter)[i]; dtype = PyArray_DESCR(obj); writeable = NIT_OPITFLAGS(iter)[i]&NPY_OP_ITFLAG_WRITE; dataptr = NIT_RESETDATAPTR(iter)[i]; axisdata = NIT_AXISDATA(iter); sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); /* Retrieve the shape and strides from the axisdata */ for (idim = 0; idim < ndim; ++idim) { shape[ndim-idim-1] = NAD_SHAPE(axisdata); strides[ndim-idim-1] = NAD_STRIDES(axisdata)[i]; NIT_ADVANCE_AXISDATA(axisdata, 1); } Py_INCREF(dtype); view = (PyArrayObject *)PyArray_NewFromDescrAndBase( &PyArray_Type, dtype, ndim, shape, strides, dataptr, writeable ? 
NPY_ARRAY_WRITEABLE : 0, NULL, (PyObject *)obj); return view; } /*NUMPY_API * Get a pointer to the index, if it is being tracked */ NPY_NO_EXPORT npy_intp * NpyIter_GetIndexPtr(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ int nop = NIT_NOP(iter); NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); if (itflags&NPY_ITFLAG_HASINDEX) { /* The index is just after the data pointers */ return (npy_intp*)NAD_PTRS(axisdata) + nop; } else { return NULL; } } /*NUMPY_API * Gets an array of read flags (1 per object being iterated) */ NPY_NO_EXPORT void NpyIter_GetReadFlags(NpyIter *iter, char *outreadflags) { /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/ /*int ndim = NIT_NDIM(iter);*/ int iop, nop = NIT_NOP(iter); npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter); for (iop = 0; iop < nop; ++iop) { outreadflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_READ) != 0; } } /*NUMPY_API * Gets an array of write flags (1 per object being iterated) */ NPY_NO_EXPORT void NpyIter_GetWriteFlags(NpyIter *iter, char *outwriteflags) { /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/ /*int ndim = NIT_NDIM(iter);*/ int iop, nop = NIT_NOP(iter); npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter); for (iop = 0; iop < nop; ++iop) { outwriteflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) != 0; } } /*NUMPY_API * Get the array of strides for the inner loop (when HasExternalLoop is true) * * This function may be safely called without holding the Python GIL. */ NPY_NO_EXPORT npy_intp * NpyIter_GetInnerStrideArray(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ int nop = NIT_NOP(iter); if (itflags&NPY_ITFLAG_BUFFER) { NpyIter_BufferData *data = NIT_BUFFERDATA(iter); return NBF_STRIDES(data); } else { NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); return NAD_STRIDES(axisdata); } } /*NUMPY_API * Gets the array of strides for the specified axis. * If the iterator is tracking a multi-index, gets the strides * for the axis specified, otherwise gets the strides for * the iteration axis as Fortran order (fastest-changing axis first). * * Returns NULL if an error occurs. */ NPY_NO_EXPORT npy_intp * NpyIter_GetAxisStrideArray(NpyIter *iter, int axis) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); npy_int8 *perm = NIT_PERM(iter); NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); if (axis < 0 || axis >= ndim) { PyErr_SetString(PyExc_ValueError, "axis out of bounds in iterator GetStrideAxisArray"); return NULL; } if (itflags&NPY_ITFLAG_HASMULTIINDEX) { /* Reverse axis, since the iterator treats them that way */ axis = ndim-1-axis; /* First find the axis in question */ for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { if (perm[idim] == axis || -1 - perm[idim] == axis) { return NAD_STRIDES(axisdata); } } } else { return NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, axis)); } PyErr_SetString(PyExc_RuntimeError, "internal error in iterator perm"); return NULL; } /*NUMPY_API * Get an array of strides which are fixed. Any strides which may * change during iteration receive the value NPY_MAX_INTP. Once * the iterator is ready to iterate, call this to get the strides * which will always be fixed in the inner loop, then choose optimized * inner loop functions which take advantage of those fixed strides. * * This function may be safely called without holding the Python GIL. 
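 *
 * Illustrative sketch (assuming a two-operand iterator of doubles; the
 * contiguous specialization mentioned is hypothetical):
 *
 *     npy_intp fixed_strides[NPY_MAXARGS];
 *     NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
 *     if (fixed_strides[0] == sizeof(double) &&
 *             fixed_strides[1] == sizeof(double)) {
 *         // both operands are guaranteed contiguous in the inner loop,
 *         // so a contiguous inner-loop specialization can be chosen
 *     }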
*/ NPY_NO_EXPORT void NpyIter_GetInnerFixedStrideArray(NpyIter *iter, npy_intp *out_strides) { npy_uint32 itflags = NIT_ITFLAGS(iter); int ndim = NIT_NDIM(iter); int iop, nop = NIT_NOP(iter); NpyIter_AxisData *axisdata0 = NIT_AXISDATA(iter); npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); if (itflags&NPY_ITFLAG_BUFFER) { NpyIter_BufferData *data = NIT_BUFFERDATA(iter); npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter); npy_intp stride, *strides = NBF_STRIDES(data), *ad_strides = NAD_STRIDES(axisdata0); PyArray_Descr **dtypes = NIT_DTYPES(iter); for (iop = 0; iop < nop; ++iop) { stride = strides[iop]; /* * Operands which are always/never buffered have fixed strides, * and everything has fixed strides when ndim is 0 or 1 */ if (ndim <= 1 || (op_itflags[iop]& (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_BUFNEVER))) { out_strides[iop] = stride; } /* If it's a reduction, 0-stride inner loop may have fixed stride */ else if (stride == 0 && (itflags&NPY_ITFLAG_REDUCE)) { /* If it's a reduction operand, definitely fixed stride */ if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) { out_strides[iop] = stride; } /* * Otherwise it's guaranteed to be a fixed stride if the * stride is 0 for all the dimensions. */ else { NpyIter_AxisData *axisdata = axisdata0; int idim; for (idim = 0; idim < ndim; ++idim) { if (NAD_STRIDES(axisdata)[iop] != 0) { break; } NIT_ADVANCE_AXISDATA(axisdata, 1); } /* If all the strides were 0, the stride won't change */ if (idim == ndim) { out_strides[iop] = stride; } else { out_strides[iop] = NPY_MAX_INTP; } } } /* * Inner loop contiguous array means its stride won't change when * switching between buffering and not buffering */ else if (ad_strides[iop] == dtypes[iop]->elsize) { out_strides[iop] = ad_strides[iop]; } /* * Otherwise the strides can change if the operand is sometimes * buffered, sometimes not. */ else { out_strides[iop] = NPY_MAX_INTP; } } } else { /* If there's no buffering, the strides are always fixed */ memcpy(out_strides, NAD_STRIDES(axisdata0), nop*NPY_SIZEOF_INTP); } } /*NUMPY_API * Get a pointer to the size of the inner loop (when HasExternalLoop is true) * * This function may be safely called without holding the Python GIL. 
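 *
 * Illustrative sketch of the standard EXTERNAL_LOOP pattern this pointer
 * is used in (assuming a single read-only operand, with iterator
 * construction and error handling done elsewhere):
 *
 *     NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
 *     char **dataptr = NpyIter_GetDataPtrArray(iter);
 *     npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
 *     npy_intp *sizeptr = NpyIter_GetInnerLoopSizePtr(iter);
 *     do {
 *         char *data = dataptr[0];
 *         npy_intp stride = strideptr[0];
 *         npy_intp count = *sizeptr;  // may change every outer iteration
 *         while (count--) {
 *             // process one element at 'data'
 *             data += stride;
 *         }
 *     } while (iternext(iter));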
*/ NPY_NO_EXPORT npy_intp * NpyIter_GetInnerLoopSizePtr(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); /*int ndim = NIT_NDIM(iter);*/ int nop = NIT_NOP(iter); if (itflags&NPY_ITFLAG_BUFFER) { NpyIter_BufferData *data = NIT_BUFFERDATA(iter); return &NBF_SIZE(data); } else { NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); return &NAD_SHAPE(axisdata); } } /*NUMPY_API * For debugging */ NPY_NO_EXPORT void NpyIter_DebugPrint(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int iop, nop = NIT_NOP(iter); NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; NPY_ALLOW_C_API_DEF NPY_ALLOW_C_API printf("\n------ BEGIN ITERATOR DUMP ------\n"); printf("| Iterator Address: %p\n", (void *)iter); printf("| ItFlags: "); if (itflags&NPY_ITFLAG_IDENTPERM) printf("IDENTPERM "); if (itflags&NPY_ITFLAG_NEGPERM) printf("NEGPERM "); if (itflags&NPY_ITFLAG_HASINDEX) printf("HASINDEX "); if (itflags&NPY_ITFLAG_HASMULTIINDEX) printf("HASMULTIINDEX "); if (itflags&NPY_ITFLAG_FORCEDORDER) printf("FORCEDORDER "); if (itflags&NPY_ITFLAG_EXLOOP) printf("EXLOOP "); if (itflags&NPY_ITFLAG_RANGE) printf("RANGE "); if (itflags&NPY_ITFLAG_BUFFER) printf("BUFFER "); if (itflags&NPY_ITFLAG_GROWINNER) printf("GROWINNER "); if (itflags&NPY_ITFLAG_ONEITERATION) printf("ONEITERATION "); if (itflags&NPY_ITFLAG_DELAYBUF) printf("DELAYBUF "); if (itflags&NPY_ITFLAG_NEEDSAPI) printf("NEEDSAPI "); if (itflags&NPY_ITFLAG_REDUCE) printf("REDUCE "); if (itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS) printf("REUSE_REDUCE_LOOPS "); printf("\n"); printf("| NDim: %d\n", ndim); printf("| NOp: %d\n", nop); if (NIT_MASKOP(iter) >= 0) { printf("| MaskOp: %d\n", (int)NIT_MASKOP(iter)); } printf("| IterSize: %d\n", (int)NIT_ITERSIZE(iter)); printf("| IterStart: %d\n", (int)NIT_ITERSTART(iter)); printf("| IterEnd: %d\n", (int)NIT_ITEREND(iter)); printf("| IterIndex: %d\n", (int)NIT_ITERINDEX(iter)); printf("| Iterator SizeOf: %d\n", (int)NIT_SIZEOF_ITERATOR(itflags, ndim, nop)); printf("| BufferData SizeOf: %d\n", (int)NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop)); printf("| AxisData SizeOf: %d\n", (int)NIT_AXISDATA_SIZEOF(itflags, ndim, nop)); printf("|\n"); printf("| Perm: "); for (idim = 0; idim < ndim; ++idim) { printf("%d ", (int)NIT_PERM(iter)[idim]); } printf("\n"); printf("| DTypes: "); for (iop = 0; iop < nop; ++iop) { printf("%p ", (void *)NIT_DTYPES(iter)[iop]); } printf("\n"); printf("| DTypes: "); for (iop = 0; iop < nop; ++iop) { if (NIT_DTYPES(iter)[iop] != NULL) PyObject_Print((PyObject*)NIT_DTYPES(iter)[iop], stdout, 0); else printf("(nil) "); printf(" "); } printf("\n"); printf("| InitDataPtrs: "); for (iop = 0; iop < nop; ++iop) { printf("%p ", (void *)NIT_RESETDATAPTR(iter)[iop]); } printf("\n"); printf("| BaseOffsets: "); for (iop = 0; iop < nop; ++iop) { printf("%i ", (int)NIT_BASEOFFSETS(iter)[iop]); } printf("\n"); if (itflags&NPY_ITFLAG_HASINDEX) { printf("| InitIndex: %d\n", (int)(npy_intp)NIT_RESETDATAPTR(iter)[nop]); } printf("| Operands: "); for (iop = 0; iop < nop; ++iop) { printf("%p ", (void *)NIT_OPERANDS(iter)[iop]); } printf("\n"); printf("| Operand DTypes: "); for (iop = 0; iop < nop; ++iop) { PyArray_Descr *dtype; if (NIT_OPERANDS(iter)[iop] != NULL) { dtype = PyArray_DESCR(NIT_OPERANDS(iter)[iop]); if (dtype != NULL) PyObject_Print((PyObject *)dtype, stdout, 0); else printf("(nil) "); } else { printf("(op nil) "); } printf(" "); } printf("\n"); printf("| OpItFlags:\n"); for (iop = 0; iop < nop; ++iop) { printf("| Flags[%d]: ", (int)iop); if 
((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_READ) printf("READ "); if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITE) printf("WRITE "); if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_CAST) printf("CAST "); if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_BUFNEVER) printf("BUFNEVER "); if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_ALIGNED) printf("ALIGNED "); if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_REDUCE) printf("REDUCE "); if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_VIRTUAL) printf("VIRTUAL "); if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITEMASKED) printf("WRITEMASKED "); printf("\n"); } printf("|\n"); if (itflags&NPY_ITFLAG_BUFFER) { NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata); printf("| BufferData:\n"); printf("| BufferSize: %d\n", (int)NBF_BUFFERSIZE(bufferdata)); printf("| Size: %d\n", (int)NBF_SIZE(bufferdata)); printf("| BufIterEnd: %d\n", (int)NBF_BUFITEREND(bufferdata)); if (itflags&NPY_ITFLAG_REDUCE) { printf("| REDUCE Pos: %d\n", (int)NBF_REDUCE_POS(bufferdata)); printf("| REDUCE OuterSize: %d\n", (int)NBF_REDUCE_OUTERSIZE(bufferdata)); printf("| REDUCE OuterDim: %d\n", (int)NBF_REDUCE_OUTERDIM(bufferdata)); } printf("| Strides: "); for (iop = 0; iop < nop; ++iop) printf("%d ", (int)NBF_STRIDES(bufferdata)[iop]); printf("\n"); /* Print the fixed strides when there's no inner loop */ if (itflags&NPY_ITFLAG_EXLOOP) { npy_intp fixedstrides[NPY_MAXDIMS]; printf("| Fixed Strides: "); NpyIter_GetInnerFixedStrideArray(iter, fixedstrides); for (iop = 0; iop < nop; ++iop) printf("%d ", (int)fixedstrides[iop]); printf("\n"); } printf("| Ptrs: "); for (iop = 0; iop < nop; ++iop) printf("%p ", (void *)NBF_PTRS(bufferdata)[iop]); printf("\n"); if (itflags&NPY_ITFLAG_REDUCE) { printf("| REDUCE Outer Strides: "); for (iop = 0; iop < nop; ++iop) printf("%d ", (int)NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop]); printf("\n"); printf("| REDUCE Outer Ptrs: "); for (iop = 0; iop < nop; ++iop) printf("%p ", (void *)NBF_REDUCE_OUTERPTRS(bufferdata)[iop]); printf("\n"); } printf("| ReadTransferFn: "); for (iop = 0; iop < nop; ++iop) printf("%p ", (void *)transferinfo[iop].read.func); printf("\n"); printf("| ReadTransferData: "); for (iop = 0; iop < nop; ++iop) printf("%p ", (void *)transferinfo[iop].read.auxdata); printf("\n"); printf("| WriteTransferFn: "); for (iop = 0; iop < nop; ++iop) printf("%p ", (void *)transferinfo[iop].write.func); printf("\n"); printf("| WriteTransferData: "); for (iop = 0; iop < nop; ++iop) printf("%p ", (void *)transferinfo[iop].write.auxdata); printf("\n"); printf("| Buffers: "); for (iop = 0; iop < nop; ++iop) printf("%p ", (void *)NBF_BUFFERS(bufferdata)[iop]); printf("\n"); printf("|\n"); } axisdata = NIT_AXISDATA(iter); sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { printf("| AxisData[%d]:\n", (int)idim); printf("| Shape: %d\n", (int)NAD_SHAPE(axisdata)); printf("| Index: %d\n", (int)NAD_INDEX(axisdata)); printf("| Strides: "); for (iop = 0; iop < nop; ++iop) { printf("%d ", (int)NAD_STRIDES(axisdata)[iop]); } printf("\n"); if (itflags&NPY_ITFLAG_HASINDEX) { printf("| Index Stride: %d\n", (int)NAD_STRIDES(axisdata)[nop]); } printf("| Ptrs: "); for (iop = 0; iop < nop; ++iop) { printf("%p ", (void *)NAD_PTRS(axisdata)[iop]); } printf("\n"); if (itflags&NPY_ITFLAG_HASINDEX) { printf("| Index Value: %d\n", (int)((npy_intp*)NAD_PTRS(axisdata))[nop]); } } printf("------- END ITERATOR DUMP -------\n"); fflush(stdout); 
NPY_DISABLE_C_API } NPY_NO_EXPORT void npyiter_coalesce_axes(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); npy_intp istrides, nstrides = NAD_NSTRIDES(); NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); NpyIter_AxisData *ad_compress = axisdata; npy_intp new_ndim = 1; /* The HASMULTIINDEX or IDENTPERM flags do not apply after coalescing */ NIT_ITFLAGS(iter) &= ~(NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_HASMULTIINDEX); for (idim = 0; idim < ndim-1; ++idim) { int can_coalesce = 1; npy_intp shape0 = NAD_SHAPE(ad_compress); npy_intp shape1 = NAD_SHAPE(NIT_INDEX_AXISDATA(axisdata, 1)); npy_intp *strides0 = NAD_STRIDES(ad_compress); npy_intp *strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, 1)); /* Check that all the axes can be coalesced */ for (istrides = 0; istrides < nstrides; ++istrides) { if (!((shape0 == 1 && strides0[istrides] == 0) || (shape1 == 1 && strides1[istrides] == 0)) && (strides0[istrides]*shape0 != strides1[istrides])) { can_coalesce = 0; break; } } if (can_coalesce) { npy_intp *strides = NAD_STRIDES(ad_compress); NIT_ADVANCE_AXISDATA(axisdata, 1); NAD_SHAPE(ad_compress) *= NAD_SHAPE(axisdata); for (istrides = 0; istrides < nstrides; ++istrides) { if (strides[istrides] == 0) { strides[istrides] = NAD_STRIDES(axisdata)[istrides]; } } } else { NIT_ADVANCE_AXISDATA(axisdata, 1); NIT_ADVANCE_AXISDATA(ad_compress, 1); if (ad_compress != axisdata) { memcpy(ad_compress, axisdata, sizeof_axisdata); } ++new_ndim; } } /* * If the number of axes shrunk, reset the perm and * compress the data into the new layout. */ if (new_ndim < ndim) { npy_int8 *perm = NIT_PERM(iter); /* Reset to an identity perm */ for (idim = 0; idim < new_ndim; ++idim) { perm[idim] = (npy_int8)idim; } NIT_NDIM(iter) = new_ndim; } } /* * If errmsg is non-NULL, it should point to a variable which will * receive the error message, and no Python exception will be set. * This is so that the function can be called from code not holding * the GIL. */ NPY_NO_EXPORT int npyiter_allocate_buffers(NpyIter *iter, char **errmsg) { /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/ /*int ndim = NIT_NDIM(iter);*/ int iop = 0, nop = NIT_NOP(iter); npy_intp i; npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter); NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); PyArray_Descr **op_dtype = NIT_DTYPES(iter); npy_intp buffersize = NBF_BUFFERSIZE(bufferdata); char *buffer, **buffers = NBF_BUFFERS(bufferdata); for (iop = 0; iop < nop; ++iop) { npyiter_opitflags flags = op_itflags[iop]; /* * If we have determined that a buffer may be needed, * allocate one. */ if (!(flags&NPY_OP_ITFLAG_BUFNEVER)) { npy_intp itemsize = op_dtype[iop]->elsize; buffer = PyArray_malloc(itemsize*buffersize); if (buffer == NULL) { if (errmsg == NULL) { PyErr_NoMemory(); } else { *errmsg = "out of memory"; } goto fail; } if (PyDataType_FLAGCHK(op_dtype[iop], NPY_NEEDS_INIT)) { memset(buffer, '\0', itemsize*buffersize); } buffers[iop] = buffer; } } return 1; fail: for (i = 0; i < iop; ++i) { if (buffers[i] != NULL) { PyArray_free(buffers[i]); buffers[i] = NULL; } } return 0; } /* * This sets the AXISDATA portion of the iterator to the specified * iterindex, updating the pointers as well. This function does * no error checking. 
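 *
 * For example (illustrative): with two axisdata entries of shape 3 and 4
 * (fastest-varying first), iterindex 7 decomposes as
 *
 *     index[0] = 7 % 3 = 1,  index[1] = (7 / 3) % 4 = 2
 *
 * and each operand pointer is rebuilt as
 * resetdataptr + 1*strides0 + 2*strides1.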
*/ NPY_NO_EXPORT void npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int nop = NIT_NOP(iter); char **dataptr; NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; npy_intp istrides, nstrides, i, shape; axisdata = NIT_AXISDATA(iter); sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); nstrides = NAD_NSTRIDES(); NIT_ITERINDEX(iter) = iterindex; ndim = ndim ? ndim : 1; if (iterindex == 0) { dataptr = NIT_RESETDATAPTR(iter); for (idim = 0; idim < ndim; ++idim) { char **ptrs; NAD_INDEX(axisdata) = 0; ptrs = NAD_PTRS(axisdata); for (istrides = 0; istrides < nstrides; ++istrides) { ptrs[istrides] = dataptr[istrides]; } NIT_ADVANCE_AXISDATA(axisdata, 1); } } else { /* * Set the multi-index, from the fastest-changing to the * slowest-changing. */ axisdata = NIT_AXISDATA(iter); shape = NAD_SHAPE(axisdata); i = iterindex; iterindex /= shape; NAD_INDEX(axisdata) = i - iterindex * shape; for (idim = 0; idim < ndim-1; ++idim) { NIT_ADVANCE_AXISDATA(axisdata, 1); shape = NAD_SHAPE(axisdata); i = iterindex; iterindex /= shape; NAD_INDEX(axisdata) = i - iterindex * shape; } dataptr = NIT_RESETDATAPTR(iter); /* * Accumulate the successive pointers with their * offsets in the opposite order, starting from the * original data pointers. */ for (idim = 0; idim < ndim; ++idim) { npy_intp *strides; char **ptrs; strides = NAD_STRIDES(axisdata); ptrs = NAD_PTRS(axisdata); i = NAD_INDEX(axisdata); for (istrides = 0; istrides < nstrides; ++istrides) { ptrs[istrides] = dataptr[istrides] + i*strides[istrides]; } dataptr = ptrs; NIT_ADVANCE_AXISDATA(axisdata, -1); } } } /* * This gets called after the buffers have been exhausted, and * their data needs to be written back to the arrays. The multi-index * must be positioned for the beginning of the buffer. */ NPY_NO_EXPORT int npyiter_copy_from_buffers(NpyIter *iter) { npy_uint32 itflags = NIT_ITFLAGS(iter); int ndim = NIT_NDIM(iter); int iop, nop = NIT_NOP(iter); int maskop = NIT_MASKOP(iter); npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter); NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); NpyIter_AxisData *axisdata = NIT_AXISDATA(iter), *reduce_outeraxisdata = NULL; PyArray_Descr **dtypes = NIT_DTYPES(iter); npy_intp transfersize = NBF_SIZE(bufferdata); npy_intp *strides = NBF_STRIDES(bufferdata), *ad_strides = NAD_STRIDES(axisdata); npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); char **ad_ptrs = NAD_PTRS(axisdata); char **buffers = NBF_BUFFERS(bufferdata); char *buffer; npy_intp reduce_outerdim = 0; npy_intp *reduce_outerstrides = NULL; npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) / NPY_SIZEOF_INTP; /* If we're past the end, nothing to copy */ if (NBF_SIZE(bufferdata) == 0) { return 0; } NPY_IT_DBG_PRINT("Iterator: Copying buffers to outputs\n"); if (itflags&NPY_ITFLAG_REDUCE) { reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata); reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata); reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim); transfersize *= NBF_REDUCE_OUTERSIZE(bufferdata); } NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata); for (iop = 0; iop < nop; ++iop) { buffer = buffers[iop]; /* * Copy the data back to the arrays. If the type has refs, * this function moves them so the buffer's refs are released. * * The flag USINGBUFFER is set when the buffer was used, so * only copy back when this flag is on. 
*/ if ((transferinfo[iop].write.func != NULL) && (op_itflags[iop]&NPY_OP_ITFLAG_USINGBUFFER)) { npy_intp op_transfersize; npy_intp src_stride, *dst_strides, *dst_coords, *dst_shape; int ndim_transfer; NPY_IT_DBG_PRINT1("Iterator: Operand %d was buffered\n", (int)iop); /* * If this operand is being reduced in the inner loop, * its buffering stride was set to zero, and just * one element was copied. */ if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) { if (strides[iop] == 0) { if (reduce_outerstrides[iop] == 0) { op_transfersize = 1; src_stride = 0; dst_strides = &src_stride; dst_coords = &NAD_INDEX(reduce_outeraxisdata); dst_shape = &NAD_SHAPE(reduce_outeraxisdata); ndim_transfer = 1; } else { op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata); src_stride = reduce_outerstrides[iop]; dst_strides = &NAD_STRIDES(reduce_outeraxisdata)[iop]; dst_coords = &NAD_INDEX(reduce_outeraxisdata); dst_shape = &NAD_SHAPE(reduce_outeraxisdata); ndim_transfer = ndim - reduce_outerdim; } } else { if (reduce_outerstrides[iop] == 0) { op_transfersize = NBF_SIZE(bufferdata); src_stride = strides[iop]; dst_strides = &ad_strides[iop]; dst_coords = &NAD_INDEX(axisdata); dst_shape = &NAD_SHAPE(axisdata); ndim_transfer = reduce_outerdim ? reduce_outerdim : 1; } else { op_transfersize = transfersize; src_stride = strides[iop]; dst_strides = &ad_strides[iop]; dst_coords = &NAD_INDEX(axisdata); dst_shape = &NAD_SHAPE(axisdata); ndim_transfer = ndim; } } } else { op_transfersize = transfersize; src_stride = strides[iop]; dst_strides = &ad_strides[iop]; dst_coords = &NAD_INDEX(axisdata); dst_shape = &NAD_SHAPE(axisdata); ndim_transfer = ndim; } NPY_IT_DBG_PRINT2("Iterator: Copying buffer to " "operand %d (%d items)\n", (int)iop, (int)op_transfersize); /* WRITEMASKED operand */ if (op_itflags[iop] & NPY_OP_ITFLAG_WRITEMASKED) { npy_bool *maskptr; /* * The mask pointer may be in the buffer or in * the array, detect which one. */ if ((op_itflags[maskop]&NPY_OP_ITFLAG_USINGBUFFER) != 0) { maskptr = (npy_bool *)buffers[maskop]; } else { maskptr = (npy_bool *)ad_ptrs[maskop]; } if (PyArray_TransferMaskedStridedToNDim(ndim_transfer, ad_ptrs[iop], dst_strides, axisdata_incr, buffer, src_stride, maskptr, strides[maskop], dst_coords, axisdata_incr, dst_shape, axisdata_incr, op_transfersize, dtypes[iop]->elsize, &transferinfo[iop].write) < 0) { return -1; } } /* Regular operand */ else { if (PyArray_TransferStridedToNDim(ndim_transfer, ad_ptrs[iop], dst_strides, axisdata_incr, buffer, src_stride, dst_coords, axisdata_incr, dst_shape, axisdata_incr, op_transfersize, dtypes[iop]->elsize, &transferinfo[iop].write) < 0) { return -1; } } } /* If there's no copy back, we may have to decrement refs. In * this case, the transfer is instead a function which clears * (DECREFs) the single input. * * The flag USINGBUFFER is set when the buffer was used, so * only decrement refs when this flag is on. */ else if (transferinfo[iop].clear.func != NULL && (op_itflags[iop]&NPY_OP_ITFLAG_USINGBUFFER)) { NPY_IT_DBG_PRINT1( "Iterator: clearing refs of operand %d\n", (int)iop); npy_intp buf_stride = dtypes[iop]->elsize; if (transferinfo[iop].clear.func( NULL, transferinfo[iop].clear.descr, buffer, transfersize, buf_stride, transferinfo[iop].clear.auxdata) < 0) { /* Since this should only decrement, it should never error */ assert(0); return -1; } } } NPY_IT_DBG_PRINT("Iterator: Finished copying buffers to outputs\n"); return 0; } /* * This gets called after the iterator has been positioned to a multi-index * for the start of a buffer. 
It decides which operands need a buffer, * and copies the data into the buffers. */ NPY_NO_EXPORT int npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs) { npy_uint32 itflags = NIT_ITFLAGS(iter); int ndim = NIT_NDIM(iter); int iop, nop = NIT_NOP(iter); npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter); NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); NpyIter_AxisData *axisdata = NIT_AXISDATA(iter), *reduce_outeraxisdata = NULL; PyArray_Descr **dtypes = NIT_DTYPES(iter); PyArrayObject **operands = NIT_OPERANDS(iter); npy_intp *strides = NBF_STRIDES(bufferdata), *ad_strides = NAD_STRIDES(axisdata); npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); char **ptrs = NBF_PTRS(bufferdata), **ad_ptrs = NAD_PTRS(axisdata); char **buffers = NBF_BUFFERS(bufferdata); npy_intp iterindex, iterend, transfersize, singlestridesize, reduce_innersize = 0, reduce_outerdim = 0; int is_onestride = 0, any_buffered = 0; npy_intp *reduce_outerstrides = NULL; char **reduce_outerptrs = NULL; /* * Have to get this flag before npyiter_checkreducesize sets * it for the next iteration. */ npy_bool reuse_reduce_loops = (prev_dataptrs != NULL) && ((itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS) != 0); npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) / NPY_SIZEOF_INTP; NPY_IT_DBG_PRINT("Iterator: Copying inputs to buffers\n"); /* Calculate the size if using any buffers */ iterindex = NIT_ITERINDEX(iter); iterend = NIT_ITEREND(iter); transfersize = NBF_BUFFERSIZE(bufferdata); if (transfersize > iterend - iterindex) { transfersize = iterend - iterindex; } /* If last time around, the reduce loop structure was full, we reuse it */ if (reuse_reduce_loops) { npy_intp full_transfersize, prev_reduce_outersize; prev_reduce_outersize = NBF_REDUCE_OUTERSIZE(bufferdata); reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata); reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata); reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata); reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim); reduce_innersize = NBF_SIZE(bufferdata); NBF_REDUCE_POS(bufferdata) = 0; /* * Try to do make the outersize as big as possible. This allows * it to shrink when processing the last bit of the outer reduce loop, * then grow again at the beginning of the next outer reduce loop. */ NBF_REDUCE_OUTERSIZE(bufferdata) = (NAD_SHAPE(reduce_outeraxisdata)- NAD_INDEX(reduce_outeraxisdata)); full_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize; /* If the full transfer size doesn't fit in the buffer, truncate it */ if (full_transfersize > NBF_BUFFERSIZE(bufferdata)) { NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize; transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize; } else { transfersize = full_transfersize; } if (prev_reduce_outersize < NBF_REDUCE_OUTERSIZE(bufferdata)) { /* * If the previous time around less data was copied it may not * be safe to reuse the buffers even if the pointers match. */ reuse_reduce_loops = 0; } NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize; NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d " "itersize: %d\n", (int)transfersize, (int)reduce_innersize, (int)NpyIter_GetIterSize(iter)); NPY_IT_DBG_PRINT1("Reduced reduce outersize: %d", (int)NBF_REDUCE_OUTERSIZE(bufferdata)); } /* * If there are any reduction operands, we may have to make * the size smaller so we don't copy the same value into * a buffer twice, as the buffering does not have a mechanism * to combine values itself. 
*/ else if (itflags&NPY_ITFLAG_REDUCE) { NPY_IT_DBG_PRINT("Iterator: Calculating reduce loops\n"); transfersize = npyiter_checkreducesize(iter, transfersize, &reduce_innersize, &reduce_outerdim); NPY_IT_DBG_PRINT3("Reduce transfersize: %d innersize: %d " "itersize: %d\n", (int)transfersize, (int)reduce_innersize, (int)NpyIter_GetIterSize(iter)); reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata); reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata); reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim); NBF_SIZE(bufferdata) = reduce_innersize; NBF_REDUCE_POS(bufferdata) = 0; NBF_REDUCE_OUTERDIM(bufferdata) = reduce_outerdim; NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize; if (reduce_innersize == 0) { NBF_REDUCE_OUTERSIZE(bufferdata) = 0; return 0; } else { NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize; } } else { NBF_SIZE(bufferdata) = transfersize; NBF_BUFITEREND(bufferdata) = iterindex + transfersize; } /* Calculate the maximum size if using a single stride and no buffers */ singlestridesize = NAD_SHAPE(axisdata)-NAD_INDEX(axisdata); if (singlestridesize > iterend - iterindex) { singlestridesize = iterend - iterindex; } if (singlestridesize >= transfersize) { is_onestride = 1; } NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata); for (iop = 0; iop < nop; ++iop) { switch (op_itflags[iop]& (NPY_OP_ITFLAG_BUFNEVER| NPY_OP_ITFLAG_CAST| NPY_OP_ITFLAG_REDUCE)) { /* Never need to buffer this operand */ case NPY_OP_ITFLAG_BUFNEVER: ptrs[iop] = ad_ptrs[iop]; if (itflags&NPY_ITFLAG_REDUCE) { reduce_outerstrides[iop] = reduce_innersize * strides[iop]; reduce_outerptrs[iop] = ptrs[iop]; } /* * Should not adjust the stride - ad_strides[iop] * could be zero, but strides[iop] was initialized * to the first non-trivial stride. */ /* The flag NPY_OP_ITFLAG_USINGBUFFER can be ignored here */ assert(!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER)); break; /* Never need to buffer this operand */ case NPY_OP_ITFLAG_BUFNEVER|NPY_OP_ITFLAG_REDUCE: ptrs[iop] = ad_ptrs[iop]; reduce_outerptrs[iop] = ptrs[iop]; reduce_outerstrides[iop] = 0; /* * Should not adjust the stride - ad_strides[iop] * could be zero, but strides[iop] was initialized * to the first non-trivial stride. */ /* The flag NPY_OP_ITFLAG_USINGBUFFER can be ignored here */ assert(!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER)); break; /* Just a copy */ case 0: /* Do not reuse buffer if it did not exist */ if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) && (prev_dataptrs != NULL)) { prev_dataptrs[iop] = NULL; } /* * No copyswap or cast was requested, so all we're * doing is copying the data to fill the buffer and * produce a single stride. If the underlying data * already does that, no need to copy it. 
*/ if (is_onestride) { ptrs[iop] = ad_ptrs[iop]; strides[iop] = ad_strides[iop]; /* Signal that the buffer is not being used */ op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER); } /* If some other op is reduced, we have a double reduce loop */ else if ((itflags&NPY_ITFLAG_REDUCE) && (reduce_outerdim == 1) && (transfersize/reduce_innersize <= NAD_SHAPE(reduce_outeraxisdata) - NAD_INDEX(reduce_outeraxisdata))) { ptrs[iop] = ad_ptrs[iop]; reduce_outerptrs[iop] = ptrs[iop]; strides[iop] = ad_strides[iop]; reduce_outerstrides[iop] = NAD_STRIDES(reduce_outeraxisdata)[iop]; /* Signal that the buffer is not being used */ op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER); } else { /* In this case, the buffer is being used */ ptrs[iop] = buffers[iop]; strides[iop] = dtypes[iop]->elsize; if (itflags&NPY_ITFLAG_REDUCE) { reduce_outerstrides[iop] = reduce_innersize * strides[iop]; reduce_outerptrs[iop] = ptrs[iop]; } /* Signal that the buffer is being used */ op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER; } break; /* Just a copy, but with a reduction */ case NPY_OP_ITFLAG_REDUCE: /* Do not reuse buffer if it did not exist */ if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) && (prev_dataptrs != NULL)) { prev_dataptrs[iop] = NULL; } if (ad_strides[iop] == 0) { strides[iop] = 0; /* It's all in one stride in the inner loop dimension */ if (is_onestride) { NPY_IT_DBG_PRINT1("reduce op %d all one stride\n", (int)iop); ptrs[iop] = ad_ptrs[iop]; reduce_outerstrides[iop] = 0; /* Signal that the buffer is not being used */ op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER); } /* It's all in one stride in the reduce outer loop */ else if ((reduce_outerdim > 0) && (transfersize/reduce_innersize <= NAD_SHAPE(reduce_outeraxisdata) - NAD_INDEX(reduce_outeraxisdata))) { NPY_IT_DBG_PRINT1("reduce op %d all one outer stride\n", (int)iop); ptrs[iop] = ad_ptrs[iop]; /* Outer reduce loop advances by one item */ reduce_outerstrides[iop] = NAD_STRIDES(reduce_outeraxisdata)[iop]; /* Signal that the buffer is not being used */ op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER); } /* In this case, the buffer is being used */ else { NPY_IT_DBG_PRINT1("reduce op %d must buffer\n", (int)iop); ptrs[iop] = buffers[iop]; /* Both outer and inner reduce loops have stride 0 */ if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) { reduce_outerstrides[iop] = 0; } /* Outer reduce loop advances by one item */ else { reduce_outerstrides[iop] = dtypes[iop]->elsize; } /* Signal that the buffer is being used */ op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER; } } else if (is_onestride) { NPY_IT_DBG_PRINT1("reduce op %d all one stride in dim 0\n", (int)iop); ptrs[iop] = ad_ptrs[iop]; strides[iop] = ad_strides[iop]; reduce_outerstrides[iop] = 0; /* Signal that the buffer is not being used */ op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER); } else { /* It's all in one stride in the reduce outer loop */ if ((reduce_outerdim == 1) && (transfersize/reduce_innersize <= NAD_SHAPE(reduce_outeraxisdata) - NAD_INDEX(reduce_outeraxisdata))) { ptrs[iop] = ad_ptrs[iop]; strides[iop] = ad_strides[iop]; /* Outer reduce loop advances by one item */ reduce_outerstrides[iop] = NAD_STRIDES(reduce_outeraxisdata)[iop]; /* Signal that the buffer is not being used */ op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER); } /* In this case, the buffer is being used */ else { ptrs[iop] = buffers[iop]; strides[iop] = dtypes[iop]->elsize; if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) { /* Reduction in outer reduce loop */ reduce_outerstrides[iop] = 0; } else { /* Advance to next items in 
outer reduce loop */ reduce_outerstrides[iop] = reduce_innersize * dtypes[iop]->elsize; } /* Signal that the buffer is being used */ op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER; } } reduce_outerptrs[iop] = ptrs[iop]; break; default: /* In this case, the buffer is always being used */ any_buffered = 1; /* Signal that the buffer is being used */ op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER; if (!(op_itflags[iop]&NPY_OP_ITFLAG_REDUCE)) { ptrs[iop] = buffers[iop]; strides[iop] = dtypes[iop]->elsize; if (itflags&NPY_ITFLAG_REDUCE) { reduce_outerstrides[iop] = reduce_innersize * strides[iop]; reduce_outerptrs[iop] = ptrs[iop]; } } /* The buffer is being used with reduction */ else { ptrs[iop] = buffers[iop]; if (ad_strides[iop] == 0) { NPY_IT_DBG_PRINT1("cast op %d has innermost stride 0\n", (int)iop); strides[iop] = 0; /* Both outer and inner reduce loops have stride 0 */ if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) { NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop); reduce_outerstrides[iop] = 0; } /* Outer reduce loop advances by one item */ else { NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop); reduce_outerstrides[iop] = dtypes[iop]->elsize; } } else { NPY_IT_DBG_PRINT1("cast op %d has innermost stride !=0\n", (int)iop); strides[iop] = dtypes[iop]->elsize; if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) { NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop); /* Reduction in outer reduce loop */ reduce_outerstrides[iop] = 0; } else { NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop); /* Advance to next items in outer reduce loop */ reduce_outerstrides[iop] = reduce_innersize * dtypes[iop]->elsize; } } reduce_outerptrs[iop] = ptrs[iop]; } break; } /* * If OP_ITFLAG_USINGBUFFER is enabled and the read func is not NULL, * the buffer needs to be read. */ if (op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER && transferinfo[iop].read.func != NULL) { npy_intp src_itemsize; npy_intp op_transfersize; npy_intp dst_stride, *src_strides, *src_coords, *src_shape; int ndim_transfer; npy_bool skip_transfer = 0; src_itemsize = PyArray_DTYPE(operands[iop])->elsize; /* If we reach here, buffering is required */ any_buffered = 1; /* * If this operand is being reduced in the inner loop, * set its buffering stride to zero, and just copy * one element. */ if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) { if (ad_strides[iop] == 0) { strides[iop] = 0; if (reduce_outerstrides[iop] == 0) { op_transfersize = 1; dst_stride = 0; src_strides = &dst_stride; src_coords = &NAD_INDEX(reduce_outeraxisdata); src_shape = &NAD_SHAPE(reduce_outeraxisdata); ndim_transfer = 1; /* * When we're reducing a single element, and * it's still the same element, don't overwrite * it even when reuse reduce loops is unset. * This preserves the precision of the * intermediate calculation. 
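 *
 * (A plausible example of why this matters: when a lower-precision reduce
 * operand is buffered through a higher-precision loop dtype, re-copying the
 * operand's stored value on every pass would round away whatever extra
 * precision the buffered intermediate has accumulated.)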
*/ if (prev_dataptrs && prev_dataptrs[iop] == ad_ptrs[iop]) { NPY_IT_DBG_PRINT1("Iterator: skipping operand %d" " copy because it's a 1-element reduce\n", (int)iop); skip_transfer = 1; } } else { op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata); dst_stride = reduce_outerstrides[iop]; src_strides = &NAD_STRIDES(reduce_outeraxisdata)[iop]; src_coords = &NAD_INDEX(reduce_outeraxisdata); src_shape = &NAD_SHAPE(reduce_outeraxisdata); ndim_transfer = ndim - reduce_outerdim; } } else { if (reduce_outerstrides[iop] == 0) { op_transfersize = NBF_SIZE(bufferdata); dst_stride = strides[iop]; src_strides = &ad_strides[iop]; src_coords = &NAD_INDEX(axisdata); src_shape = &NAD_SHAPE(axisdata); ndim_transfer = reduce_outerdim ? reduce_outerdim : 1; } else { op_transfersize = transfersize; dst_stride = strides[iop]; src_strides = &ad_strides[iop]; src_coords = &NAD_INDEX(axisdata); src_shape = &NAD_SHAPE(axisdata); ndim_transfer = ndim; } } } else { op_transfersize = transfersize; dst_stride = strides[iop]; src_strides = &ad_strides[iop]; src_coords = &NAD_INDEX(axisdata); src_shape = &NAD_SHAPE(axisdata); ndim_transfer = ndim; } /* * If the whole buffered loop structure remains the same, * and the source pointer for this data didn't change, * we don't have to copy the data again. */ if (reuse_reduce_loops && prev_dataptrs[iop] == ad_ptrs[iop]) { NPY_IT_DBG_PRINT2("Iterator: skipping operands %d " "copy (%d items) because loops are reused and the data " "pointer didn't change\n", (int)iop, (int)op_transfersize); skip_transfer = 1; } /* * Copy data to the buffers if necessary. * * We always copy if the operand has references. In that case * a "write" function must be in use that either copies or clears * the buffer. * This write from buffer call does not check for skip-transfer * so we have to assume the buffer is cleared. For dtypes that * do not have references, we can assume that the write function * will leave the source (buffer) unmodified. */ if (!skip_transfer || PyDataType_REFCHK(dtypes[iop])) { NPY_IT_DBG_PRINT2("Iterator: Copying operand %d to " "buffer (%d items)\n", (int)iop, (int)op_transfersize); if (PyArray_TransferNDimToStrided( ndim_transfer, ptrs[iop], dst_stride, ad_ptrs[iop], src_strides, axisdata_incr, src_coords, axisdata_incr, src_shape, axisdata_incr, op_transfersize, src_itemsize, &transferinfo[iop].read) < 0) { return -1; } } } } /* * If buffering wasn't needed, we can grow the inner * loop to as large as possible. * * TODO: Could grow REDUCE loop too with some more logic above. */ if (!any_buffered && (itflags&NPY_ITFLAG_GROWINNER) && !(itflags&NPY_ITFLAG_REDUCE)) { if (singlestridesize > transfersize) { NPY_IT_DBG_PRINT2("Iterator: Expanding inner loop size " "from %d to %d since buffering wasn't needed\n", (int)NBF_SIZE(bufferdata), (int)singlestridesize); NBF_SIZE(bufferdata) = singlestridesize; NBF_BUFITEREND(bufferdata) = iterindex + singlestridesize; } } NPY_IT_DBG_PRINT1("Any buffering needed: %d\n", any_buffered); NPY_IT_DBG_PRINT1("Iterator: Finished copying inputs to buffers " "(buffered size is %d)\n", (int)NBF_SIZE(bufferdata)); return 0; } /** * This function clears any references still held by the buffers and should * only be used to discard buffers if an error occurred. 
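 *
 * A hypothetical call site (for illustration only; the real callers are the
 * iterator's error paths) would look roughly like:
 *
 *     if (npyiter_copy_from_buffers(iter) < 0) {
 *         npyiter_clear_buffers(iter);
 *         return NPY_FAIL;
 *     }
 *
 * i.e. when writing the buffers back fails, the buffer contents are
 * discarded rather than flushed to the operands.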
* * @param iter Iterator */ NPY_NO_EXPORT void npyiter_clear_buffers(NpyIter *iter) { int nop = iter->nop; NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); if (NBF_SIZE(bufferdata) == 0) { /* if the buffers are empty already, there is nothing to do */ return; } /* * The iterator may be using a dtype with references (as of writing this * means Python objects, but does not need to stay that way). * In that case, further cleanup may be necessary and we clear buffers * explicitly. */ PyObject *type, *value, *traceback; PyErr_Fetch(&type, &value, &traceback); /* Cleanup any buffers with references */ char **buffers = NBF_BUFFERS(bufferdata); NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata); PyArray_Descr **dtypes = NIT_DTYPES(iter); npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter); for (int iop = 0; iop < nop; ++iop, ++buffers) { if (transferinfo[iop].clear.func == NULL || !(op_itflags[iop]&NPY_OP_ITFLAG_USINGBUFFER)) { continue; } if (*buffers == 0) { continue; } int itemsize = dtypes[iop]->elsize; if (transferinfo[iop].clear.func(NULL, dtypes[iop], *buffers, NBF_SIZE(bufferdata), itemsize, transferinfo[iop].clear.auxdata) < 0) { /* This should never fail; if it does write it out */ PyErr_WriteUnraisable(NULL); } } /* Signal that the buffers are empty */ NBF_SIZE(bufferdata) = 0; PyErr_Restore(type, value, traceback); } /* * This checks how much space can be buffered without encountering the * same value twice, or for operands whose innermost stride is zero, * without encountering a different value. By reducing the buffered * amount to this size, reductions can be safely buffered. * * Reductions are buffered with two levels of looping, to avoid * frequent copying to the buffers. The return value is the over-all * buffer size, and when the flag NPY_ITFLAG_REDUCE is set, reduce_innersize * receives the size of the inner of the two levels of looping. * * The value placed in reduce_outerdim is the index into the AXISDATA * for where the second level of the double loop begins. * * The return value is always a multiple of the value placed in * reduce_innersize. */ static npy_intp npyiter_checkreducesize(NpyIter *iter, npy_intp count, npy_intp *reduce_innersize, npy_intp *reduce_outerdim) { npy_uint32 itflags = NIT_ITFLAGS(iter); int idim, ndim = NIT_NDIM(iter); int iop, nop = NIT_NOP(iter); NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; npy_intp coord, shape, *strides; npy_intp reducespace = 1, factor; npy_bool nonzerocoord; npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter); char stride0op[NPY_MAXARGS]; /* Default to no outer axis */ *reduce_outerdim = 0; /* If there's only one dimension, no need to calculate anything */ if (ndim == 1 || count == 0) { *reduce_innersize = count; return count; } sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); axisdata = NIT_AXISDATA(iter); /* Indicate which REDUCE operands have stride 0 in the inner loop */ strides = NAD_STRIDES(axisdata); for (iop = 0; iop < nop; ++iop) { stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) && (strides[iop] == 0); NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in " "the inner loop? 
%d\n", iop, (int)stride0op[iop]); } shape = NAD_SHAPE(axisdata); coord = NAD_INDEX(axisdata); reducespace += (shape-coord-1); factor = shape; NIT_ADVANCE_AXISDATA(axisdata, 1); /* Initialize nonzerocoord based on the first coordinate */ nonzerocoord = (coord != 0); /* Go forward through axisdata, calculating the space available */ for (idim = 1; idim < ndim && reducespace < count; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { NPY_IT_DBG_PRINT2("Iterator: inner loop reducespace %d, count %d\n", (int)reducespace, (int)count); strides = NAD_STRIDES(axisdata); for (iop = 0; iop < nop; ++iop) { /* * If a reduce stride switched from zero to non-zero, or * vice versa, that's the point where the data will stop * being the same element or will repeat, and if the * buffer starts with an all zero multi-index up to this * point, gives us the reduce_innersize. */ if((stride0op[iop] && (strides[iop] != 0)) || (!stride0op[iop] && (strides[iop] == 0) && (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) { NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits " "buffer to %d\n", (int)reducespace); /* * If we already found more elements than count, or * the starting coordinate wasn't zero, the two-level * looping is unnecessary/can't be done, so return. */ if (count <= reducespace) { *reduce_innersize = count; NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS; return count; } else if (nonzerocoord) { if (reducespace < count) { count = reducespace; } *reduce_innersize = count; /* NOTE: This is similar to the (coord != 0) case below. */ NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS; return count; } else { *reduce_innersize = reducespace; break; } } } /* If we broke out of the loop early, we found reduce_innersize */ if (iop != nop) { NPY_IT_DBG_PRINT2("Iterator: Found first dim not " "reduce (%d of %d)\n", iop, nop); break; } shape = NAD_SHAPE(axisdata); coord = NAD_INDEX(axisdata); if (coord != 0) { nonzerocoord = 1; } reducespace += (shape-coord-1) * factor; factor *= shape; } /* * If there was any non-zero coordinate, the reduction inner * loop doesn't fit in the buffersize, or the reduction inner loop * covered the entire iteration size, can't do the double loop. */ if (nonzerocoord || count < reducespace || idim == ndim) { if (reducespace < count) { count = reducespace; } *reduce_innersize = count; /* In this case, we can't reuse the reduce loops */ NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS; return count; } coord = NAD_INDEX(axisdata); if (coord != 0) { /* * In this case, it is only safe to reuse the buffer if the amount * of data copied is not more than the current axes, as is the * case when reuse_reduce_loops was active already. * It should be in principle OK when the idim loop returns immediately. */ NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS; } else { /* In this case, we can reuse the reduce loops */ NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS; } *reduce_innersize = reducespace; count /= reducespace; NPY_IT_DBG_PRINT2("Iterator: reduce_innersize %d count /ed %d\n", (int)reducespace, (int)count); /* * Continue through the rest of the dimensions. If there are * two separated reduction axes, we may have to cut the buffer * short again. */ *reduce_outerdim = idim; reducespace = 1; factor = 1; /* Indicate which REDUCE operands have stride 0 at the current level */ strides = NAD_STRIDES(axisdata); for (iop = 0; iop < nop; ++iop) { stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) && (strides[iop] == 0); NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in " "the outer loop? 
%d\n", iop, (int)stride0op[iop]); } shape = NAD_SHAPE(axisdata); reducespace += (shape-coord-1) * factor; factor *= shape; NIT_ADVANCE_AXISDATA(axisdata, 1); ++idim; for (; idim < ndim && reducespace < count; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { NPY_IT_DBG_PRINT2("Iterator: outer loop reducespace %d, count %d\n", (int)reducespace, (int)count); strides = NAD_STRIDES(axisdata); for (iop = 0; iop < nop; ++iop) { /* * If a reduce stride switched from zero to non-zero, or * vice versa, that's the point where the data will stop * being the same element or will repeat, and if the * buffer starts with an all zero multi-index up to this * point, gives us the reduce_innersize. */ if((stride0op[iop] && (strides[iop] != 0)) || (!stride0op[iop] && (strides[iop] == 0) && (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) { NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits " "buffer to %d\n", (int)reducespace); /* * This terminates the outer level of our double loop. */ if (count <= reducespace) { return count * (*reduce_innersize); } else { return reducespace * (*reduce_innersize); } } } shape = NAD_SHAPE(axisdata); coord = NAD_INDEX(axisdata); if (coord != 0) { nonzerocoord = 1; } reducespace += (shape-coord-1) * factor; factor *= shape; } if (reducespace < count) { count = reducespace; } return count * (*reduce_innersize); } NPY_NO_EXPORT npy_bool npyiter_has_writeback(NpyIter *iter) { int iop, nop; npyiter_opitflags *op_itflags; if (iter == NULL) { return 0; } nop = NIT_NOP(iter); op_itflags = NIT_OPITFLAGS(iter); for (iop=0; iop