summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--numpy/core/src/multiarray/nditer_api.c30
1 files changed, 26 insertions, 4 deletions
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index 09e572f10..7d468a811 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -2062,8 +2062,9 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
/* If last time around, the reduce loop structure was full, we reuse it */
if (reuse_reduce_loops) {
- npy_intp full_transfersize;
+ npy_intp full_transfersize, prev_reduce_outersize;
+ prev_reduce_outersize = NBF_REDUCE_OUTERSIZE(bufferdata);
reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
@@ -2086,6 +2087,13 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
else {
transfersize = full_transfersize;
}
+ if (prev_reduce_outersize < NBF_REDUCE_OUTERSIZE(bufferdata)) {
+ /*
+ * If the previous time around less data was copied it may not
+ * be safe to reuse the buffers even if the pointers match.
+ */
+ reuse_reduce_loops = 0;
+ }
NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d "
@@ -2615,6 +2623,7 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
*/
if (count <= reducespace) {
*reduce_innersize = count;
+ NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
return count;
}
else if (nonzerocoord) {
@@ -2622,6 +2631,8 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
count = reducespace;
}
*reduce_innersize = count;
+ /* NOTE: This is similar to the (coord != 0) case below. */
+ NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
return count;
}
else {
@@ -2661,8 +2672,20 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
return count;
}
- /* In this case, we can reuse the reduce loops */
- NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
+ coord = NAD_INDEX(axisdata);
+ if (coord != 0) {
+ /*
+ * In this case, it is only safe to reuse the buffer if the amount
+ * of data copied is not more then the current axes, as is the
+ * case when reuse_reduce_loops was active already.
+ * It should be in principle OK when the idim loop returns immidiatly.
+ */
+ NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
+ }
+ else {
+ /* In this case, we can reuse the reduce loops */
+ NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
+ }
*reduce_innersize = reducespace;
count /= reducespace;
@@ -2687,7 +2710,6 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
"the outer loop? %d\n", iop, (int)stride0op[iop]);
}
shape = NAD_SHAPE(axisdata);
- coord = NAD_INDEX(axisdata);
reducespace += (shape-coord-1) * factor;
factor *= shape;
NIT_ADVANCE_AXISDATA(axisdata, 1);