summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--numpy/core/src/multiarray/nditer_api.c40
-rw-r--r--numpy/core/tests/test_nditer.py51
2 files changed, 87 insertions, 4 deletions
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index 007958800..40043648d 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -2065,8 +2065,9 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
/* If last time around, the reduce loop structure was full, we reuse it */
if (reuse_reduce_loops) {
- npy_intp full_transfersize;
+ npy_intp full_transfersize, prev_reduce_outersize;
+ prev_reduce_outersize = NBF_REDUCE_OUTERSIZE(bufferdata);
reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
@@ -2089,6 +2090,13 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
else {
transfersize = full_transfersize;
}
+ if (prev_reduce_outersize < NBF_REDUCE_OUTERSIZE(bufferdata)) {
+ /*
+ * If the previous time around less data was copied it may not
+ * be safe to reuse the buffers even if the pointers match.
+ */
+ reuse_reduce_loops = 0;
+ }
NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d "
@@ -2187,6 +2195,11 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
break;
/* Just a copy */
case 0:
+ /* Do not reuse buffer if it did not exist */
+ if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) &&
+ (prev_dataptrs != NULL)) {
+ prev_dataptrs[iop] = NULL;
+ }
/*
* No copyswap or cast was requested, so all we're
* doing is copying the data to fill the buffer and
@@ -2230,6 +2243,11 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
break;
/* Just a copy, but with a reduction */
case NPY_OP_ITFLAG_REDUCE:
+ /* Do not reuse buffer if it did not exist */
+ if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) &&
+ (prev_dataptrs != NULL)) {
+ prev_dataptrs[iop] = NULL;
+ }
if (ad_strides[iop] == 0) {
strides[iop] = 0;
/* It's all in one stride in the inner loop dimension */
@@ -2618,6 +2636,7 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
*/
if (count <= reducespace) {
*reduce_innersize = count;
+ NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
return count;
}
else if (nonzerocoord) {
@@ -2625,6 +2644,8 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
count = reducespace;
}
*reduce_innersize = count;
+ /* NOTE: This is similar to the (coord != 0) case below. */
+ NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
return count;
}
else {
@@ -2664,8 +2685,20 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
return count;
}
- /* In this case, we can reuse the reduce loops */
- NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
+ coord = NAD_INDEX(axisdata);
+ if (coord != 0) {
+ /*
+ * In this case, it is only safe to reuse the buffer if the amount
+ * of data copied is not more then the current axes, as is the
+ * case when reuse_reduce_loops was active already.
+ * It should be in principle OK when the idim loop returns immidiatly.
+ */
+ NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
+ }
+ else {
+ /* In this case, we can reuse the reduce loops */
+ NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
+ }
*reduce_innersize = reducespace;
count /= reducespace;
@@ -2690,7 +2723,6 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
"the outer loop? %d\n", iop, (int)stride0op[iop]);
}
shape = NAD_SHAPE(axisdata);
- coord = NAD_INDEX(axisdata);
reducespace += (shape-coord-1) * factor;
factor *= shape;
NIT_ADVANCE_AXISDATA(axisdata, 1);
diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py
index dd0ed8cfa..9227d89f9 100644
--- a/numpy/core/tests/test_nditer.py
+++ b/numpy/core/tests/test_nditer.py
@@ -2017,6 +2017,57 @@ def test_iter_buffering_growinner():
# Should end up with just one inner loop here
assert_equal(i[0].size, a.size)
+
+@dec.slow
+def test_iter_buffered_reduce_reuse():
+ # large enough array for all views, including negative strides.
+ a = np.arange(2*3**5)[3**5:3**5+1]
+ flags = ['buffered', 'delay_bufalloc', 'multi_index', 'reduce_ok', 'refs_ok']
+ op_flags = [('readonly',), ('readwrite','allocate')]
+ op_axes_list = [[(0,1,2), (0,1,-1)], [(0,1,2), (0,-1,-1)]]
+ # wrong dtype to force buffering
+ op_dtypes = [np.float, a.dtype]
+
+ def get_params():
+ for xs in xrange(-3**2, 3**2 + 1):
+ for ys in xrange(xs, 3**2 + 1):
+ for op_axes in op_axes_list:
+ # last stride is reduced and because of that not
+ # important for this test, as it is the inner stride.
+ strides = (xs * a.itemsize, ys * a.itemsize, a.itemsize)
+ arr = np.lib.stride_tricks.as_strided(a, (3,3,3), strides)
+
+ for skip in [0, 1]:
+ yield arr, op_axes, skip
+
+ for arr, op_axes, skip in get_params():
+ nditer2 = np.nditer([arr.copy(), None],
+ op_axes=op_axes, flags=flags, op_flags=op_flags,
+ op_dtypes=op_dtypes)
+ nditer2.operands[-1][...] = 0
+ nditer2.reset()
+ nditer2.iterindex = skip
+
+ for (a2_in, b2_in) in nditer2:
+ b2_in += a2_in.astype(np.int_)
+
+ comp_res = nditer2.operands[-1]
+
+ for bufsize in xrange(0, 3**3):
+ nditer1 = np.nditer([arr, None],
+ op_axes=op_axes, flags=flags, op_flags=op_flags,
+ buffersize=bufsize, op_dtypes=op_dtypes)
+ nditer1.operands[-1][...] = 0
+ nditer1.reset()
+ nditer1.iterindex = skip
+
+ for (a1_in, b1_in) in nditer1:
+ b1_in += a1_in.astype(np.int_)
+
+ res = nditer1.operands[-1]
+ assert_array_equal(res, comp_res)
+
+
def test_iter_no_broadcast():
# Test that the no_broadcast flag works
a = np.arange(24).reshape(2,3,4)