diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2013-04-01 12:54:54 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2013-04-01 12:54:54 -0700 |
commit | 0563ebf44c3d333a9abdc6530cd74514455f99c9 (patch) | |
tree | 6c8f02ddd9bdfbcf8d53de6abc0dd0dee9014258 /numpy | |
parent | d3edb4e84e3630320a3f577a83113f086cc1b563 (diff) | |
parent | 1e6d6e6272a1ca83c8e45d33c50268cfa4b32511 (diff) | |
download | numpy-0563ebf44c3d333a9abdc6530cd74514455f99c9.tar.gz |
Merge pull request #3153 from seberg/issue-3142
BUG: Do not reuse nditer buffers when not filled enough.
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/multiarray/nditer_api.c | 40 | ||||
-rw-r--r-- | numpy/core/tests/test_nditer.py | 51 |
2 files changed, 87 insertions, 4 deletions
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c index 007958800..40043648d 100644 --- a/numpy/core/src/multiarray/nditer_api.c +++ b/numpy/core/src/multiarray/nditer_api.c @@ -2065,8 +2065,9 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs) /* If last time around, the reduce loop structure was full, we reuse it */ if (reuse_reduce_loops) { - npy_intp full_transfersize; + npy_intp full_transfersize, prev_reduce_outersize; + prev_reduce_outersize = NBF_REDUCE_OUTERSIZE(bufferdata); reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata); reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata); reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata); @@ -2089,6 +2090,13 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs) else { transfersize = full_transfersize; } + if (prev_reduce_outersize < NBF_REDUCE_OUTERSIZE(bufferdata)) { + /* + * If the previous time around less data was copied it may not + * be safe to reuse the buffers even if the pointers match. 
+ */ + reuse_reduce_loops = 0; + } NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize; NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d " @@ -2187,6 +2195,11 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs) break; /* Just a copy */ case 0: + /* Do not reuse buffer if it did not exist */ + if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) && + (prev_dataptrs != NULL)) { + prev_dataptrs[iop] = NULL; + } /* * No copyswap or cast was requested, so all we're * doing is copying the data to fill the buffer and @@ -2230,6 +2243,11 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs) break; /* Just a copy, but with a reduction */ case NPY_OP_ITFLAG_REDUCE: + /* Do not reuse buffer if it did not exist */ + if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) && + (prev_dataptrs != NULL)) { + prev_dataptrs[iop] = NULL; + } if (ad_strides[iop] == 0) { strides[iop] = 0; /* It's all in one stride in the inner loop dimension */ @@ -2618,6 +2636,7 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count, */ if (count <= reducespace) { *reduce_innersize = count; + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS; return count; } else if (nonzerocoord) { @@ -2625,6 +2644,8 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count, count = reducespace; } *reduce_innersize = count; + /* NOTE: This is similar to the (coord != 0) case below. */ + NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS; return count; } else { @@ -2664,8 +2685,20 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count, return count; } - /* In this case, we can reuse the reduce loops */ - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS; + coord = NAD_INDEX(axisdata); + if (coord != 0) { + /* + * In this case, it is only safe to reuse the buffer if the amount + * of data copied is not more than the current axes, as is the + * case when reuse_reduce_loops was active already. + * It should be in principle OK when the idim loop returns immediately.
+ */ + NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS; + } + else { + /* In this case, we can reuse the reduce loops */ + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS; + } *reduce_innersize = reducespace; count /= reducespace; @@ -2690,7 +2723,6 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count, "the outer loop? %d\n", iop, (int)stride0op[iop]); } shape = NAD_SHAPE(axisdata); - coord = NAD_INDEX(axisdata); reducespace += (shape-coord-1) * factor; factor *= shape; NIT_ADVANCE_AXISDATA(axisdata, 1); diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py index dd0ed8cfa..9227d89f9 100644 --- a/numpy/core/tests/test_nditer.py +++ b/numpy/core/tests/test_nditer.py @@ -2017,6 +2017,57 @@ def test_iter_buffering_growinner(): # Should end up with just one inner loop here assert_equal(i[0].size, a.size) + +@dec.slow +def test_iter_buffered_reduce_reuse(): + # large enough array for all views, including negative strides. + a = np.arange(2*3**5)[3**5:3**5+1] + flags = ['buffered', 'delay_bufalloc', 'multi_index', 'reduce_ok', 'refs_ok'] + op_flags = [('readonly',), ('readwrite','allocate')] + op_axes_list = [[(0,1,2), (0,1,-1)], [(0,1,2), (0,-1,-1)]] + # wrong dtype to force buffering + op_dtypes = [np.float, a.dtype] + + def get_params(): + for xs in xrange(-3**2, 3**2 + 1): + for ys in xrange(xs, 3**2 + 1): + for op_axes in op_axes_list: + # last stride is reduced and because of that not + # important for this test, as it is the inner stride. + strides = (xs * a.itemsize, ys * a.itemsize, a.itemsize) + arr = np.lib.stride_tricks.as_strided(a, (3,3,3), strides) + + for skip in [0, 1]: + yield arr, op_axes, skip + + for arr, op_axes, skip in get_params(): + nditer2 = np.nditer([arr.copy(), None], + op_axes=op_axes, flags=flags, op_flags=op_flags, + op_dtypes=op_dtypes) + nditer2.operands[-1][...] 
= 0 + nditer2.reset() + nditer2.iterindex = skip + + for (a2_in, b2_in) in nditer2: + b2_in += a2_in.astype(np.int_) + + comp_res = nditer2.operands[-1] + + for bufsize in xrange(0, 3**3): + nditer1 = np.nditer([arr, None], + op_axes=op_axes, flags=flags, op_flags=op_flags, + buffersize=bufsize, op_dtypes=op_dtypes) + nditer1.operands[-1][...] = 0 + nditer1.reset() + nditer1.iterindex = skip + + for (a1_in, b1_in) in nditer1: + b1_in += a1_in.astype(np.int_) + + res = nditer1.operands[-1] + assert_array_equal(res, comp_res) + + def test_iter_no_broadcast(): # Test that the no_broadcast flag works a = np.arange(24).reshape(2,3,4) |