BUG: Do not reuse nditer buffers when not filled enough.

This checks whether, the previous time around, the buffers were filled with as much data as they would be filled with this time around. This is difficult for the initial loop before reuse is activated, because in that case the buffer may be larger than just the first outer reduce dimension. In that case, do not allow reuse unless the index along that dimension was 0. When the inner reduce index is not 0, reusing the buffer is also dangerous.
author: Sebastian Berg <sebastian@sipsolutions.net> 2013-03-17 16:04:36 +0100
committer: Sebastian Berg <sebastian@sipsolutions.net> 2013-03-18 15:48:58 +0100
commit: 5e8b322ea4ac4acf33bb89d7bdc4f3397d047cb4 (patch)
tree: 12a3e298f344bf581241502b1c07f8a7ad93d533 /numpy
parent: bd7104cef45be9a4250cb67af9225c9606498a51 (diff)
download: numpy-5e8b322ea4ac4acf33bb89d7bdc4f3397d047cb4.tar.gz
1 files changed, 26 insertions, 4 deletions
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index 09e572f10..7d468a811 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -2062,8 +2062,9 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
 
     /* If last time around, the reduce loop structure was full, we reuse it */
     if (reuse_reduce_loops) {
-        npy_intp full_transfersize;
+        npy_intp full_transfersize, prev_reduce_outersize;
 
+        prev_reduce_outersize = NBF_REDUCE_OUTERSIZE(bufferdata);
         reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
         reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
         reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
@@ -2086,6 +2087,13 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
         else {
             transfersize = full_transfersize;
         }
+        if (prev_reduce_outersize < NBF_REDUCE_OUTERSIZE(bufferdata)) {
+            /*
+             * If the previous time around less data was copied it may not
+             * be safe to reuse the buffers even if the pointers match.
+             */
+            reuse_reduce_loops = 0;
+        }
         NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
 
         NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d "
@@ -2615,6 +2623,7 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
                  */
                 if (count <= reducespace) {
                     *reduce_innersize = count;
+                    NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
                     return count;
                 }
                 else if (nonzerocoord) {
@@ -2622,6 +2631,8 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
                         count = reducespace;
                     }
                     *reduce_innersize = count;
+                    /* NOTE: This is similar to the (coord != 0) case below. */
+                    NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
                     return count;
                 }
                 else {
@@ -2661,8 +2672,20 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
         return count;
     }
 
-    /* In this case, we can reuse the reduce loops */
-    NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
+    coord = NAD_INDEX(axisdata);
+    if (coord != 0) {
+        /*
+         * In this case, it is only safe to reuse the buffer if the amount
+         * of data copied is not more then the current axes, as is the
+         * case when reuse_reduce_loops was active already.
+         * It should be in principle OK when the idim loop returns immidiatly.
+         */
+        NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
+    }
+    else {
+        /* In this case, we can reuse the reduce loops */
+        NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
+    }
 
     *reduce_innersize = reducespace;
     count /= reducespace;
@@ -2687,7 +2710,6 @@ npyiter_checkreducesize(NpyIter *iter, npy_intp count,
                         "the outer loop? %d\n", iop, (int)stride0op[iop]);
     }
     shape = NAD_SHAPE(axisdata);
-    coord = NAD_INDEX(axisdata);
     reducespace += (shape-coord-1) * factor;
     factor *= shape;
     NIT_ADVANCE_AXISDATA(axisdata, 1);
author	Sebastian Berg <sebastian@sipsolutions.net>	2013-03-17 16:04:36 +0100
committer	Sebastian Berg <sebastian@sipsolutions.net>	2013-03-18 15:48:58 +0100
commit	5e8b322ea4ac4acf33bb89d7bdc4f3397d047cb4 (patch)
tree	12a3e298f344bf581241502b1c07f8a7ad93d533 /numpy
parent	bd7104cef45be9a4250cb67af9225c9606498a51 (diff)
download	numpy-5e8b322ea4ac4acf33bb89d7bdc4f3397d047cb4.tar.gz