ENH: iter: Allow copies of read-only scalar arrays even if COPY isn't specified

When buffering, this reduces the work needed by a lot. For non-reduction operands, the iterator always provides contiguous data when an operand is buffered, but by making a copy, the operand can be flagged as never needing buffering.
author: Mark Wiebe <mwwiebe@gmail.com> 2011-01-30 13:33:15 -0800
committer: Mark Wiebe <mwwiebe@gmail.com> 2011-01-30 13:47:38 -0800
commit: 4faf10e9246a77bc19c77dc80363d809f5a85396 (patch)
tree: 98c0eca62403b7b4a95140bf3a7f620e12ae4651
parent: 395146e64101ac65a057214d64135993a4c67d16 (diff)
download: numpy-4faf10e9246a77bc19c77dc80363d809f5a85396.tar.gz
3 files changed, 79 insertions, 39 deletions
diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src
index 9a8977b8a..8c8a1b897 100644
--- a/numpy/core/src/multiarray/new_iterator.c.src
+++ b/numpy/core/src/multiarray/new_iterator.c.src
@@ -4416,10 +4416,16 @@ npyiter_allocate_arrays(NpyIter *iter,
     npy_intp idim, ndim = NIT_NDIM(iter);
     npy_intp iiter, niter = NIT_NITER(iter);
 
-    NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
+    NpyIter_BufferData *bufferdata = NULL;
     PyArrayObject **op = NIT_OPERANDS(iter);
 
+    if (itflags&NPY_ITFLAG_BUFFER) {
+        bufferdata = NIT_BUFFERDATA(iter);
+    }
+
+
     for (iiter = 0; iiter < niter; ++iiter) {
+        /* NULL means an output the iterator should allocate */
         if (op[iiter] == NULL) {
             PyArrayObject *out;
             PyTypeObject *op_subtype;
@@ -4453,6 +4459,50 @@ npyiter_allocate_arrays(NpyIter *iter,
             op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
             op_itflags[iiter] &= ~NPY_OP_ITFLAG_CAST;
         }
+        /*
+         * If casting is required, the operand is read-only, and
+         * it's an array scalar, make a copy whether or not the
+         * copy flag is enabled.
+         */
+        else if ((op_itflags[iiter]&(NPY_OP_ITFLAG_CAST|
+                         NPY_OP_ITFLAG_READ|
+                         NPY_OP_ITFLAG_WRITE)) == (NPY_OP_ITFLAG_CAST|
+                                                   NPY_OP_ITFLAG_READ) &&
+                          PyArray_NDIM(op[iiter]) == 0) {
+            PyArrayObject *temp;
+            Py_INCREF(op_dtype[iiter]);
+            temp = (PyArrayObject *)PyArray_NewFromDescr(
+                                        &PyArray_Type, op_dtype[iiter],
+                                        0, NULL, NULL, NULL, 0, NULL);
+            if (temp == NULL) {
+                return 0;
+            }
+            if (PyArray_CopyInto(temp, op[iiter]) != 0) {
+                Py_DECREF(temp);
+                return 0;
+            }
+            Py_DECREF(op[iiter]);
+            op[iiter] = temp;
+
+            /*
+             * Now we need to replace the pointers and strides with values
+             * from the temporary array.
+             */
+            npyiter_replace_axisdata(iter, iiter, op[iiter], 0,
+                    PyArray_DATA(op[iiter]), NULL);
+
+            /*
+             * New arrays are aligned, need no cast, and in the case
+             * of scalars, always have stride 0 so never need buffering
+             */
+            op_itflags[iiter] |= (NPY_OP_ITFLAG_ALIGNED|
+                                  NPY_OP_ITFLAG_BUFNEVER);
+            op_itflags[iiter] &= ~NPY_OP_ITFLAG_CAST;
+            if (itflags&NPY_ITFLAG_BUFFER) {
+                NBF_STRIDES(bufferdata)[iiter] = 0;
+            }
+        }
+        /* If casting is required and permitted */
         else if ((op_itflags[iiter]&NPY_OP_ITFLAG_CAST) &&
                    (op_flags[iiter]&(NPY_ITER_COPY|NPY_ITER_UPDATEIFCOPY))) {
             PyArrayObject *temp;
@@ -4494,7 +4544,10 @@ npyiter_allocate_arrays(NpyIter *iter,
             npyiter_replace_axisdata(iter, iiter, op[iiter], ondim,
                     PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL);
 
-            /* The temporary copy is aligned and needs no cast */
+            /*
+             * The temporary copy is aligned and needs no cast, and
+             * has constant stride 0 so never needs buffering
+             */
             op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
             op_itflags[iiter] &= ~NPY_OP_ITFLAG_CAST;
         }
@@ -4543,16 +4596,9 @@ npyiter_allocate_arrays(NpyIter *iter,
          * If no alignment, byte swap, or casting is needed, and
          * the inner stride of this operand works for the whole
          * array, we can set NPY_OP_ITFLAG_BUFNEVER.
-         * But, if buffering is enabled, write-buffering must be
-         * one-to-one, because the buffering write back won't combine
-         * values correctly. This test doesn't catch everything, but it will
-         * catch the most common case of a broadcasting a write-buffered
-         * dimension.
          */
-        if ((itflags&NPY_ITFLAG_BUFFER) &&
-                        (!(op_itflags[iiter]&NPY_OP_ITFLAG_CAST) ||
-                          (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE))) {
-            int is_one_to_one = 1;
+        if (PyArray_NDIM(op[iiter]) > 0 && (itflags&NPY_ITFLAG_BUFFER) &&
+                        !(op_itflags[iiter]&NPY_OP_ITFLAG_CAST)) {
             npy_intp stride, shape, innerstride = 0, innershape;
             NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
             npy_intp sizeof_axisdata =
@@ -4562,9 +4608,6 @@ npyiter_allocate_arrays(NpyIter *iter,
                 innershape = NAD_SHAPE(axisdata);
                 if (innershape != 1) {
                     innerstride = NAD_STRIDES(axisdata)[iiter];
-                    if (innerstride == 0) {
-                        is_one_to_one = 0;
-                    }
                     break;
                 }
                 NIT_ADVANCE_AXISDATA(axisdata, 1);
@@ -4576,9 +4619,6 @@ npyiter_allocate_arrays(NpyIter *iter,
                 stride = NAD_STRIDES(axisdata)[iiter];
                 shape = NAD_SHAPE(axisdata);
                 if (shape != 1) {
-                    if (stride == 0) {
-                        is_one_to_one = 0;
-                    }
                     /*
                      * If N times the inner stride doesn't equal this
                      * stride, the multi-dimensionality is needed.
@@ -4597,23 +4637,10 @@ npyiter_allocate_arrays(NpyIter *iter,
              * Set that stride, because it may not belong to the first
              * dimension.
              */
-            if (idim == ndim && !(op_itflags[iiter]&NPY_OP_ITFLAG_CAST)) {
+            if (idim == ndim) {
                 op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER;
                 NBF_STRIDES(bufferdata)[iiter] = innerstride;
             }
-            else if (!is_one_to_one &&
-                        (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) &&
-                        !(flags&NPY_ITER_REDUCE_OK)) {
-                NPY_IT_DBG_PRINTF("Iterator: %d %d %d\n",
-                            (int)(!is_one_to_one),
-                            (int)((op_itflags[iiter]&NPY_OP_ITFLAG_WRITE)),
-                            (int)(!(flags&NPY_ITER_REDUCE_OK)));
-                PyErr_SetString(PyExc_ValueError,
-                        "Iterator operand requires write buffering, "
-                        "but has dimensions which have been broadcasted "
-                        "and would be combined incorrectly");
-                return 0;
-            }
         }
     }
 
@@ -5201,9 +5228,15 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
         reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
         NBF_SIZE(bufferdata) = reduce_innersize;
         NBF_REDUCE_POS(bufferdata) = 0;
-        NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
         NBF_REDUCE_OUTERDIM(bufferdata) = reduce_outerdim;
         NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
+        if (reduce_innersize == 0) {
+            NBF_REDUCE_OUTERSIZE(bufferdata) = 0;
+            return;
+        }
+        else {
+            NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
+        }
     }
     else {
         NBF_SIZE(bufferdata) = transfersize;
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 514575bf0..08f569502 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -1796,7 +1796,10 @@ iterator_loop(PyUFuncObject *self,
     if (NpyIter_GetIterSize(iter) != 0) {
 
         /* Reset the iterator with the base pointers from the wrapped outputs */
-        for (i = 0; i < niter; ++i) {
+        for (i = 0; i < nin; ++i) {
+            baseptrs[i] = PyArray_BYTES(op_it[i]);
+        }
+        for (i = nin; i < niter; ++i) {
             baseptrs[i] = PyArray_BYTES(op[i]);
         }
         if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) {
diff --git a/numpy/core/tests/test_new_iterator.py b/numpy/core/tests/test_new_iterator.py
index 785bec88b..ad51862c9 100644
--- a/numpy/core/tests/test_new_iterator.py
+++ b/numpy/core/tests/test_new_iterator.py
@@ -866,17 +866,21 @@ def test_iter_scalar_cast():
     assert_equal(i.dtypes[0], np.dtype('i4'))
     assert_equal(i.value.dtype, np.dtype('i4'))
     assert_equal(i.value, 3)
+    # Readonly scalars may be cast even without setting COPY or BUFFERED
+    i = newiter(3, [], [['readonly']], op_dtypes=[np.dtype('f8')])
+    assert_equal(i[0].dtype, np.dtype('f8'))
+    assert_equal(i[0], 3.)
 
 def test_iter_scalar_cast_errors():
     # Check that invalid casts are caught
 
-    # Need to allow casting for casts to occur
+    # Need to allow copying/buffering for write casts of scalars to occur
     assert_raises(TypeError, newiter, np.float32(2), [],
-                [['readonly']], op_dtypes=[np.dtype('f8')])
+                [['readwrite']], op_dtypes=[np.dtype('f8')])
     assert_raises(TypeError, newiter, 2.5, [],
-                [['readonly']], op_dtypes=[np.dtype('f4')])
-    # 'f8' -> 'f4' isn't a safe cast
-    assert_raises(TypeError, newiter, np.float64(2), [],
+                [['readwrite']], op_dtypes=[np.dtype('f4')])
+    # 'f8' -> 'f4' isn't a safe cast if the value would overflow
+    assert_raises(TypeError, newiter, np.float64(1e60), [],
                 [['readonly']],
                 casting='safe',
                 op_dtypes=[np.dtype('f4')])
@@ -951,7 +955,7 @@ def test_iter_object_arrays_conversions():
     a[:] = np.arange(6) + 98172488
     i = newiter(a, ['refs_ok','buffered'], ['readwrite'],
                     casting='unsafe', op_dtypes='O')
-    ob = i[0][...]
+    ob = i[0][()]
     rc = sys.getrefcount(ob)
     for x in i:
         x[...] += 1
author	Mark Wiebe <mwwiebe@gmail.com>	2011-01-30 13:33:15 -0800
committer	Mark Wiebe <mwwiebe@gmail.com>	2011-01-30 13:47:38 -0800
commit	4faf10e9246a77bc19c77dc80363d809f5a85396 (patch)
tree	98c0eca62403b7b4a95140bf3a7f620e12ae4651
parent	395146e64101ac65a057214d64135993a4c67d16 (diff)
download	numpy-4faf10e9246a77bc19c77dc80363d809f5a85396.tar.gz