diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-30 13:33:15 -0800 |
---|---|---|
committer | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-30 13:47:38 -0800 |
commit | 4faf10e9246a77bc19c77dc80363d809f5a85396 (patch) | |
tree | 98c0eca62403b7b4a95140bf3a7f620e12ae4651 | |
parent | 395146e64101ac65a057214d64135993a4c67d16 (diff) | |
download | numpy-4faf10e9246a77bc19c77dc80363d809f5a85396.tar.gz |
ENH: iter: Allow copies of read-only scalar arrays even if COPY isn't specified
When buffering is enabled, this greatly reduces the work needed. For
non-reduction operands, the iterator always provides contiguous data when
an operand is buffered, but by making a copy the operand can be flagged
as never needing buffering.
-rw-r--r-- | numpy/core/src/multiarray/new_iterator.c.src | 97 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 5 | ||||
-rw-r--r-- | numpy/core/tests/test_new_iterator.py | 16 |
3 files changed, 79 insertions, 39 deletions
diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src index 9a8977b8a..8c8a1b897 100644 --- a/numpy/core/src/multiarray/new_iterator.c.src +++ b/numpy/core/src/multiarray/new_iterator.c.src @@ -4416,10 +4416,16 @@ npyiter_allocate_arrays(NpyIter *iter, npy_intp idim, ndim = NIT_NDIM(iter); npy_intp iiter, niter = NIT_NITER(iter); - NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter); + NpyIter_BufferData *bufferdata = NULL; PyArrayObject **op = NIT_OPERANDS(iter); + if (itflags&NPY_ITFLAG_BUFFER) { + bufferdata = NIT_BUFFERDATA(iter); + } + + for (iiter = 0; iiter < niter; ++iiter) { + /* NULL means an output the iterator should allocate */ if (op[iiter] == NULL) { PyArrayObject *out; PyTypeObject *op_subtype; @@ -4453,6 +4459,50 @@ npyiter_allocate_arrays(NpyIter *iter, op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED; op_itflags[iiter] &= ~NPY_OP_ITFLAG_CAST; } + /* + * If casting is required, the operand is read-only, and + * it's an array scalar, make a copy whether or not the + * copy flag is enabled. + */ + else if ((op_itflags[iiter]&(NPY_OP_ITFLAG_CAST| + NPY_OP_ITFLAG_READ| + NPY_OP_ITFLAG_WRITE)) == (NPY_OP_ITFLAG_CAST| + NPY_OP_ITFLAG_READ) && + PyArray_NDIM(op[iiter]) == 0) { + PyArrayObject *temp; + Py_INCREF(op_dtype[iiter]); + temp = (PyArrayObject *)PyArray_NewFromDescr( + &PyArray_Type, op_dtype[iiter], + 0, NULL, NULL, NULL, 0, NULL); + if (temp == NULL) { + return 0; + } + if (PyArray_CopyInto(temp, op[iiter]) != 0) { + Py_DECREF(temp); + return 0; + } + Py_DECREF(op[iiter]); + op[iiter] = temp; + + /* + * Now we need to replace the pointers and strides with values + * from the temporary array. 
+ */ + npyiter_replace_axisdata(iter, iiter, op[iiter], 0, + PyArray_DATA(op[iiter]), NULL); + + /* + * New arrays are aligned need no cast, and in the case + * of scalars, always have stride 0 so never need buffering + */ + op_itflags[iiter] |= (NPY_OP_ITFLAG_ALIGNED| + NPY_OP_ITFLAG_BUFNEVER); + op_itflags[iiter] &= ~NPY_OP_ITFLAG_CAST; + if (itflags&NPY_ITFLAG_BUFFER) { + NBF_STRIDES(bufferdata)[iiter] = 0; + } + } + /* If casting is required and permitted */ else if ((op_itflags[iiter]&NPY_OP_ITFLAG_CAST) && (op_flags[iiter]&(NPY_ITER_COPY|NPY_ITER_UPDATEIFCOPY))) { PyArrayObject *temp; @@ -4494,7 +4544,10 @@ npyiter_allocate_arrays(NpyIter *iter, npyiter_replace_axisdata(iter, iiter, op[iiter], ondim, PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL); - /* The temporary copy is aligned and needs no cast */ + /* + * The temporary copy is aligned and needs no cast, and + * has constant stride 0 so never needs buffering + */ op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED; op_itflags[iiter] &= ~NPY_OP_ITFLAG_CAST; } @@ -4543,16 +4596,9 @@ npyiter_allocate_arrays(NpyIter *iter, * If no alignment, byte swap, or casting is needed, and * the inner stride of this operand works for the whole * array, we can set NPY_OP_ITFLAG_BUFNEVER. - * But, if buffering is enabled, write-buffering must be - * one-to-one, because the buffering write back won't combine - * values correctly. This test doesn't catch everything, but it will - * catch the most common case of a broadcasting a write-buffered - * dimension. 
*/ - if ((itflags&NPY_ITFLAG_BUFFER) && - (!(op_itflags[iiter]&NPY_OP_ITFLAG_CAST) || - (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE))) { - int is_one_to_one = 1; + if (PyArray_NDIM(op[iiter]) > 0 && (itflags&NPY_ITFLAG_BUFFER) && + !(op_itflags[iiter]&NPY_OP_ITFLAG_CAST)) { npy_intp stride, shape, innerstride = 0, innershape; NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); npy_intp sizeof_axisdata = @@ -4562,9 +4608,6 @@ npyiter_allocate_arrays(NpyIter *iter, innershape = NAD_SHAPE(axisdata); if (innershape != 1) { innerstride = NAD_STRIDES(axisdata)[iiter]; - if (innerstride == 0) { - is_one_to_one = 0; - } break; } NIT_ADVANCE_AXISDATA(axisdata, 1); @@ -4576,9 +4619,6 @@ npyiter_allocate_arrays(NpyIter *iter, stride = NAD_STRIDES(axisdata)[iiter]; shape = NAD_SHAPE(axisdata); if (shape != 1) { - if (stride == 0) { - is_one_to_one = 0; - } /* * If N times the inner stride doesn't equal this * stride, the multi-dimensionality is needed. @@ -4597,23 +4637,10 @@ npyiter_allocate_arrays(NpyIter *iter, * Set that stride, because it may not belong to the first * dimension. 
*/ - if (idim == ndim && !(op_itflags[iiter]&NPY_OP_ITFLAG_CAST)) { + if (idim == ndim) { op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; NBF_STRIDES(bufferdata)[iiter] = innerstride; } - else if (!is_one_to_one && - (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) && - !(flags&NPY_ITER_REDUCE_OK)) { - NPY_IT_DBG_PRINTF("Iterator: %d %d %d\n", - (int)(!is_one_to_one), - (int)((op_itflags[iiter]&NPY_OP_ITFLAG_WRITE)), - (int)(!(flags&NPY_ITER_REDUCE_OK))); - PyErr_SetString(PyExc_ValueError, - "Iterator operand requires write buffering, " - "but has dimensions which have been broadcasted " - "and would be combined incorrectly"); - return 0; - } } } @@ -5201,9 +5228,15 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs) reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim); NBF_SIZE(bufferdata) = reduce_innersize; NBF_REDUCE_POS(bufferdata) = 0; - NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize; NBF_REDUCE_OUTERDIM(bufferdata) = reduce_outerdim; NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize; + if (reduce_innersize == 0) { + NBF_REDUCE_OUTERSIZE(bufferdata) = 0; + return; + } + else { + NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize; + } } else { NBF_SIZE(bufferdata) = transfersize; diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 514575bf0..08f569502 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -1796,7 +1796,10 @@ iterator_loop(PyUFuncObject *self, if (NpyIter_GetIterSize(iter) != 0) { /* Reset the iterator with the base pointers from the wrapped outputs */ - for (i = 0; i < niter; ++i) { + for (i = 0; i < nin; ++i) { + baseptrs[i] = PyArray_BYTES(op_it[i]); + } + for (i = nin; i < niter; ++i) { baseptrs[i] = PyArray_BYTES(op[i]); } if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) { diff --git a/numpy/core/tests/test_new_iterator.py b/numpy/core/tests/test_new_iterator.py index 
785bec88b..ad51862c9 100644 --- a/numpy/core/tests/test_new_iterator.py +++ b/numpy/core/tests/test_new_iterator.py @@ -866,17 +866,21 @@ def test_iter_scalar_cast(): assert_equal(i.dtypes[0], np.dtype('i4')) assert_equal(i.value.dtype, np.dtype('i4')) assert_equal(i.value, 3) + # Readonly scalars may be cast even without setting COPY or BUFFERED + i = newiter(3, [], [['readonly']], op_dtypes=[np.dtype('f8')]) + assert_equal(i[0].dtype, np.dtype('f8')) + assert_equal(i[0], 3.) def test_iter_scalar_cast_errors(): # Check that invalid casts are caught - # Need to allow casting for casts to occur + # Need to allow copying/buffering for write casts of scalars to occur assert_raises(TypeError, newiter, np.float32(2), [], - [['readonly']], op_dtypes=[np.dtype('f8')]) + [['readwrite']], op_dtypes=[np.dtype('f8')]) assert_raises(TypeError, newiter, 2.5, [], - [['readonly']], op_dtypes=[np.dtype('f4')]) - # 'f8' -> 'f4' isn't a safe cast - assert_raises(TypeError, newiter, np.float64(2), [], + [['readwrite']], op_dtypes=[np.dtype('f4')]) + # 'f8' -> 'f4' isn't a safe cast if the value would overflow + assert_raises(TypeError, newiter, np.float64(1e60), [], [['readonly']], casting='safe', op_dtypes=[np.dtype('f4')]) @@ -951,7 +955,7 @@ def test_iter_object_arrays_conversions(): a[:] = np.arange(6) + 98172488 i = newiter(a, ['refs_ok','buffered'], ['readwrite'], casting='unsafe', op_dtypes='O') - ob = i[0][...] + ob = i[0][()] rc = sys.getrefcount(ob) for x in i: x[...] += 1 |