summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authorMark Wiebe <mwwiebe@gmail.com>2010-12-21 10:07:12 -0800
committerMark Wiebe <mwwiebe@gmail.com>2011-01-09 01:55:00 -0800
commit516a38d1090ff3504a60da3da9205bd1fe0ff0d9 (patch)
tree940f2be670c8bbc2551038e0308955ed9b3cfe20 /numpy
parented53e18a37736cfd7dd51c893d94471c4a37e92e (diff)
downloadnumpy-516a38d1090ff3504a60da3da9205bd1fe0ff0d9.tar.gz
ENH: iter: Detect cases where buffering of some operands can be skipped
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/src/multiarray/lowlevel_strided_loops.c.src2
-rw-r--r--numpy/core/src/multiarray/lowlevel_strided_loops.h6
-rw-r--r--numpy/core/src/multiarray/new_iterator.c.src136
-rw-r--r--numpy/core/tests/test_new_iterator.py29
4 files changed, 126 insertions, 47 deletions
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index d317eeb2e..06f44d23f 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -788,7 +788,7 @@ PyArray_WrapTransferFunction(npy_intp src_itemsize, npy_intp dst_itemsize,
NPY_NO_EXPORT int
-PyArray_GetTransferFunction(int aligned,
+PyArray_GetDTypeTransferFunction(int aligned,
npy_intp src_stride, npy_intp dst_stride,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
PyArray_StridedTransferFn *outstransfer,
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.h b/numpy/core/src/multiarray/lowlevel_strided_loops.h
index b3d9c9de0..66cf60f02 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.h
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.h
@@ -75,13 +75,13 @@ PyArray_GetStridedCopySwapPairFn(npy_intp aligned, npy_intp src_stride,
* If it's possible, gives back a transfer function which casts and/or
* byte swaps data with the dtype 'from' into data with the dtype 'to'.
* If the outtransferdata is populated with a non-NULL value, it
- * must be deallocated with the PyArray_free function when the transfer
- * function is no longer required.
+ * must be deallocated with the ``PyArray_FreeStridedTransferData``
+ * function when the transfer function is no longer required.
*
* Returns NPY_SUCCEED or NPY_FAIL.
*/
NPY_NO_EXPORT int
-PyArray_GetTransferFunction(int aligned,
+PyArray_GetDTypeTransferFunction(int aligned,
npy_intp src_stride, npy_intp dst_stride,
PyArray_Descr *from, PyArray_Descr *to,
PyArray_StridedTransferFn *outstransfer,
diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src
index a29d0604d..91dba7bdd 100644
--- a/numpy/core/src/multiarray/new_iterator.c.src
+++ b/numpy/core/src/multiarray/new_iterator.c.src
@@ -50,8 +50,6 @@
#define NPY_OP_ITFLAG_COPYSWAP 0x10
/* The operand never needs buffering */
#define NPY_OP_ITFLAG_BUFNEVER 0x20
-/* The operand needs to be byte-swapped */
-#define NPY_OP_ITFLAG_BUFSWAP 0x40
/* The operand is aligned */
#define NPY_OP_ITFLAG_ALIGNED 0x80
@@ -656,8 +654,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags,
npyiter_replace_axisdata(iter, iiter, op[iiter], op_ndim[iiter],
PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL);
- /* Newly allocated arrays need no buffering */
- op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER;
+ /* New arrays are aligned and need no swapping or casting */
+ op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
op_itflags[iiter] &= ~(NPY_OP_ITFLAG_COPYSWAP|NPY_OP_ITFLAG_CAST);
}
else if ((op_itflags[iiter]&
@@ -703,8 +701,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags,
npyiter_replace_axisdata(iter, iiter, op[iiter], op_ndim[iiter],
PyArray_DATA(op[iiter]), op_axes ? op_axes[iiter] : NULL);
- /* Copied arrays need no buffering */
- op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER;
+ /* Now it is aligned, and no longer needs a swap or cast */
+ op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
op_itflags[iiter] &= ~(NPY_OP_ITFLAG_COPYSWAP|NPY_OP_ITFLAG_CAST);
}
else {
@@ -729,17 +727,83 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags,
if (PyArray_ISALIGNED(op[iiter])) {
op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED;
}
+ }
+ /*
+ * If no alignment, byte swap, or casting is needed, and
+ * the inner stride of this operand works for the whole
+ * array, we can set NPY_OP_ITFLAG_BUFNEVER.
+ * But, if buffering is enabled, write-buffering must be
+ * one-to-one, because the buffering write back won't combine
+ * values correctly. This test doesn't catch everything, but it will
+ * catch the most common case of a broadcasting a write-buffered
+ * dimension.
+ */
+ if ((itflags&NPY_ITFLAG_BUFFER) &&
+ (!(op_itflags[iiter]&(NPY_OP_ITFLAG_CAST|
+ NPY_OP_ITFLAG_COPYSWAP)) ||
+ (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE))) {
+ int is_one_to_one = 1;
+ npy_intp stride, shape, innerstride = 0, innershape;
+ char *axisdata = NIT_AXISDATA(iter);
+ npy_intp sizeof_axisdata =
+ NIT_SIZEOF_AXISDATA(itflags, ndim, niter);
+ /* Find stride of the first non-empty shape */
+ for (idim = 0; idim < ndim; ++idim) {
+ innershape = NAD_SHAPE(axisdata);
+ if (innershape != 1) {
+ innerstride = NAD_STRIDES(axisdata)[iiter];
+ if (innerstride == 0) {
+ is_one_to_one = 0;
+ }
+ break;
+ }
+ axisdata += sizeof_axisdata;
+ }
+ ++idim;
+ axisdata += sizeof_axisdata;
+ /* Check that everything could have coalesced together */
+ for (; idim < ndim; ++idim) {
+ stride = NAD_STRIDES(axisdata)[iiter];
+ shape = NAD_SHAPE(axisdata);
+ if (shape != 1) {
+ if (stride == 0) {
+ is_one_to_one = 0;
+ }
+ /*
+ * If N times the inner stride doesn't equal this
+ * stride, the multi-dimensionality is needed.
+ */
+ if (innerstride*innershape != stride) {
+ break;
+ }
+ else {
+ innershape *= shape;
+ }
+ }
+ axisdata += sizeof_axisdata;
+ }
/*
- * If the operand has a different byte order than the
- * desired data type, need a swap
+ * If we looped all the way to the end, one stride works.
+ * Set that stride, because it may not belong to the first
+ * dimension.
*/
- if (PyArray_ISNBO(PyArray_DESCR(op[iiter])->byteorder) !=
- PyArray_ISNBO(op_dtype[iiter]->byteorder)) {
- op_itflags[iiter] |= NPY_OP_ITFLAG_BUFSWAP;
+ if (idim == ndim &&
+ !(op_itflags[iiter]&(NPY_OP_ITFLAG_CAST|
+ NPY_OP_ITFLAG_COPYSWAP))) {
+ op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER;
+ NBF_STRIDES(bufferdata)[iiter] = innerstride;
+ }
+ else if (!is_one_to_one &&
+ (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Iterator operand requires write buffering, "
+ "but has dimensions which have been broadcasted "
+ "and would be combined incorrectly");
+ NpyIter_Deallocate(iter);
+ return NULL;
}
}
-
}
/*
@@ -3288,7 +3352,7 @@ npyiter_allocate_buffers(NpyIter *iter)
/* Also need to get an appropriate transfer functions */
if (flags&NPY_OP_ITFLAG_READ) {
- if (PyArray_GetTransferFunction(
+ if (PyArray_GetDTypeTransferFunction(
(flags&NPY_OP_ITFLAG_ALIGNED) != 0,
op_stride,
op_dtype[iiter]->elsize,
@@ -3305,7 +3369,7 @@ npyiter_allocate_buffers(NpyIter *iter)
NBF_READTRANSFERFN(bufferdata)[iiter] = NULL;
}
if (flags&NPY_OP_ITFLAG_WRITE) {
- if (PyArray_GetTransferFunction(
+ if (PyArray_GetDTypeTransferFunction(
(flags&NPY_OP_ITFLAG_ALIGNED) != 0,
op_dtype[iiter]->elsize,
op_stride,
@@ -3322,6 +3386,10 @@ npyiter_allocate_buffers(NpyIter *iter)
NBF_WRITETRANSFERFN(bufferdata)[iiter] = NULL;
}
}
+ else {
+ NBF_READTRANSFERFN(bufferdata)[iiter] = NULL;
+ NBF_WRITETRANSFERFN(bufferdata)[iiter] = NULL;
+ }
}
return 1;
@@ -3548,43 +3616,27 @@ npyiter_copy_to_buffers(NpyIter *iter)
transferdata = NBF_READTRANSFERDATA(bufferdata)[iiter];
switch (op_itflags[iiter]&
(NPY_OP_ITFLAG_BUFNEVER|
- NPY_OP_ITFLAG_BUFSWAP|
- NPY_OP_ITFLAG_ALIGNED|
+ NPY_OP_ITFLAG_COPYSWAP|
NPY_OP_ITFLAG_CAST)) {
/* never need to buffer this operand */
case NPY_OP_ITFLAG_BUFNEVER:
ptrs[iiter] = ad_ptrs[iiter];
- strides[iiter] = ad_strides[iiter];
- stransfer = NULL;
- break;
- /* aligned copy*/
- case NPY_OP_ITFLAG_ALIGNED:
/*
- * Since all we're doing is copying the data to make
- * it work with one stride, if it already does there's
- * no need to copy.
+ * Should not adjust the stride - ad_strides[iiter]
+ * could be zero, but strides[iiter] was initialized
+ * to the first non-trivial stride.
*/
- if (is_onestride) {
- ptrs[iiter] = ad_ptrs[iiter];
- strides[iiter] = ad_strides[iiter];
- stransfer = NULL;
- }
- else {
- /* In this case, the buffer is being used */
- ptrs[iiter] = buffers[iiter];
- strides[iiter] = dtypes[iiter]->elsize;
- }
+ stransfer = NULL;
break;
- /* unaligned copy */
+ /* just a copy */
case 0:
/*
- * If aligned data wasn't requested (as seen by the
- * copyswap flag not being set), all we're doing is
- * copying the data to make it work with one stride.
- * If it already does there's no need to copy.
+ * No copyswap or cast was requested, so all we're
+ * doing is copying the data to fill the buffer and
+ * produce a single stride. If the underlying data
+ * already does that, no need to copy it.
*/
- if (is_onestride &&
- !(op_itflags[iiter]&NPY_OP_ITFLAG_COPYSWAP)) {
+ if (is_onestride) {
ptrs[iiter] = ad_ptrs[iiter];
strides[iiter] = ad_strides[iiter];
stransfer = NULL;
@@ -3719,8 +3771,6 @@ NpyIter_DebugPrint(NpyIter *iter)
printf("COPYSWAP ");
if ((NIT_OPITFLAGS(iter)[iiter])&NPY_OP_ITFLAG_BUFNEVER)
printf("BUFNEVER ");
- if ((NIT_OPITFLAGS(iter)[iiter])&NPY_OP_ITFLAG_BUFSWAP)
- printf("BUFSWAP ");
if ((NIT_OPITFLAGS(iter)[iiter])&NPY_OP_ITFLAG_ALIGNED)
printf("ALIGNED ");
printf("\n");
diff --git a/numpy/core/tests/test_new_iterator.py b/numpy/core/tests/test_new_iterator.py
index 909c2f0d2..076c9e8a6 100644
--- a/numpy/core/tests/test_new_iterator.py
+++ b/numpy/core/tests/test_new_iterator.py
@@ -1211,6 +1211,35 @@ def test_iter_buffered_cast_byteswapped():
v[()] *= 2
assert_equal(a, 2*np.arange(10, dtype=np.longdouble))
+def test_iter_buffering_badwriteback():
+ # Writing back from a buffer cannot combine elements
+
+ # a needs write buffering, but had a broadcast dimension
+ a = np.arange(6).reshape(2,3,1)
+ b = np.arange(12).reshape(2,3,2)
+ assert_raises(ValueError,np.newiter,[a,b],
+ ['buffered','no_inner_iteration','force_c_order'],
+ [['readwrite'],['writeonly']])
+
+ # But if a is readonly, it's fine
+ i = np.newiter([a,b],['buffered','no_inner_iteration','force_c_order'],
+ [['readonly'],['writeonly']])
+
+ # If a has just one element, it's fine too (constant 0 stride)
+ a = np.arange(1).reshape(1,1,1)
+ i = np.newiter([a,b],['buffered','no_inner_iteration','force_c_order'],
+ [['readwrite'],['writeonly']])
+
+ # check that it fails on other dimensions too
+ a = np.arange(6).reshape(1,3,2)
+ assert_raises(ValueError,np.newiter,[a,b],
+ ['buffered','no_inner_iteration','force_c_order'],
+ [['readwrite'],['writeonly']])
+ a = np.arange(4).reshape(2,1,2)
+ assert_raises(ValueError,np.newiter,[a,b],
+ ['buffered','no_inner_iteration','force_c_order'],
+ [['readwrite'],['writeonly']])
+
def test_iter_buffering_growinner():
# Test that the inner loop grows when no buffering is needed
a = np.arange(30)