diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2010-12-20 23:39:31 -0800 |
---|---|---|
committer | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-09 01:55:00 -0800 |
commit | 82643de85d2a8d25ebba351d707a2f13122d6c52 (patch) | |
tree | 5918f260f8a8ae66ddc8e08de97eeaa03a48d3d4 | |
parent | 578e01aa1587bb50a2eb1da09276f83289d56d41 (diff) | |
download | numpy-82643de85d2a8d25ebba351d707a2f13122d6c52.tar.gz |
ENH: iter: Add buffering support to more casting/swapping cases
-rw-r--r-- | numpy/core/src/multiarray/lowlevel_strided_loops.c.src | 226 | ||||
-rw-r--r-- | numpy/core/tests/test_new_iterator.py | 67 |
2 files changed, 257 insertions, 36 deletions
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src index 8c62baace..f0f22aa78 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -34,7 +34,7 @@ #define _NPY_SWAP_PAIR4(x) (((((npy_uint32)x)&0xffu) << 8) | \ ((((npy_uint32)x)&0xff00u) >> 8) | \ ((((npy_uint32)x)&0xff0000u) << 8) | \ - (((npy_uint32)x) >> 8)) + ((((npy_uint32)x)&0xff000000u) >> 8)) #define _NPY_SWAP8(x) (((((npy_uint64)x)&0xffu) << 56) | \ ((((npy_uint64)x)&0xff00u) << 40) | \ @@ -52,7 +52,7 @@ ((((npy_uint64)x)&0xff00000000u) << 24) | \ ((((npy_uint64)x)&0xff0000000000u) << 8) | \ ((((npy_uint64)x)&0xff000000000000u) >> 8) | \ - (((npy_uint64)x) >> 24)) + ((((npy_uint64)x)&0xff00000000000000u) >> 24)) #define _NPY_SWAP_INPLACE2(x) { \ char a = (x)[0]; (x)[0] = (x)[1]; (x)[1] = a; \ @@ -115,6 +115,7 @@ static void npy_intp N, npy_intp NPY_UNUSED(itemsize), void *NPY_UNUSED(data)) { + /*printf("fn @prefix@_@oper@_size@elsize@\n");*/ while (N > 0) { #if @is_aligned@ @@ -675,9 +676,9 @@ _aligned_strided_to_strided_cast(char *dst, npy_intp dst_stride, } static void -_aligned_contig_to_contig_cast(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp itemsize, +_aligned_contig_to_contig_cast(char *dst, npy_intp NPY_UNUSED(dst_stride), + char *src, npy_intp NPY_UNUSED(src_stride), + npy_intp N, npy_intp NPY_UNUSED(itemsize), void *data) { PyArray_VectorUnaryFunc *castfunc = ((_strided_cast_data *)data)->castfunc; @@ -685,33 +686,137 @@ _aligned_contig_to_contig_cast(char *dst, npy_intp dst_stride, castfunc(src, dst, N, NULL, NULL); } +/* Wraps a transfer function + data in alignment code */ +typedef struct { + void *freefunc; + PyArray_StridedTransferFn wrapped, + tobuffer, frombuffer; + void *wrappeddata; + npy_intp src_itemsize, dst_itemsize; + char *bufferin, *bufferout; +} _align_wrap_data; + +/* transfer data free function */ +void _align_wrap_data_free(_align_wrap_data *data) +{ + PyArray_FreeStridedTransferData(data->wrappeddata); + PyArray_free(data); +} + +static void +_strided_to_strided_contig_align_wrap(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp itemsize, + void *data) +{ + _align_wrap_data *d = (_align_wrap_data *)data; + PyArray_StridedTransferFn wrapped = d->wrapped, + tobuffer = d->tobuffer, + frombuffer = d->frombuffer; + npy_intp src_itemsize = d->src_itemsize, dst_itemsize = d->dst_itemsize; + void *wrappeddata = d->wrappeddata; + char *bufferin = d->bufferin, *bufferout = d->bufferout; + + for(;;) { + if (N > 32) { + tobuffer(bufferin, src_itemsize, src, src_stride, 32, + src_itemsize, NULL); + wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, 32, + itemsize, wrappeddata); + frombuffer(dst, dst_stride, bufferout, dst_itemsize, 32, + dst_itemsize, NULL); + N -= 32; + src += 32*src_stride; + dst += 32*dst_stride; + } + else { + tobuffer(bufferin, src_itemsize, src, src_stride, N, + src_itemsize, NULL); + wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, N, + itemsize, wrappeddata); + frombuffer(dst, dst_stride, bufferout, dst_itemsize, N, + dst_itemsize, NULL); + return; + } + } +} + +/* + * Wraps an aligned contig to contig transfer function between either + * copies or byte swaps to temporary buffers. + * + * src_itemsize/dst_itemsize - The sizes of the src and dst datatypes. + * tobuffer - copy/swap function from src to an aligned contiguous buffer. + * data passed to 'tobuffer' is NULL. + * frombuffer - copy/swap function from an aligned contiguous buffer to dst. + * data passed to 'frombuffer' is NULL. + * wrapped - contig to contig transfer function being wrapped + * wrappeddata - data for wrapped + */ +NPY_NO_EXPORT void +PyArray_WrapTransferFunction(npy_intp src_itemsize, npy_intp dst_itemsize, + PyArray_StridedTransferFn tobuffer, + PyArray_StridedTransferFn frombuffer, + PyArray_StridedTransferFn wrapped, void *wrappeddata, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + _align_wrap_data *data; + npy_intp basedatasize, datasize; + + /* Round up the structure size to 16-byte boundary */ + basedatasize = (sizeof(_align_wrap_data)+15)&(-0x10); + /* Add space for two 32-element buffers */ + datasize = basedatasize + 32*src_itemsize + 32*dst_itemsize; + + /* Allocate the data, and populate it */ + data = (_align_wrap_data *)PyArray_malloc(datasize); + data->freefunc = (void *)&_align_wrap_data_free; + data->tobuffer = tobuffer; + data->frombuffer = frombuffer; + data->wrapped = wrapped; + data->wrappeddata = wrappeddata; + data->src_itemsize = src_itemsize; + data->dst_itemsize = dst_itemsize; + data->bufferin = (char *)data + basedatasize; + data->bufferout = data->bufferin + 32*src_itemsize; + + /* Set the function and data */ + *outstransfer = &_strided_to_strided_contig_align_wrap; + *outtransferdata = data; +} NPY_NO_EXPORT int PyArray_GetTransferFunction(int aligned, npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *from, PyArray_Descr *to, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, PyArray_StridedTransferFn *outstransfer, void **outtransferdata) { + npy_intp src_itemsize = src_dtype->elsize, + dst_itemsize = dst_dtype->elsize; + int src_type_num = src_dtype->type_num, + dst_type_num = dst_dtype->type_num; + /* First look at the possibilities of just a copy or swap */ - if (from->elsize == to->elsize && from->type_num < NPY_OBJECT && - to->type_num < NPY_OBJECT && - from->kind == to->kind) { + if (src_itemsize == dst_itemsize && src_type_num < NPY_OBJECT && + dst_type_num < NPY_OBJECT && + src_dtype->kind == dst_dtype->kind) { /* This is a straight copy */ - if (from->elsize == 1 || PyArray_ISNBO(from->byteorder) == - PyArray_ISNBO(to->byteorder)) { + if (src_itemsize == 1 || PyArray_ISNBO(src_dtype->byteorder) == + PyArray_ISNBO(dst_dtype->byteorder)) { *outstransfer = PyArray_GetStridedCopyFn(aligned, src_stride, dst_stride, - from->elsize); + src_itemsize); *outtransferdata = NULL; return (*outstransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; } /* This is a straight copy + byte swap */ - else if (!PyTypeNum_ISCOMPLEX(from->type_num)) { + else if (!PyTypeNum_ISCOMPLEX(src_type_num)) { *outstransfer = PyArray_GetStridedCopySwapFn(aligned, src_stride, dst_stride, - from->elsize); + src_itemsize); *outtransferdata = NULL; return (*outstransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; } @@ -719,40 +824,42 @@ PyArray_GetTransferFunction(int aligned, else { *outstransfer = PyArray_GetStridedCopySwapPairFn(aligned, src_stride, dst_stride, - from->elsize); + src_itemsize); *outtransferdata = NULL; return (*outstransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; } } - /* TODO check for fields & subarrays */ + /* Check whether a simple cast and some swaps will suffice */ + if (src_type_num < NPY_OBJECT && dst_type_num < NPY_OBJECT) { + _strided_cast_data *data; + PyArray_VectorUnaryFunc *castfunc; - /* Check whether a simple cast will suffice */ - if (from->type_num < NPY_OBJECT && to->type_num < NPY_OBJECT && - PyArray_ISNBO(from->type_num) && PyArray_ISNBO(to->type_num)) { - PyArray_VectorUnaryFunc *castfunc = - PyArray_GetCastFunc(from, to->type_num); + /* Get the cast function */ + castfunc = PyArray_GetCastFunc(src_dtype, dst_type_num); if (!castfunc) { *outstransfer = NULL; *outtransferdata = NULL; return NPY_FAIL; } - if (aligned) { - /* Allocate the data that describes the cast */ - _strided_cast_data *data = - PyArray_malloc(sizeof(_strided_cast_data)); - if (data == NULL) { - PyErr_NoMemory(); - *outstransfer = NULL; - *outtransferdata = NULL; - return NPY_FAIL; - } - data->freefunc = (void*)&(PyArray_free); - data->castfunc = castfunc; + /* Allocate the data for the casting */ + data = PyArray_malloc(sizeof(_strided_cast_data)); + if (data == NULL) { + PyErr_NoMemory(); + *outstransfer = NULL; + *outtransferdata = NULL; + return NPY_FAIL; + } + data->freefunc = (void*)&(PyArray_free); + data->castfunc = castfunc; + + /* If it's aligned and all native byte order, we're all done */ + if (aligned && PyArray_ISNBO(src_dtype->byteorder) && + PyArray_ISNBO(dst_dtype->byteorder)) { /* Choose the contiguous cast if we can */ - if (src_stride == from->elsize && dst_stride == to->elsize) { + if (src_stride == src_itemsize && dst_stride == dst_itemsize) { *outstransfer = _aligned_contig_to_contig_cast; } else { @@ -762,10 +869,59 @@ PyArray_GetTransferFunction(int aligned, return NPY_SUCCEED; } + /* Otherwise, we have to copy and/or swap to aligned temporaries */ + else { + PyArray_StridedTransferFn tobuffer, frombuffer, casttransfer; - /* TODO wrap the cast in an alignment operation */ + /* Get the copy/swap operation from src */ + if (PyArray_ISNBO(src_dtype->byteorder)) { + tobuffer = PyArray_GetStridedCopyFn(aligned, + src_stride, src_itemsize, + src_itemsize); + } + else if(!PyTypeNum_ISCOMPLEX(src_type_num)) { + tobuffer = PyArray_GetStridedCopySwapFn(aligned, + src_stride, src_itemsize, + src_itemsize); + } + else { + tobuffer = PyArray_GetStridedCopySwapPairFn(aligned, + src_stride, src_itemsize, + src_itemsize); + } + + /* Get the copy/swap operation to dst */ + if (PyArray_ISNBO(dst_dtype->byteorder)) { + frombuffer = PyArray_GetStridedCopyFn(aligned, + dst_itemsize, dst_stride, + dst_itemsize); + } + else if(!PyTypeNum_ISCOMPLEX(dst_type_num)) { + frombuffer = PyArray_GetStridedCopySwapFn(aligned, + dst_itemsize, dst_stride, + dst_itemsize); + } + else { + frombuffer = PyArray_GetStridedCopySwapPairFn(aligned, + dst_itemsize, dst_stride, + dst_itemsize); + } + + /* Use the aligned contiguous cast */ + casttransfer = &_aligned_contig_to_contig_cast; + + /* Wrap it all up in a new transfer function + data */ + PyArray_WrapTransferFunction(src_itemsize, dst_itemsize, + tobuffer, frombuffer, + casttransfer, data, + outstransfer, outtransferdata); + + return NPY_SUCCEED; + } } + /* TODO check for fields & subarrays */ + /* TODO: write the more complicated transfer code! */ *outstransfer = NULL; *outtransferdata = NULL; diff --git a/numpy/core/tests/test_new_iterator.py b/numpy/core/tests/test_new_iterator.py index 749a4fcae..c93b13539 100644 --- a/numpy/core/tests/test_new_iterator.py +++ b/numpy/core/tests/test_new_iterator.py @@ -1131,7 +1131,7 @@ def test_iter_write_buffering(): i.iternext() assert_equal(a.ravel(order='C'), np.arange(24)) -def test_iter_cast_buffering(): +def test_iter_buffered_cast_simple(): # Test that buffering can handle a simple cast a = np.arange(10, dtype='f4') @@ -1144,6 +1144,71 @@ def test_iter_cast_buffering(): assert_equal(a, 2*np.arange(10, dtype='f4')) +def test_iter_buffered_cast_byteswapped(): + # Test that buffering can handle a cast which requires swap->cast->swap + + a = np.arange(10, dtype='f4').newbyteorder().byteswap() + i = np.newiter(a, ['buffered','no_inner_iteration'], + [['readwrite','nbo_aligned','same_kind_casts']], + op_dtypes=[np.dtype('f8').newbyteorder()], + buffersize=3) + for v in i: + v[()] *= 2 + + assert_equal(a, 2*np.arange(10, dtype='f4')) + + a = np.arange(10, dtype='f8').newbyteorder().byteswap() + i = np.newiter(a, ['buffered','no_inner_iteration'], + [['readwrite','nbo_aligned','unsafe_casts']], + op_dtypes=[np.dtype('c8').newbyteorder()], + buffersize=3) + for v in i: + v[()] *= 2 + + assert_equal(a, 2*np.arange(10, dtype='f8')) + +def test_iter_buffered_cast_byteswapped(): + # Test that buffering can handle a cast which requires swap->cast->copy + + a = np.arange(10, dtype='c8').newbyteorder().byteswap() + a += 2j + i = np.newiter(a, ['buffered','no_inner_iteration'], + [['readwrite','nbo_aligned','same_kind_casts']], + op_dtypes=[np.dtype('c16')], + buffersize=3) + for v in i: + v[()] *= 2 + assert_equal(a, 2*np.arange(10, dtype='c8') + 4j) + + a = np.arange(10, dtype='c8') + a += 2j + i = np.newiter(a, ['buffered','no_inner_iteration'], + [['readwrite','nbo_aligned','same_kind_casts']], + op_dtypes=[np.dtype('c16').newbyteorder()], + buffersize=3) + for v in i: + v[()] *= 2 + assert_equal(a, 2*np.arange(10, dtype='c8') + 4j) + + a = np.arange(10, dtype=np.clongdouble).newbyteorder().byteswap() + a += 2j + i = np.newiter(a, ['buffered','no_inner_iteration'], + [['readwrite','nbo_aligned','same_kind_casts']], + op_dtypes=[np.dtype('c16')], + buffersize=3) + for v in i: + v[()] *= 2 + assert_equal(a, 2*np.arange(10, dtype=np.clongdouble) + 4j) + + a = np.arange(10, dtype=np.longdouble).newbyteorder().byteswap() + i = np.newiter(a, ['buffered','no_inner_iteration'], + [['readwrite','nbo_aligned','same_kind_casts']], + op_dtypes=[np.dtype('f4')], + buffersize=7) + for v in i: + v[()] *= 2 + assert_equal(a, 2*np.arange(10, dtype=np.longdouble)) + def test_iter_buffering_growinner(): # Test that the inner loop grows when no buffering is needed a = np.arange(30) |