diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-14 17:35:23 -0800 |
---|---|---|
committer | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-14 23:49:09 -0800 |
commit | 5245d390eb5e22634fd3a48a13c276216a2f894f (patch) | |
tree | 06c59fc36d94f73d01e8edb939cdf9f2348271b6 | |
parent | 85f391bcf2e580e4a5644eba3719e13a6c135638 (diff) | |
download | numpy-5245d390eb5e22634fd3a48a13c276216a2f894f.tar.gz |
ENH: iter: Add support for buffering arrays with fields and subarrays
-rw-r--r-- | numpy/core/setup.py | 1 | ||||
-rw-r--r-- | numpy/core/src/multiarray/dtype_transfer.c | 2249 | ||||
-rw-r--r-- | numpy/core/src/multiarray/lowlevel_strided_loops.c.src | 1212 | ||||
-rw-r--r-- | numpy/core/src/multiarray/lowlevel_strided_loops.h | 26 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule_onefile.c | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/new_iterator.c.src | 9 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 4 | ||||
-rw-r--r-- | numpy/core/tests/test_new_iterator.py | 361 |
8 files changed, 2633 insertions, 1231 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py index ca19abb5b..0a9150f67 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -770,6 +770,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'scalartypes.c.src'), join('src', 'multiarray', 'new_iterator.c.src'), join('src', 'multiarray', 'lowlevel_strided_loops.c.src'), + join('src', 'multiarray', 'dtype_transfer.c'), join('src', 'multiarray', 'new_iterator_pywrap.c')] if PYTHON_HAS_UNICODE_WIDE: diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c new file mode 100644 index 000000000..21607cf48 --- /dev/null +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -0,0 +1,2249 @@ +/* + * This file contains low-level loops for data type transfers. + * In particular the function PyArray_GetDTypeTransferFunction is + * implemented here. + */ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "structmember.h" + +#define _MULTIARRAYMODULE +#include <numpy/ndarrayobject.h> +#include <numpy/ufuncobject.h> +#include <numpy/npy_cpu.h> + +#include "lowlevel_strided_loops.h" + +#define NPY_LOWLEVEL_BUFFER_BLOCKSIZE 128 + +/*************************** COPY REFERENCES *******************************/ + +/* Moves references from src to dst */ +static void +_strided_to_strided_move_references(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + void *data) +{ + PyObject *src_ref = NULL, *dst_ref = NULL; + while (N > 0) { + NPY_COPY_PYOBJECT_PTR(&src_ref, src); + NPY_COPY_PYOBJECT_PTR(&dst_ref, dst); + + /* Release the reference in dst */ + Py_XDECREF(dst_ref); + /* Move the reference */ + NPY_COPY_PYOBJECT_PTR(dst, &src_ref); + /* Set the source reference to NULL */ + src_ref = NULL; + NPY_COPY_PYOBJECT_PTR(src, &src_ref); + + src += src_stride; + dst += dst_stride; + --N; + } +} + +/* Copies references from src to dst */ +static void +_strided_to_strided_copy_references(char *dst, 
npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + void *data) +{ + PyObject *src_ref = NULL, *dst_ref = NULL; + while (N > 0) { + NPY_COPY_PYOBJECT_PTR(&src_ref, src); + NPY_COPY_PYOBJECT_PTR(&dst_ref, dst); + + /* Release the reference in dst */ + Py_XDECREF(dst_ref); + /* Copy the reference */ + NPY_COPY_PYOBJECT_PTR(dst, &src_ref); + /* Claim the reference */ + Py_XINCREF(src_ref); + + src += src_stride; + dst += dst_stride; + --N; + } +} + +/************************** ZERO-PADDED COPY ******************************/ + +/* Does a zero-padded copy */ +typedef struct { + void *freefunc, *copyfunc; + npy_intp dst_itemsize; +} _strided_zero_pad_data; + +/* zero-padded data copy function */ +_strided_zero_pad_data *_strided_zero_pad_data_copy( + _strided_zero_pad_data *data) +{ + _strided_zero_pad_data *newdata = + (_strided_zero_pad_data *)PyArray_malloc( + sizeof(_strided_zero_pad_data)); + if (newdata == NULL) { + return NULL; + } + + memcpy(newdata, data, sizeof(_strided_zero_pad_data)); + + return newdata; +} + +/* + * Does a strided to strided zero-padded copy for the case where + * dst_itemsize > src_itemsize + */ +static void +_strided_to_strided_zero_pad_copy(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + void *data) +{ + _strided_zero_pad_data *d = (_strided_zero_pad_data *)data; + npy_intp dst_itemsize = d->dst_itemsize; + npy_intp zero_size = dst_itemsize-src_itemsize; + + while (N > 0) { + memcpy(dst, src, src_itemsize); + memset(dst + src_itemsize, 0, zero_size); + src += src_stride; + dst += dst_stride; + --N; + } +} + +NPY_NO_EXPORT int +PyArray_GetStridedZeroPadCopyFn(int aligned, + npy_intp src_stride, npy_intp dst_stride, + npy_intp src_itemsize, npy_intp dst_itemsize, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + if (src_itemsize >= dst_itemsize) { + /* If the sizes are different, the alignment flag isn't trustworthy */ 
+ if (src_itemsize != dst_itemsize) { + aligned = 0; + } + *outstransfer = PyArray_GetStridedCopyFn(aligned, src_stride, + dst_stride, dst_itemsize); + *outtransferdata = NULL; + return (*outstransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; + } + else { + _strided_zero_pad_data *d = PyArray_malloc( + sizeof(_strided_zero_pad_data)); + if (d == NULL) { + PyErr_NoMemory(); + return NPY_FAIL; + } + d->dst_itemsize = dst_itemsize; + d->freefunc = &PyArray_free; + d->copyfunc = &_strided_zero_pad_data_copy; + + *outstransfer = &_strided_to_strided_zero_pad_copy; + *outtransferdata = d; + return NPY_SUCCEED; + } +} + +/***************** WRAP ALIGNED CONTIGUOUS TRANSFER FUNCTION **************/ + +/* Wraps a transfer function + data in alignment code */ +typedef struct { + void *freefunc, *copyfunc; + PyArray_StridedTransferFn wrapped, + tobuffer, frombuffer; + void *wrappeddata, *todata, *fromdata; + npy_intp src_itemsize, dst_itemsize; + char *bufferin, *bufferout; +} _align_wrap_data; + +/* transfer data free function */ +void _align_wrap_data_free(_align_wrap_data *data) +{ + PyArray_FreeStridedTransferData(data->wrappeddata); + PyArray_FreeStridedTransferData(data->todata); + PyArray_FreeStridedTransferData(data->fromdata); + PyArray_free(data); +} + +/* transfer data copy function */ +_align_wrap_data *_align_wrap_data_copy(_align_wrap_data *data) +{ + _align_wrap_data *newdata; + npy_intp basedatasize, datasize; + + /* Round up the structure size to 16-byte boundary */ + basedatasize = (sizeof(_align_wrap_data)+15)&(-0x10); + /* Add space for two low level buffers */ + datasize = basedatasize + + NPY_LOWLEVEL_BUFFER_BLOCKSIZE*data->src_itemsize + + NPY_LOWLEVEL_BUFFER_BLOCKSIZE*data->dst_itemsize; + + /* Allocate the data, and populate it */ + newdata = (_align_wrap_data *)PyArray_malloc(datasize); + if (newdata == NULL) { + return NULL; + } + memcpy(newdata, data, basedatasize); + newdata->bufferin = (char *)newdata + basedatasize; + newdata->bufferout = 
newdata->bufferin + + NPY_LOWLEVEL_BUFFER_BLOCKSIZE*newdata->src_itemsize; + if (newdata->wrappeddata != NULL) { + newdata->wrappeddata = + PyArray_CopyStridedTransferData(data->wrappeddata); + if (newdata->wrappeddata == NULL) { + PyArray_free(newdata); + return NULL; + } + } + if (newdata->todata != NULL) { + newdata->todata = PyArray_CopyStridedTransferData(data->todata); + if (newdata->todata == NULL) { + PyArray_FreeStridedTransferData(newdata->wrappeddata); + PyArray_free(newdata); + return NULL; + } + } + if (newdata->fromdata != NULL) { + newdata->fromdata = PyArray_CopyStridedTransferData(data->fromdata); + if (newdata->fromdata == NULL) { + PyArray_FreeStridedTransferData(newdata->wrappeddata); + PyArray_FreeStridedTransferData(newdata->todata); + PyArray_free(newdata); + return NULL; + } + } + + return newdata; +} + +static void +_strided_to_strided_contig_align_wrap(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + void *data) +{ + _align_wrap_data *d = (_align_wrap_data *)data; + PyArray_StridedTransferFn wrapped = d->wrapped, + tobuffer = d->tobuffer, + frombuffer = d->frombuffer; + npy_intp dst_itemsize = d->dst_itemsize; + void *wrappeddata = d->wrappeddata, + *todata = d->todata, + *fromdata = d->fromdata; + char *bufferin = d->bufferin, *bufferout = d->bufferout; + + for(;;) { + if (N > NPY_LOWLEVEL_BUFFER_BLOCKSIZE) { + tobuffer(bufferin, src_itemsize, src, src_stride, + NPY_LOWLEVEL_BUFFER_BLOCKSIZE, + src_itemsize, todata); + wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, + NPY_LOWLEVEL_BUFFER_BLOCKSIZE, + src_itemsize, wrappeddata); + frombuffer(dst, dst_stride, bufferout, dst_itemsize, + NPY_LOWLEVEL_BUFFER_BLOCKSIZE, + dst_itemsize, fromdata); + N -= NPY_LOWLEVEL_BUFFER_BLOCKSIZE; + src += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*src_stride; + dst += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_stride; + } + else { + tobuffer(bufferin, src_itemsize, src, src_stride, N, + src_itemsize, todata); + 
wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, N, + src_itemsize, wrappeddata); + frombuffer(dst, dst_stride, bufferout, dst_itemsize, N, + dst_itemsize, fromdata); + return; + } + } +} + +static void +_strided_to_strided_contig_align_wrap_init_dest(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + void *data) +{ + _align_wrap_data *d = (_align_wrap_data *)data; + PyArray_StridedTransferFn wrapped = d->wrapped, + tobuffer = d->tobuffer, + frombuffer = d->frombuffer; + npy_intp dst_itemsize = d->dst_itemsize; + void *wrappeddata = d->wrappeddata, + *todata = d->todata, + *fromdata = d->fromdata; + char *bufferin = d->bufferin, *bufferout = d->bufferout; + + for(;;) { + if (N > NPY_LOWLEVEL_BUFFER_BLOCKSIZE) { + tobuffer(bufferin, src_itemsize, src, src_stride, + NPY_LOWLEVEL_BUFFER_BLOCKSIZE, + src_itemsize, todata); + memset(bufferout, 0, dst_itemsize*NPY_LOWLEVEL_BUFFER_BLOCKSIZE); + wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, + NPY_LOWLEVEL_BUFFER_BLOCKSIZE, + src_itemsize, wrappeddata); + frombuffer(dst, dst_stride, bufferout, dst_itemsize, + NPY_LOWLEVEL_BUFFER_BLOCKSIZE, + dst_itemsize, fromdata); + N -= NPY_LOWLEVEL_BUFFER_BLOCKSIZE; + src += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*src_stride; + dst += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_stride; + } + else { + tobuffer(bufferin, src_itemsize, src, src_stride, N, + src_itemsize, todata); + memset(bufferout, 0, dst_itemsize*N); + wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, N, + src_itemsize, wrappeddata); + frombuffer(dst, dst_stride, bufferout, dst_itemsize, N, + dst_itemsize, fromdata); + return; + } + } +} + +/* + * Wraps an aligned contig to contig transfer function between either + * copies or byte swaps to temporary buffers. + * + * src_itemsize/dst_itemsize - The sizes of the src and dst datatypes. + * tobuffer - copy/swap function from src to an aligned contiguous buffer. 
+ * todata - data for tobuffer + * frombuffer - copy/swap function from an aligned contiguous buffer to dst. + * fromdata - data for frombuffer + * wrapped - contig to contig transfer function being wrapped + * wrappeddata - data for wrapped + * init_dest - 1 means to memset the dest buffer to 0 before calling wrapped. + * + * Returns NPY_SUCCEED or NPY_FAIL. + */ +NPY_NO_EXPORT int +PyArray_WrapAlignedContigTransferFunction( + npy_intp src_itemsize, npy_intp dst_itemsize, + PyArray_StridedTransferFn tobuffer, void *todata, + PyArray_StridedTransferFn frombuffer, void *fromdata, + PyArray_StridedTransferFn wrapped, void *wrappeddata, + int init_dest, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + _align_wrap_data *data; + npy_intp basedatasize, datasize; + + /* Round up the structure size to 16-byte boundary */ + basedatasize = (sizeof(_align_wrap_data)+15)&(-0x10); + /* Add space for two low level buffers */ + datasize = basedatasize + + NPY_LOWLEVEL_BUFFER_BLOCKSIZE*src_itemsize + + NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_itemsize; + + /* Allocate the data, and populate it */ + data = (_align_wrap_data *)PyArray_malloc(datasize); + if (data == NULL) { + PyErr_NoMemory(); + return NPY_FAIL; + } + data->freefunc = (void *)&_align_wrap_data_free; + data->copyfunc = (void *)&_align_wrap_data_copy; + data->tobuffer = tobuffer; + data->todata = todata; + data->frombuffer = frombuffer; + data->fromdata = fromdata; + data->wrapped = wrapped; + data->wrappeddata = wrappeddata; + data->src_itemsize = src_itemsize; + data->dst_itemsize = dst_itemsize; + data->bufferin = (char *)data + basedatasize; + data->bufferout = data->bufferin + + NPY_LOWLEVEL_BUFFER_BLOCKSIZE*src_itemsize; + + /* Set the function and data */ + if (init_dest) { + *outstransfer = &_strided_to_strided_contig_align_wrap_init_dest; + } + else { + *outstransfer = &_strided_to_strided_contig_align_wrap; + } + *outtransferdata = data; + + return NPY_SUCCEED; +} + 
+/*************************** DTYPE CAST FUNCTIONS *************************/ + +/* Does a simple aligned cast */ +typedef struct { + void *freefunc, *copyfunc; + PyArray_VectorUnaryFunc *castfunc; + PyArrayObject *aip, *aop; +} _strided_cast_data; + +/* strided cast data free function */ +void _strided_cast_data_free(_strided_cast_data *data) +{ + Py_DECREF(data->aip); + Py_DECREF(data->aop); + PyArray_free(data); +} + +/* strided cast data copy function */ +_strided_cast_data *_strided_cast_data_copy(_strided_cast_data *data) +{ + _strided_cast_data *newdata = + (_strided_cast_data *)PyArray_malloc(sizeof(_strided_cast_data)); + if (newdata == NULL) { + return NULL; + } + + memcpy(newdata, data, sizeof(_strided_cast_data)); + Py_INCREF(newdata->aip); + Py_INCREF(newdata->aop); + + return newdata; +} + +static void +_aligned_strided_to_strided_cast(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + void *data) +{ + _strided_cast_data *d = (_strided_cast_data *)data; + PyArray_VectorUnaryFunc *castfunc = d->castfunc; + PyArrayObject *aip = d->aip, *aop = d->aop; + + while (N > 0) { + castfunc(src, dst, 1, aip, aop); + dst += dst_stride; + src += src_stride; + --N; + } +} + +/* This one requires src be of type NPY_OBJECT */ +static void +_aligned_strided_to_strided_cast_decref_src(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + void *data) +{ + _strided_cast_data *d = (_strided_cast_data *)data; + PyArray_VectorUnaryFunc *castfunc = d->castfunc; + PyArrayObject *aip = d->aip, *aop = d->aop; + PyObject *src_ref; + + while (N > 0) { + castfunc(src, dst, 1, aip, aop); + + /* After casting, decrement the source ref */ + NPY_COPY_PYOBJECT_PTR(&src_ref, src); + Py_XDECREF(src_ref); + + dst += dst_stride; + src += src_stride; + --N; + } +} + +static void +_aligned_contig_to_contig_cast(char *dst, npy_intp NPY_UNUSED(dst_stride), + char *src, npy_intp 
NPY_UNUSED(src_stride), + npy_intp N, npy_intp NPY_UNUSED(itemsize), + void *data) +{ + _strided_cast_data *d = (_strided_cast_data *)data; + PyArray_VectorUnaryFunc *castfunc = d->castfunc; + PyArrayObject *aip = d->aip, *aop = d->aop; + + castfunc(src, dst, N, aip, aop); +} + +/* + * Builds a strided transfer function which casts src_dtype to dst_dtype + * using the cast function from PyArray_GetCastFunc. When the data is + * aligned and both dtypes are in native byte order, a cast loop is + * returned directly; otherwise the cast is wrapped between copy/swap + * steps through aligned temporary buffers. If move_references is set + * and src is NPY_OBJECT, the cast loop also releases each source + * reference after casting it. + * + * Returns NPY_SUCCEED or NPY_FAIL. + */ +static int +get_cast_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + _strided_cast_data *data; + PyArray_VectorUnaryFunc *castfunc; + npy_intp shape = 1, src_itemsize = src_dtype->elsize, + dst_itemsize = dst_dtype->elsize; + + if (src_dtype->type_num == dst_dtype->type_num) { + PyErr_SetString(PyExc_ValueError, + "low level cast function is for unequal type numbers"); + return NPY_FAIL; + } + + /* Get the cast function */ + castfunc = PyArray_GetCastFunc(src_dtype, dst_dtype->type_num); + if (!castfunc) { + *outstransfer = NULL; + *outtransferdata = NULL; + return NPY_FAIL; + } + + /* Allocate the data for the casting */ + data = (_strided_cast_data *)PyArray_malloc(sizeof(_strided_cast_data)); + if (data == NULL) { + PyErr_NoMemory(); + *outstransfer = NULL; + *outtransferdata = NULL; + return NPY_FAIL; + } + data->freefunc = (void*)&_strided_cast_data_free; + data->copyfunc = (void*)&_strided_cast_data_copy; + data->castfunc = castfunc; + /* + * TODO: This is a hack so the cast functions have an array. + * The cast functions shouldn't need that. 
+ */ + Py_INCREF(dst_dtype); + data->aop = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dst_dtype, + 1, &shape, NULL, NULL, 0, NULL); + if (data->aop == NULL) { + Py_DECREF(data->aip); + PyArray_free(data); + return NPY_FAIL; + } + + + /* If it's aligned and all native byte order, we're all done */ + if (aligned && PyArray_ISNBO(src_dtype->byteorder) && + PyArray_ISNBO(dst_dtype->byteorder)) { + /* Choose the contiguous cast if we can */ + if (move_references && src_dtype->type_num == NPY_OBJECT) { + *outstransfer = _aligned_strided_to_strided_cast_decref_src; + } + else { + if (src_stride == src_itemsize && dst_stride == dst_itemsize) { + *outstransfer = _aligned_contig_to_contig_cast; + } + else { + *outstransfer = _aligned_strided_to_strided_cast; + } + } + *outtransferdata = data; + + return NPY_SUCCEED; + } + /* Otherwise, we have to copy and/or swap to aligned temporaries */ + else { + PyArray_StridedTransferFn tobuffer, frombuffer, casttransfer; + + /* Get the copy/swap operation from src */ + if (src_itemsize == 1 || PyArray_ISNBO(src_dtype->byteorder)) { + tobuffer = PyArray_GetStridedCopyFn(aligned, + src_stride, src_itemsize, + src_itemsize); + } + /* If it's not complex, one swap */ + else if(src_dtype->kind != 'c') { + tobuffer = PyArray_GetStridedCopySwapFn(aligned, + src_stride, src_itemsize, + src_itemsize); + } + /* If complex, a paired swap */ + else { + tobuffer = PyArray_GetStridedCopySwapPairFn(aligned, + src_stride, src_itemsize, + src_itemsize); + } + + /* Get the copy/swap operation to dst */ + if (dst_itemsize == 1 || PyArray_ISNBO(dst_dtype->byteorder)) { + if (dst_dtype->type_num == NPY_OBJECT) { + frombuffer = &_strided_to_strided_move_references; + } + else { + frombuffer = PyArray_GetStridedCopyFn(aligned, + dst_itemsize, dst_stride, + dst_itemsize); + } + } + /* If it's not complex, one swap */ + else if(dst_dtype->kind != 'c') { + frombuffer = PyArray_GetStridedCopySwapFn(aligned, + dst_itemsize, dst_stride, + 
dst_itemsize); + } + /* If complex, a paired swap */ + else { + frombuffer = PyArray_GetStridedCopySwapPairFn(aligned, + dst_itemsize, dst_stride, + dst_itemsize); + } + + if (frombuffer == NULL || tobuffer == NULL) { + PyArray_FreeStridedTransferData(data); + return NPY_FAIL; + } + + /* + * If necessary, use the cast function with source decref. This must + * go in casttransfer, since outstransfer is filled in by the wrap + * call below. + */ + if (move_references && src_dtype->type_num == NPY_OBJECT) { + casttransfer = &_aligned_strided_to_strided_cast_decref_src; + } + /* Use the aligned contiguous cast otherwise */ + else { + casttransfer = &_aligned_contig_to_contig_cast; + } + + /* Wrap it all up in a new transfer function + data */ + if (PyArray_WrapAlignedContigTransferFunction( + src_itemsize, dst_itemsize, + tobuffer, NULL, + frombuffer, NULL, + casttransfer, data, + PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT), + outstransfer, outtransferdata) != NPY_SUCCEED) { + PyArray_FreeStridedTransferData(data); + return NPY_FAIL; + } + + return NPY_SUCCEED; + } +} + +/**************************** COPY 1 TO N CONTIGUOUS ************************/ + +/* Copies 1 element to N contiguous elements */ +typedef struct { + void *freefunc, *copyfunc; + PyArray_StridedTransferFn stransfer; + void *data; + npy_intp N, dst_itemsize; + /* If this is non-NULL the source type has references needing a decref */ + PyArray_StridedTransferFn stransfer_finish_src; + void *data_finish_src; +} _one_to_n_data; + +/* transfer data free function */ +void _one_to_n_data_free(_one_to_n_data *data) +{ + PyArray_FreeStridedTransferData(data->data); + PyArray_FreeStridedTransferData(data->data_finish_src); + PyArray_free(data); +} + +/* transfer data copy function */ +_one_to_n_data *_one_to_n_data_copy(_one_to_n_data *data) +{ + _one_to_n_data *newdata; + + /* Allocate the data, and populate it */ + newdata = (_one_to_n_data *)PyArray_malloc(sizeof(_one_to_n_data)); + if (newdata == NULL) { + return NULL; + } + memcpy(newdata, data, sizeof(_one_to_n_data)); + if (data->data != NULL) { + 
newdata->data = PyArray_CopyStridedTransferData(data->data); + if (newdata->data == NULL) { + PyArray_free(newdata); + return NULL; + } + } + if (data->data_finish_src != NULL) { + newdata->data_finish_src = + PyArray_CopyStridedTransferData(data->data_finish_src); + if (newdata->data_finish_src == NULL) { + PyArray_FreeStridedTransferData(newdata->data); + PyArray_free(newdata); + return NULL; + } + } + + return newdata; +} + +static void +_strided_to_strided_one_to_n(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + void *data) +{ + _one_to_n_data *d = (_one_to_n_data *)data; + PyArray_StridedTransferFn subtransfer = d->stransfer; + void *subdata = d->data; + npy_intp subN = d->N, dst_itemsize = d->dst_itemsize; + + while (N > 0) { + subtransfer(dst, dst_itemsize, + src, 0, + subN, src_itemsize, + subdata); + + src += src_stride; + dst += dst_stride; + --N; + } +} + +/* + * Same as _strided_to_strided_one_to_n, but after each source element + * has been copied to its subN destination elements, also calls + * stransfer_finish_src on that one source element. + */ +static void +_strided_to_strided_one_to_n_with_finish(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + void *data) +{ + _one_to_n_data *d = (_one_to_n_data *)data; + PyArray_StridedTransferFn subtransfer = d->stransfer, + stransfer_finish_src = d->stransfer_finish_src; + /* The finish function's data is stored in the wrapper struct */ + void *subdata = d->data, *data_finish_src = d->data_finish_src; + npy_intp subN = d->N, dst_itemsize = d->dst_itemsize; + + while (N > 0) { + subtransfer(dst, dst_itemsize, + src, 0, + subN, src_itemsize, + subdata); + + + stransfer_finish_src(NULL, 0, + src, 0, + 1, src_itemsize, + data_finish_src); + + src += src_stride; + dst += dst_stride; + --N; + } +} + +/* + * Wraps a transfer function to produce one that copies one element + * of src to N contiguous elements of dst. If stransfer_finish_src is + * not NULL, it should be a transfer function which just affects + * src, for example to do a final DECREF operation for references. 
+ */ +static int +wrap_transfer_function_one_to_n( + PyArray_StridedTransferFn stransfer_inner, + void *data_inner, + PyArray_StridedTransferFn stransfer_finish_src, + void *data_finish_src, + npy_intp dst_itemsize, + npy_intp N, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + _one_to_n_data *data; + + + data = PyArray_malloc(sizeof(_one_to_n_data)); + if (data == NULL) { + PyErr_NoMemory(); + return NPY_FAIL; + } + + data->freefunc = &_one_to_n_data_free; + data->copyfunc = &_one_to_n_data_copy; + data->stransfer = stransfer_inner; + data->data = data_inner; + data->stransfer_finish_src = stransfer_finish_src; + data->data_finish_src = data_finish_src; + data->N = N; + data->dst_itemsize = dst_itemsize; + + if (stransfer_finish_src == NULL) { + *outstransfer = &_strided_to_strided_one_to_n; + } + else { + *outstransfer = &_strided_to_strided_one_to_n_with_finish; + } + *outtransferdata = data; + + return NPY_SUCCEED; +} + +static int +get_one_to_n_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + npy_intp N, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + PyArray_StridedTransferFn stransfer, stransfer_finish_src = NULL; + void *data, *data_finish_src = NULL; + + /* + * move_references is set to 0, handled in the wrapping transfer fn, + * src_stride is set to zero, because its 1 to N copying, + * and dst_stride is set to contiguous, because subarrays are always + * contiguous. 
+ */ + if (PyArray_GetDTypeTransferFunction(aligned, + 0, dst_dtype->elsize, + src_dtype, dst_dtype, + 0, + &stransfer, &data) != NPY_SUCCEED) { + return NPY_FAIL; + } + + /* If the src object will need a DECREF, set src_dtype */ + if (move_references && PyDataType_REFCHK(src_dtype)) { + if (PyArray_GetDecSrcRefTransferFunction(aligned, + src_stride, + src_dtype, + &stransfer_finish_src, + &data_finish_src) != NPY_SUCCEED) { + PyArray_FreeStridedTransferData(data); + return NPY_FAIL; + } + } + + if (wrap_transfer_function_one_to_n(stransfer, data, + stransfer_finish_src, data_finish_src, + dst_dtype->elsize, + N, + outstransfer, outtransferdata) != NPY_SUCCEED) { + PyArray_FreeStridedTransferData(data); + PyArray_FreeStridedTransferData(data_finish_src); + return NPY_FAIL; + } + + return NPY_SUCCEED; +} + +/**************************** COPY N TO N CONTIGUOUS ************************/ + +/* Copies N contiguous elements to N contiguous elements */ +typedef struct { + void *freefunc, *copyfunc; + PyArray_StridedTransferFn stransfer; + void *data; + npy_intp N, src_itemsize, dst_itemsize; +} _n_to_n_data; + +/* transfer data free function */ +void _n_to_n_data_free(_n_to_n_data *data) +{ + PyArray_FreeStridedTransferData(data->data); + PyArray_free(data); +} + +/* transfer data copy function */ +_n_to_n_data *_n_to_n_data_copy(_n_to_n_data *data) +{ + _n_to_n_data *newdata; + + /* Allocate the data, and populate it */ + newdata = (_n_to_n_data *)PyArray_malloc(sizeof(_n_to_n_data)); + if (newdata == NULL) { + return NULL; + } + memcpy(newdata, data, sizeof(_n_to_n_data)); + if (newdata->data != NULL) { + newdata->data = PyArray_CopyStridedTransferData(data->data); + if (newdata->data == NULL) { + PyArray_free(newdata); + return NULL; + } + } + + return newdata; +} + +static void +_strided_to_strided_n_to_n(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + void *data) +{ + _n_to_n_data *d = (_n_to_n_data *)data; + 
PyArray_StridedTransferFn subtransfer = d->stransfer; + void *subdata = d->data; + npy_intp subN = d->N, src_subitemsize = d->src_itemsize, + dst_subitemsize = d->dst_itemsize; + + while (N > 0) { + subtransfer(dst, dst_subitemsize, + src, src_subitemsize, + subN, src_subitemsize, + subdata); + + src += src_stride; + dst += dst_stride; + --N; + } +} + +static void +_contig_to_contig_n_to_n(char *dst, npy_intp NPY_UNUSED(dst_stride), + char *src, npy_intp NPY_UNUSED(src_stride), + npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + void *data) +{ + _n_to_n_data *d = (_n_to_n_data *)data; + PyArray_StridedTransferFn subtransfer = d->stransfer; + void *subdata = d->data; + npy_intp subN = d->N, src_subitemsize = d->src_itemsize, + dst_subitemsize = d->dst_itemsize; + + subtransfer(dst, dst_subitemsize, + src, src_subitemsize, + subN*N, src_subitemsize, + subdata); +} + +/* + * Wraps a transfer function to produce one that copies N contiguous elements + * of src to N contiguous elements of dst. + */ +static int +wrap_transfer_function_n_to_n( + PyArray_StridedTransferFn stransfer_inner, + void *data_inner, + npy_intp src_stride, npy_intp dst_stride, + npy_intp src_itemsize, npy_intp dst_itemsize, + npy_intp N, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + _n_to_n_data *data; + + data = PyArray_malloc(sizeof(_n_to_n_data)); + if (data == NULL) { + PyErr_NoMemory(); + return NPY_FAIL; + } + + data->freefunc = &_n_to_n_data_free; + data->copyfunc = &_n_to_n_data_copy; + data->stransfer = stransfer_inner; + data->data = data_inner; + data->N = N; + data->src_itemsize = src_itemsize; + data->dst_itemsize = dst_itemsize; + + /* + * If the N subarray elements exactly fit in the strides, + * then can do a faster contiguous transfer. 
+ */ + if (src_stride == N * src_itemsize && + dst_stride == N * dst_itemsize) { + *outstransfer = &_contig_to_contig_n_to_n; + } + else { + *outstransfer = &_strided_to_strided_n_to_n; + } + *outtransferdata = data; + + return NPY_SUCCEED; +} + +static int +get_n_to_n_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + npy_intp N, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + PyArray_StridedTransferFn stransfer; + void *data; + + /* + * src_stride and dst_stride are set to contiguous, because + * subarrays are always contiguous. + */ + if (PyArray_GetDTypeTransferFunction(aligned, + src_dtype->elsize, dst_dtype->elsize, + src_dtype, dst_dtype, + move_references, + &stransfer, &data) != NPY_SUCCEED) { + return NPY_FAIL; + } + + if (wrap_transfer_function_n_to_n(stransfer, data, + src_stride, dst_stride, + src_dtype->elsize, dst_dtype->elsize, + N, + outstransfer, + outtransferdata) != NPY_SUCCEED) { + PyArray_FreeStridedTransferData(data); + return NPY_FAIL; + } + + return NPY_SUCCEED; +} + +/********************** COPY WITH SUBARRAY BROADCAST ************************/ + +/* Copies element with subarray broadcasting */ +typedef struct { + void *freefunc, *copyfunc; + PyArray_StridedTransferFn stransfer; + void *data; + npy_intp src_N, dst_N, src_itemsize, dst_itemsize; + /* If this is non-NULL the source type has references needing a decref */ + PyArray_Descr *src_dtype; + /* If this is non-NULL, the dest type has references needing a decref */ + PyArray_Descr *dst_dtype; + npy_intp offsets; +} _subarray_broadcast_data; + +/* transfer data free function */ +void _subarray_broadcast_data_free(_subarray_broadcast_data *data) +{ + PyArray_FreeStridedTransferData(data->data); + Py_XDECREF(data->src_dtype); + Py_XDECREF(data->dst_dtype); + PyArray_free(data); +} + +/* transfer data copy function */ +_subarray_broadcast_data 
*_subarray_broadcast_data_copy( + _subarray_broadcast_data *data) +{ + _subarray_broadcast_data *newdata; + npy_intp dst_N = data->dst_N, structsize; + + structsize = sizeof(_subarray_broadcast_data) + dst_N*NPY_SIZEOF_INTP; + + /* Allocate the data and populate it */ + newdata = (_subarray_broadcast_data *)PyArray_malloc(structsize); + if (newdata == NULL) { + return NULL; + } + memcpy(newdata, data, structsize); + if (data->data != NULL) { + newdata->data = PyArray_CopyStridedTransferData(data->data); + if (newdata->data == NULL) { + PyArray_free(newdata); + return NULL; + } + } + Py_XINCREF(newdata->src_dtype); + Py_XINCREF(newdata->dst_dtype); + + return newdata; +} + +/* + * Transfers N elements, filling each of the dst_N destination subarray + * items either from the source item at offsets[i] or, when offsets[i] + * is -1, with zeros. When dst_dtype is set, a destination item that is + * about to be zero-filled has its existing references released first; + * when src_dtype is set, the references in the source subarray are + * released after the copy. + */ +static void +_strided_to_strided_subarray_broadcast(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + void *data) +{ + _subarray_broadcast_data *d = (_subarray_broadcast_data *)data; + PyArray_Descr *src_dtype = d->src_dtype, *dst_dtype = d->dst_dtype; + PyArray_StridedTransferFn subtransfer = d->stransfer; + void *subdata = d->data; + npy_intp i, dst_subN = d->dst_N, src_subN = d->src_N, + src_subitemsize = d->src_itemsize, + dst_subitemsize = d->dst_itemsize; + npy_intp *offsets = &d->offsets; + + if (src_dtype == NULL && dst_dtype == NULL) { + while (N > 0) { + for (i = 0; i < dst_subN; ++i) { + if (offsets[i] != -1) { + subtransfer(dst + i*dst_subitemsize, dst_subitemsize, + src + offsets[i], src_subitemsize, + 1, src_subitemsize, + subdata); + } + else { + char *tmp = dst + i*dst_subitemsize; + memset(tmp, 0, dst_subitemsize); + } + } + + src += src_stride; + dst += dst_stride; + --N; + } + } + else { + while (N > 0) { + for (i = 0; i < dst_subN; ++i) { + if (offsets[i] != -1) { + subtransfer(dst + i*dst_subitemsize, dst_subitemsize, + src + offsets[i], src_subitemsize, + 1, src_subitemsize, + subdata); + } + else { + char *tmp = dst + i*dst_subitemsize; + if (dst_dtype) { + PyArray_Item_XDECREF(tmp, dst_dtype); + } + 
memset(tmp, 0, dst_subitemsize); + } + } + + /* + * This branch is also taken when only dst_dtype is set, so + * src_dtype may be NULL here; only release source references + * when it is set. + */ + if (src_dtype != NULL) { + for (i = 0; i < src_subN; ++i) { + PyArray_Item_XDECREF(src + i*src_subitemsize, src_dtype); + } + } + + src += src_stride; + dst += dst_stride; + --N; + } + } +} + + +static int +get_subarray_broadcast_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + npy_intp src_size, npy_intp dst_size, + PyArray_Dims src_shape, PyArray_Dims dst_shape, + int move_references, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + _subarray_broadcast_data *data; + npy_intp structsize, index, src_index, dst_index, i, ndim, *offsets; + + structsize = sizeof(_subarray_broadcast_data) + dst_size*NPY_SIZEOF_INTP; + + /* Allocate the data and populate it */ + data = (_subarray_broadcast_data *)PyArray_malloc(structsize); + if (data == NULL) { + PyErr_NoMemory(); + return NPY_FAIL; + } + + /* + * move_references is set to 0, handled in the wrapping transfer fn, + * src_stride and dst_stride are set to contiguous, as N will always + * be 1 when it's called. 
+ */ + if (PyArray_GetDTypeTransferFunction(aligned, + src_dtype->elsize, dst_dtype->elsize, + src_dtype, dst_dtype, + 0, + &data->stransfer, &data->data) != NPY_SUCCEED) { + PyArray_free(data); + return NPY_FAIL; + } + data->freefunc = &_subarray_broadcast_data_free; + data->copyfunc = &_subarray_broadcast_data_copy; + data->src_N = src_size; + data->dst_N = dst_size; + data->src_itemsize = src_dtype->elsize; + data->dst_itemsize = dst_dtype->elsize; + + /* If the src object will need a DECREF, set src_dtype */ + if (move_references && PyDataType_REFCHK(src_dtype)) { + data->src_dtype = src_dtype; + Py_INCREF(src_dtype); + } + else { + data->src_dtype = NULL; + } + + /* If the dst object needs a DECREF to set it to NULL, set dst_dtype */ + if (PyDataType_REFCHK(dst_dtype)) { + data->dst_dtype = dst_dtype; + Py_INCREF(dst_dtype); + } + else { + data->dst_dtype = NULL; + } + + /* Calculate the broadcasting and set the offsets */ + offsets = &data->offsets; + ndim = (src_shape.len > dst_shape.len) ? 
src_shape.len : dst_shape.len; + for (index = 0; index < dst_size; ++index) { + npy_intp src_factor = 1; + + dst_index = index; + src_index = 0; + for (i = ndim-1; i >= 0; --i) { + npy_intp coord = 0, shape; + + /* Get the dst coord of this index for dimension i */ + if (i >= ndim - dst_shape.len) { + shape = dst_shape.ptr[i-(ndim-dst_shape.len)]; + coord = dst_index % shape; + dst_index /= shape; + } + + /* Translate it into a src coord and update src_index */ + if (i >= ndim - src_shape.len) { + shape = src_shape.ptr[i-(ndim-src_shape.len)]; + if (shape == 1) { + coord = 0; + } + else { + if (coord < shape) { + src_index += src_factor*coord; + src_factor *= shape; + } + else { + /* Out of bounds, flag with -1 */ + src_index = -1; + break; + } + } + } + } + /* Set the offset */ + if (src_index == -1) { + offsets[index] = -1; + } + else { + offsets[index] = src_index * src_dtype->elsize; + } + } + + *outstransfer = &_strided_to_strided_subarray_broadcast; + *outtransferdata = data; + + return NPY_SUCCEED; +} + +/* + * Handles subarray transfer. 
To call this, at least one of the dtype's + * subarrays must be non-NULL + */ +static int +get_subarray_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + PyArray_Dims src_shape = {NULL, -1}, dst_shape = {NULL, -1}; + npy_intp src_size = 1, dst_size = 1; + + /* Get the subarray shapes and sizes */ + if (src_dtype->subarray != NULL) { + if (!(PyArray_IntpConverter(src_dtype->subarray->shape, + &src_shape))) { + PyErr_SetString(PyExc_ValueError, + "invalid subarray shape"); + return NPY_FAIL; + } + src_size = PyArray_MultiplyList(src_shape.ptr, src_shape.len); + src_dtype = src_dtype->subarray->base; + } + if (dst_dtype->subarray != NULL) { + if (!(PyArray_IntpConverter(dst_dtype->subarray->shape, + &dst_shape))) { + if (src_shape.ptr != NULL) { + PyDimMem_FREE(src_shape.ptr); + } + PyErr_SetString(PyExc_ValueError, + "invalid subarray shape"); + return NPY_FAIL; + } + dst_size = PyArray_MultiplyList(dst_shape.ptr, dst_shape.len); + dst_dtype = dst_dtype->subarray->base; + } + + /* + * Just a straight one-element copy. If the source size isn't 1, + * we copy the element at index 0. 
If the source data type is + * a reference and we're moving references, a DECREF for each + * source element would also be needed, so the general case will be + * used below + */ + if (dst_size == 1 && (src_size == 1 || + !move_references || + !PyDataType_REFCHK(src_dtype))) { + PyDimMem_FREE(src_shape.ptr); + PyDimMem_FREE(dst_shape.ptr); + + return PyArray_GetDTypeTransferFunction(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + move_references, + outstransfer, outtransferdata); + } + /* Copy the src value to all the dst values */ + else if (src_size == 1) { + PyDimMem_FREE(src_shape.ptr); + PyDimMem_FREE(dst_shape.ptr); + + return get_one_to_n_transfer_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + move_references, + dst_size, + outstransfer, outtransferdata); + } + /* If the shapes match exactly, do an n to n copy */ + else if (src_shape.len == dst_shape.len && + PyArray_CompareLists(src_shape.ptr, dst_shape.ptr, + src_shape.len)) { + PyDimMem_FREE(src_shape.ptr); + PyDimMem_FREE(dst_shape.ptr); + + return get_n_to_n_transfer_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + move_references, + src_size, + outstransfer, outtransferdata); + } + /* + * Copy the subarray with broadcasting, truncating, and zero-padding + * as necessary. 
+ */ + else { + int ret = get_subarray_broadcast_transfer_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + src_size, dst_size, + src_shape, dst_shape, + move_references, + outstransfer, outtransferdata); + + PyDimMem_FREE(src_shape.ptr); + PyDimMem_FREE(dst_shape.ptr); + return ret; + } +} + +/**************************** COPY FIELDS *******************************/ +typedef struct { + npy_intp src_offset, dst_offset, src_itemsize; + PyArray_StridedTransferFn stransfer; + void *data; +} _single_field_transfer; + +typedef struct { + void *freefunc, *copyfunc; + npy_intp field_count; + + _single_field_transfer fields; +} _field_transfer_data; + +/* transfer data free function */ +void _field_transfer_data_free(_field_transfer_data *data) +{ + npy_intp i, field_count = data->field_count; + _single_field_transfer *fields = &data->fields; + + for (i = 0; i < field_count; ++i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); +} + +/* transfer data copy function */ +_field_transfer_data *_field_transfer_data_copy( + _field_transfer_data *data) +{ + _field_transfer_data *newdata; + npy_intp i, field_count = data->field_count, structsize; + _single_field_transfer *fields, *newfields; + + structsize = sizeof(_field_transfer_data) + + field_count * sizeof(_single_field_transfer); + + /* Allocate the data and populate it */ + newdata = (_field_transfer_data *)PyArray_malloc(structsize); + if (newdata == NULL) { + return NULL; + } + memcpy(newdata, data, structsize); + /* Copy all the fields transfer data */ + fields = &data->fields; + newfields = &newdata->fields; + for (i = 0; i < field_count; ++i) { + if (fields[i].data != NULL) { + newfields[i].data = + PyArray_CopyStridedTransferData(fields[i].data); + if (newfields[i].data == NULL) { + for (i = i-1; i >= 0; --i) { + PyArray_FreeStridedTransferData(newfields[i].data); + } + PyArray_free(newdata); + return NULL; + } + } + + } + + return newdata; +} + +static void 
+_strided_to_strided_field_transfer(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + void *data) +{ + _field_transfer_data *d = (_field_transfer_data *)data; + npy_intp i, field_count = d->field_count; + _single_field_transfer *field; + + /* Do the transfer a block at a time */ + for (;;) { + field = &d->fields; + if (N > NPY_LOWLEVEL_BUFFER_BLOCKSIZE) { + for (i = 0; i < field_count; ++i, ++field) { + field->stransfer(dst + field->dst_offset, dst_stride, + src + field->src_offset, src_stride, + NPY_LOWLEVEL_BUFFER_BLOCKSIZE, + field->src_itemsize, + field->data); + } + N -= NPY_LOWLEVEL_BUFFER_BLOCKSIZE; + src += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*src_stride; + dst += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_stride; + } + else { + for (i = 0; i < field_count; ++i, ++field) { + field->stransfer(dst + field->dst_offset, dst_stride, + src + field->src_offset, src_stride, + N, + field->src_itemsize, + field->data); + } + return; + } + } +} + +/* + * Handles fields transfer. 
To call this, at least one of the dtypes + * must have fields + */ +static int +get_fields_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + PyObject *names, *key, *tup, *title; + PyArray_Descr *src_fld_dtype, *dst_fld_dtype; + npy_int i, names_size, field_count, structsize; + int src_offset, dst_offset; + _field_transfer_data *data; + _single_field_transfer *fields; + + /* Copy the src value to all the fields of dst */ + if (!PyDescr_HASFIELDS(src_dtype)) { + names = dst_dtype->names; + names_size = PyTuple_GET_SIZE(dst_dtype->names); + + field_count = names_size; + structsize = sizeof(_field_transfer_data) + + (field_count + 1) * sizeof(_single_field_transfer); + /* Allocate the data and populate it */ + data = (_field_transfer_data *)PyArray_malloc(structsize); + if (data == NULL) { + PyErr_NoMemory(); + return NPY_FAIL; + } + data->freefunc = &_field_transfer_data_free; + data->copyfunc = &_field_transfer_data_copy; + fields = &data->fields; + + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + tup = PyDict_GetItem(dst_dtype->fields, key); + if (!PyArg_ParseTuple(tup, "Oi|O", &dst_fld_dtype, + &dst_offset, &title)) { + PyArray_free(data); + return NPY_FAIL; + } + if (PyArray_GetDTypeTransferFunction(0, + src_stride, dst_stride, + src_dtype, dst_fld_dtype, + 0, + &fields[i].stransfer, + &fields[i].data) != NPY_SUCCEED) { + for (i = i-1; i >= 0; --i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); + return NPY_FAIL; + } + fields[i].src_offset = 0; + fields[i].dst_offset = dst_offset; + fields[i].src_itemsize = src_dtype->elsize; + } + + /* + * If the references should be removed from src, add + * another transfer function to do that. 
+ */ + if (move_references && PyDataType_REFCHK(src_dtype)) { + if (PyArray_GetDecSrcRefTransferFunction(0, + src_stride, + src_dtype, + &fields[field_count].stransfer, + &fields[field_count].data) != NPY_SUCCEED) { + for (i = 0; i < field_count; ++i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); + return NPY_FAIL; + } + fields[field_count].src_offset = 0; + fields[field_count].dst_offset = 0; + fields[field_count].src_itemsize = src_dtype->elsize; + field_count++; + } + data->field_count = field_count; + + *outstransfer = &_strided_to_strided_field_transfer; + *outtransferdata = data; + + return NPY_SUCCEED; + } + /* Copy the value of the first field to dst */ + else if (!PyDescr_HASFIELDS(dst_dtype)) { + names = src_dtype->names; + names_size = PyTuple_GET_SIZE(src_dtype->names); + + /* + * If DECREF is needed on source fields, may need + * to process all the fields + */ + if (move_references && PyDataType_REFCHK(src_dtype)) { + field_count = names_size; + } + else { + field_count = 1; + } + structsize = sizeof(_field_transfer_data) + + field_count * sizeof(_single_field_transfer); + /* Allocate the data and populate it */ + data = (_field_transfer_data *)PyArray_malloc(structsize); + if (data == NULL) { + PyErr_NoMemory(); + return NPY_FAIL; + } + data->freefunc = &_field_transfer_data_free; + data->copyfunc = &_field_transfer_data_copy; + fields = &data->fields; + + key = PyTuple_GET_ITEM(names, 0); + tup = PyDict_GetItem(src_dtype->fields, key); + if (!PyArg_ParseTuple(tup, "Oi|O", &src_fld_dtype, + &src_offset, &title)) { + PyArray_free(data); + return NPY_FAIL; + } + if (PyArray_GetDTypeTransferFunction(0, + src_stride, dst_stride, + src_fld_dtype, dst_dtype, + move_references, + &fields[0].stransfer, + &fields[0].data) != NPY_SUCCEED) { + PyArray_free(data); + return NPY_FAIL; + } + fields[0].src_offset = src_offset; + fields[0].dst_offset = 0; + fields[0].src_itemsize = src_dtype->elsize; + + /* + * If the references should 
be removed from src, add + * more transfer functions to decrement the references + * for all the other fields. + */ + if (move_references && PyDataType_REFCHK(src_dtype)) { + field_count = 1; + for (i = 1; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + tup = PyDict_GetItem(src_dtype->fields, key); + if (!PyArg_ParseTuple(tup, "Oi|O", &src_fld_dtype, + &src_offset, &title)) { + return NPY_FAIL; + } + if (PyDataType_REFCHK(src_fld_dtype)) { + if (PyArray_GetDecSrcRefTransferFunction(0, + src_stride, + src_fld_dtype, + &fields[field_count].stransfer, + &fields[field_count].data) != NPY_SUCCEED) { + for (i = field_count-1; i >= 0; --i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); + return NPY_FAIL; + } + fields[field_count].src_offset = src_offset; + fields[field_count].dst_offset = 0; + fields[field_count].src_itemsize = src_fld_dtype->elsize; + field_count++; + } + } + } + + data->field_count = field_count; + + *outstransfer = &_strided_to_strided_field_transfer; + *outtransferdata = data; + + return NPY_SUCCEED; + } + /* Match up the fields to copy */ + else { + /* Keeps track of the names we already used */ + PyObject *used_names_dict = NULL; + + names = dst_dtype->names; + names_size = PyTuple_GET_SIZE(dst_dtype->names); + + /* + * If DECREF is needed on source fields, will need + * to also go through its fields. 
+ */ + if (move_references && PyDataType_REFCHK(src_dtype)) { + field_count = names_size + PyTuple_GET_SIZE(src_dtype->names); + used_names_dict = PyDict_New(); + if (used_names_dict == NULL) { + return NPY_FAIL; + } + } + else { + field_count = names_size; + } + structsize = sizeof(_field_transfer_data) + + field_count * sizeof(_single_field_transfer); + /* Allocate the data and populate it */ + data = (_field_transfer_data *)PyArray_malloc(structsize); + if (data == NULL) { + PyErr_NoMemory(); + Py_XDECREF(used_names_dict); + return NPY_FAIL; + } + data->freefunc = &_field_transfer_data_free; + data->copyfunc = &_field_transfer_data_copy; + fields = &data->fields; + + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + tup = PyDict_GetItem(dst_dtype->fields, key); + if (!PyArg_ParseTuple(tup, "Oi|O", &dst_fld_dtype, + &dst_offset, &title)) { + for (i = i-1; i >= 0; --i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); + Py_XDECREF(used_names_dict); + return NPY_FAIL; + } + tup = PyDict_GetItem(src_dtype->fields, key); + if (tup != NULL) { + if (!PyArg_ParseTuple(tup, "Oi|O", &src_fld_dtype, + &src_offset, &title)) { + for (i = i-1; i >= 0; --i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); + Py_XDECREF(used_names_dict); + return NPY_FAIL; + } + if (PyArray_GetDTypeTransferFunction(0, + src_stride, dst_stride, + src_fld_dtype, dst_fld_dtype, + move_references, + &fields[i].stransfer, + &fields[i].data) != NPY_SUCCEED) { + for (i = i-1; i >= 0; --i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); + Py_XDECREF(used_names_dict); + return NPY_FAIL; + } + fields[i].src_offset = src_offset; + fields[i].dst_offset = dst_offset; + fields[i].src_itemsize = src_fld_dtype->elsize; + + if (used_names_dict != NULL) { + PyDict_SetItem(used_names_dict, key, Py_True); + } + } + else { + if (PyArray_GetSetDstZeroTransferFunction(0, + dst_stride, + dst_fld_dtype, + 
&fields[i].stransfer, + &fields[i].data) != NPY_SUCCEED) { + for (i = i-1; i >= 0; --i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); + Py_XDECREF(used_names_dict); + return NPY_FAIL; + } + fields[i].src_offset = 0; + fields[i].dst_offset = dst_offset; + fields[i].src_itemsize = 0; + } + } + + if (move_references && PyDataType_REFCHK(src_dtype)) { + /* Use field_count to track additional functions added */ + field_count = names_size; + + names = src_dtype->names; + names_size = PyTuple_GET_SIZE(src_dtype->names); + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + if (PyDict_GetItem(used_names_dict, key) == NULL) { + tup = PyDict_GetItem(src_dtype->fields, key); + if (!PyArg_ParseTuple(tup, "Oi|O", &src_fld_dtype, + &src_offset, &title)) { + for (i = field_count-1; i >= 0; --i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); + Py_XDECREF(used_names_dict); + return NPY_FAIL; + } + if (PyDataType_REFCHK(src_fld_dtype)) { + if (PyArray_GetDecSrcRefTransferFunction(0, + src_stride, + src_fld_dtype, + &fields[field_count].stransfer, + &fields[field_count].data) != NPY_SUCCEED) { + for (i = field_count-1; i >= 0; --i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); + return NPY_FAIL; + } + fields[field_count].src_offset = src_offset; + fields[field_count].dst_offset = 0; + fields[field_count].src_itemsize = + src_fld_dtype->elsize; + field_count++; + } + } + } + } + + Py_XDECREF(used_names_dict); + + data->field_count = field_count; + + *outstransfer = &_strided_to_strided_field_transfer; + *outtransferdata = data; + + return NPY_SUCCEED; + } +} + +static int +get_decsrcref_fields_transfer_function(int aligned, + npy_intp src_stride, + PyArray_Descr *src_dtype, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + PyObject *names, *key, *tup, *title; + PyArray_Descr *src_fld_dtype; + npy_int i, names_size, field_count, structsize; + 
int src_offset; + _field_transfer_data *data; + _single_field_transfer *fields; + + names = src_dtype->names; + names_size = PyTuple_GET_SIZE(src_dtype->names); + + field_count = names_size; + structsize = sizeof(_field_transfer_data) + + field_count * sizeof(_single_field_transfer); + /* Allocate the data and populate it */ + data = (_field_transfer_data *)PyArray_malloc(structsize); + if (data == NULL) { + PyErr_NoMemory(); + return NPY_FAIL; + } + data->freefunc = &_field_transfer_data_free; + data->copyfunc = &_field_transfer_data_copy; + fields = &data->fields; + + field_count = 0; + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + tup = PyDict_GetItem(src_dtype->fields, key); + if (!PyArg_ParseTuple(tup, "Oi|O", &src_fld_dtype, + &src_offset, &title)) { + PyArray_free(data); + return NPY_FAIL; + } + if (PyDataType_REFCHK(src_fld_dtype)) { + if (PyArray_GetDecSrcRefTransferFunction(0, + src_stride, + src_fld_dtype, + &fields[field_count].stransfer, + &fields[field_count].data) != NPY_SUCCEED) { + for (i = field_count-1; i >= 0; --i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); + return NPY_FAIL; + } + fields[field_count].src_offset = src_offset; + fields[field_count].dst_offset = 0; + fields[field_count].src_itemsize = src_dtype->elsize; + field_count++; + } + } + + data->field_count = field_count; + + *outstransfer = &_strided_to_strided_field_transfer; + *outtransferdata = data; + + return NPY_SUCCEED; +} + +static int +get_setdestzero_fields_transfer_function(int aligned, + npy_intp dst_stride, + PyArray_Descr *dst_dtype, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + PyObject *names, *key, *tup, *title; + PyArray_Descr *dst_fld_dtype; + npy_int i, names_size, field_count, structsize; + int dst_offset; + _field_transfer_data *data; + _single_field_transfer *fields; + + names = dst_dtype->names; + names_size = PyTuple_GET_SIZE(dst_dtype->names); + + field_count = names_size; 
+ structsize = sizeof(_field_transfer_data) + + field_count * sizeof(_single_field_transfer); + /* Allocate the data and populate it */ + data = (_field_transfer_data *)PyArray_malloc(structsize); + if (data == NULL) { + PyErr_NoMemory(); + return NPY_FAIL; + } + data->freefunc = &_field_transfer_data_free; + data->copyfunc = &_field_transfer_data_copy; + fields = &data->fields; + + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + tup = PyDict_GetItem(dst_dtype->fields, key); + if (!PyArg_ParseTuple(tup, "Oi|O", &dst_fld_dtype, + &dst_offset, &title)) { + PyArray_free(data); + return NPY_FAIL; + } + if (PyArray_GetSetDstZeroTransferFunction(0, + dst_stride, + dst_fld_dtype, + &fields[i].stransfer, + &fields[i].data) != NPY_SUCCEED) { + for (i = i-1; i >= 0; --i) { + PyArray_FreeStridedTransferData(fields[i].data); + } + PyArray_free(data); + return NPY_FAIL; + } + fields[i].src_offset = 0; + fields[i].dst_offset = dst_offset; + fields[i].src_itemsize = 0; + } + + data->field_count = field_count; + + *outstransfer = &_strided_to_strided_field_transfer; + *outtransferdata = data; + + return NPY_SUCCEED; +} + +NPY_NO_EXPORT int +PyArray_GetDTypeTransferFunction(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + npy_intp src_itemsize = src_dtype->elsize, + dst_itemsize = dst_dtype->elsize; + int src_type_num = src_dtype->type_num, + dst_type_num = dst_dtype->type_num; + + /* First look at the possibilities of just a copy or swap */ + if (src_itemsize == dst_itemsize && src_dtype->kind == dst_dtype->kind && + src_type_num < NPY_NTYPES && dst_type_num < NPY_NTYPES && + !PyDataType_HASFIELDS(src_dtype) && + !PyDataType_HASFIELDS(dst_dtype) && + src_dtype->subarray == NULL && dst_dtype->subarray == NULL) { + /* The special types, which have no byte-order */ + switch (src_type_num) { + case 
NPY_VOID: + case NPY_STRING: + case NPY_UNICODE: + *outstransfer = PyArray_GetStridedCopyFn(0, + src_stride, dst_stride, + src_itemsize); + *outtransferdata = NULL; + return NPY_SUCCEED; + case NPY_OBJECT: + if (move_references) { + *outstransfer = &_strided_to_strided_move_references; + *outtransferdata = NULL; + } + else { + *outstransfer = &_strided_to_strided_copy_references; + *outtransferdata = NULL; + } + return NPY_SUCCEED; + } + + /* This is a straight copy */ + if (src_itemsize == 1 || PyArray_ISNBO(src_dtype->byteorder) == + PyArray_ISNBO(dst_dtype->byteorder)) { + *outstransfer = PyArray_GetStridedCopyFn(aligned, + src_stride, dst_stride, + src_itemsize); + *outtransferdata = NULL; + return (*outstransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; + } + /* This is a straight copy + byte swap */ + else if (!PyTypeNum_ISCOMPLEX(src_type_num)) { + *outstransfer = PyArray_GetStridedCopySwapFn(aligned, + src_stride, dst_stride, + src_itemsize); + *outtransferdata = NULL; + return (*outstransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; + } + /* This is a straight copy + element pair byte swap */ + else { + *outstransfer = PyArray_GetStridedCopySwapPairFn(aligned, + src_stride, dst_stride, + src_itemsize); + *outtransferdata = NULL; + return (*outstransfer == NULL) ? 
NPY_FAIL : NPY_SUCCEED; + } + } + + /* Handle subarrays */ + if (src_dtype->subarray != NULL || dst_dtype->subarray != NULL) { + return get_subarray_transfer_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + move_references, + outstransfer, outtransferdata); + } + + /* Handle fields */ + if (PyDataType_HASFIELDS(src_dtype) || + PyDataType_HASFIELDS(dst_dtype)) { + return get_fields_transfer_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + move_references, + outstransfer, outtransferdata); + } + + /* Check for different-sized strings, unicodes, or voids */ + if (src_type_num == dst_type_num) switch (src_type_num) { + case NPY_STRING: + case NPY_UNICODE: + case NPY_VOID: + return PyArray_GetStridedZeroPadCopyFn(0, + src_stride, dst_stride, + src_dtype->elsize, dst_dtype->elsize, + outstransfer, outtransferdata); + } + + /* Otherwise a cast is necessary */ + return get_cast_transfer_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + move_references, + outstransfer, outtransferdata); +} + +/*************************** DEST SETZERO *******************************/ + +/* Sets dest to zero */ +typedef struct { + void *freefunc, *copyfunc; + npy_intp dst_itemsize; +} _dst_memset_zero_data; + +/* zero-padded data copy function */ +_dst_memset_zero_data *_dst_memset_zero_data_copy( + _dst_memset_zero_data *data) +{ + _dst_memset_zero_data *newdata = + (_dst_memset_zero_data *)PyArray_malloc( + sizeof(_dst_memset_zero_data)); + if (newdata == NULL) { + return NULL; + } + + memcpy(newdata, data, sizeof(_dst_memset_zero_data)); + + return newdata; +} + +static void +_null_to_strided_memset_zero(char *dst, + npy_intp dst_stride, + char *NPY_UNUSED(src), npy_intp NPY_UNUSED(src_stride), + npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + void *data) +{ + _dst_memset_zero_data *d = (_dst_memset_zero_data *)data; + npy_intp dst_itemsize = d->dst_itemsize; + + while (N > 0) { + memset(dst, 0, dst_itemsize); + dst += 
dst_stride; + --N; + } +} + +static void +_null_to_contig_memset_zero(char *dst, + npy_intp dst_stride, + char *NPY_UNUSED(src), npy_intp NPY_UNUSED(src_stride), + npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + void *data) +{ + _dst_memset_zero_data *d = (_dst_memset_zero_data *)data; + npy_intp dst_itemsize = d->dst_itemsize; + + memset(dst, 0, N*dst_itemsize); +} + +static void +_null_to_strided_reference_setzero(char *dst, + npy_intp dst_stride, + char *NPY_UNUSED(src), npy_intp NPY_UNUSED(src_stride), + npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + void *NPY_UNUSED(data)) +{ + PyObject *dst_ref = NULL; + + while (N > 0) { + NPY_COPY_PYOBJECT_PTR(&dst_ref, dst); + + /* Release the reference in dst */ + Py_XDECREF(dst_ref); + + /* Set it to zero */ + dst_ref = NULL; + NPY_COPY_PYOBJECT_PTR(dst, &dst_ref); + + dst += dst_stride; + --N; + } +} + +NPY_NO_EXPORT int +PyArray_GetSetDstZeroTransferFunction(int aligned, + npy_intp dst_stride, + PyArray_Descr *dst_dtype, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + _dst_memset_zero_data *data; + + /* If there are no references, just set the whole thing to zero */ + if (!PyDataType_REFCHK(dst_dtype)) { + data = (_dst_memset_zero_data *) + PyArray_malloc(sizeof(_dst_memset_zero_data)); + if (data == NULL) { + PyErr_NoMemory(); + return NPY_FAIL; + } + + data->freefunc = &PyArray_free; + data->copyfunc = &_dst_memset_zero_data_copy; + data->dst_itemsize = dst_dtype->elsize; + + if (dst_stride == data->dst_itemsize) { + *outstransfer = &_null_to_contig_memset_zero; + } + else { + *outstransfer = &_null_to_strided_memset_zero; + } + *outtransferdata = data; + + return NPY_SUCCEED; + } + /* If it's exactly one reference, use the decref function */ + else if (dst_dtype->type_num == NPY_OBJECT) { + *outstransfer = &_null_to_strided_reference_setzero; + *outtransferdata = NULL; + + return NPY_SUCCEED; + } + /* If there are subarrays, need to wrap it */ + else if (dst_dtype->subarray != NULL) { + 
PyArray_Dims dst_shape = {NULL, -1}; + npy_intp dst_size = 1; + PyArray_StridedTransferFn stransfer; + void *data; + + if (!(PyArray_IntpConverter(dst_dtype->subarray->shape, + &dst_shape))) { + PyErr_SetString(PyExc_ValueError, + "invalid subarray shape"); + return NPY_FAIL; + } + dst_size = PyArray_MultiplyList(dst_shape.ptr, dst_shape.len); + PyDimMem_FREE(dst_shape.ptr); + + /* Get a function for contiguous dst of the subarray type */ + if (PyArray_GetSetDstZeroTransferFunction(aligned, + dst_dtype->subarray->base->elsize, + dst_dtype->subarray->base, + &stransfer, &data) != NPY_SUCCEED) { + return NPY_FAIL; + } + + if (wrap_transfer_function_n_to_n(stransfer, data, + 0, dst_stride, + 0, dst_dtype->subarray->base->elsize, + dst_size, + outstransfer, outtransferdata) != NPY_SUCCEED) { + PyArray_FreeStridedTransferData(data); + return NPY_FAIL; + } + + return NPY_SUCCEED; + } + /* If there are fields, need to do each field */ + else if (PyDataType_HASFIELDS(dst_dtype)) { + return get_setdestzero_fields_transfer_function(aligned, + dst_stride, dst_dtype, + outstransfer, + outtransferdata); + } +} + +static void +_dec_src_ref_nop(char *NPY_UNUSED(dst), + npy_intp NPY_UNUSED(dst_stride), + char *NPY_UNUSED(src), npy_intp NPY_UNUSED(src_stride), + npy_intp NPY_UNUSED(N), + npy_intp NPY_UNUSED(src_itemsize), + void *NPY_UNUSED(data)) +{ + /* NOP */ +} + +static void +_strided_to_null_dec_src_ref_reference(char *NPY_UNUSED(dst), + npy_intp NPY_UNUSED(dst_stride), + char *src, npy_intp src_stride, + npy_intp N, + npy_intp NPY_UNUSED(src_itemsize), + void *NPY_UNUSED(data)) +{ + PyObject *src_ref = NULL; + while (N > 0) { + NPY_COPY_PYOBJECT_PTR(&src_ref, src); + + /* Release the reference in src */ + Py_XDECREF(src_ref); + + src += src_stride; + --N; + } +} + + +NPY_NO_EXPORT int +PyArray_GetDecSrcRefTransferFunction(int aligned, + npy_intp src_stride, + PyArray_Descr *src_dtype, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata) +{ + /* If there are 
no references, it's a nop */ + if (!PyDataType_REFCHK(src_dtype)) { + *outstransfer = &_dec_src_ref_nop; + *outtransferdata = NULL; + + return NPY_SUCCEED; + } + /* If it's a single reference, it's one decref */ + else if (src_dtype->type_num == NPY_OBJECT) { + *outstransfer = &_strided_to_null_dec_src_ref_reference; + *outtransferdata = NULL; + + return NPY_SUCCEED; + } + /* If there are subarrays, need to wrap it */ + else if (src_dtype->subarray != NULL) { + PyArray_Dims src_shape = {NULL, -1}; + npy_intp src_size = 1; + PyArray_StridedTransferFn stransfer; + void *data; + + if (!(PyArray_IntpConverter(src_dtype->subarray->shape, + &src_shape))) { + PyErr_SetString(PyExc_ValueError, + "invalid subarray shape"); + return NPY_FAIL; + } + src_size = PyArray_MultiplyList(src_shape.ptr, src_shape.len); + PyDimMem_FREE(src_shape.ptr); + + /* Get a function for contiguous src of the subarray type */ + if (PyArray_GetDecSrcRefTransferFunction(aligned, + src_dtype->subarray->base->elsize, + src_dtype->subarray->base, + &stransfer, &data) != NPY_SUCCEED) { + return NPY_FAIL; + } + + if (wrap_transfer_function_n_to_n(stransfer, data, + src_stride, 0, + src_dtype->subarray->base->elsize, 0, + src_size, + outstransfer, outtransferdata) != NPY_SUCCEED) { + PyArray_FreeStridedTransferData(data); + return NPY_FAIL; + } + + return NPY_SUCCEED; + } + /* If there are fields, need to do each field */ + else { + return get_decsrcref_fields_transfer_function(aligned, + src_stride, src_dtype, + outstransfer, + outtransferdata); + } +} + diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src index 06840335a..c0b35df4b 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -21,8 +21,6 @@ # define NPY_USE_UNALIGNED_ACCESS 0 #endif -#define NPY_LOWLEVEL_BUFFER_BLOCKSIZE 128 - #define _NPY_NOP1(x) (x) #define _NPY_NOP2(x) (x) #define 
_NPY_NOP4(x) (x) @@ -658,1215 +656,6 @@ NPY_NO_EXPORT PyArray_StridedTransferFn /**end repeat**/ -/*************************** COPY REFERENCES *******************************/ - -/* Moves references from src to dst */ -static void -_strided_to_strided_move_references(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - void *data) -{ - PyObject *src_ref = NULL, *dst_ref = NULL; - while (N > 0) { - NPY_COPY_PYOBJECT_PTR(&src_ref, src); - NPY_COPY_PYOBJECT_PTR(&dst_ref, dst); - - /* Release the reference in dst */ - Py_XDECREF(dst_ref); - /* Move the reference */ - NPY_COPY_PYOBJECT_PTR(dst, &src_ref); - /* Set the source reference to NULL */ - src_ref = NULL; - NPY_COPY_PYOBJECT_PTR(src, &src_ref); - - src += src_stride; - dst += dst_stride; - --N; - } -} - -/* Copies references from src to dst */ -static void -_strided_to_strided_copy_references(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - void *data) -{ - PyObject *src_ref = NULL, *dst_ref = NULL; - while (N > 0) { - NPY_COPY_PYOBJECT_PTR(&src_ref, src); - NPY_COPY_PYOBJECT_PTR(&dst_ref, dst); - - /* Release the reference in dst */ - Py_XDECREF(dst_ref); - /* Copy the reference */ - NPY_COPY_PYOBJECT_PTR(dst, &src_ref); - /* Claim the reference */ - Py_XINCREF(src_ref); - - src += src_stride; - dst += dst_stride; - --N; - } -} - -/************************** ZERO-PADDED COPY ******************************/ - -/* Does a zero-padded copy */ -typedef struct { - void *freefunc, *copyfunc; - npy_intp dst_itemsize; -} _strided_zero_pad_data; - -/* zero-padded data copy function */ -_strided_zero_pad_data *_strided_zero_pad_data_copy( - _strided_zero_pad_data *data) -{ - _strided_zero_pad_data *newdata = - (_strided_zero_pad_data *)PyArray_malloc( - sizeof(_strided_zero_pad_data)); - if (newdata == NULL) { - return NULL; - } - - memcpy(newdata, data, sizeof(_strided_zero_pad_data)); - - return newdata; -} - -/* 
- * Does a strided to strided zero-padded copy for the case where - * dst_itemsize > src_itemsize - */ -static void -_strided_to_strided_zero_pad_copy(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - void *data) -{ - _strided_zero_pad_data *d = (_strided_zero_pad_data *)data; - npy_intp dst_itemsize = d->dst_itemsize; - npy_intp zero_size = dst_itemsize-src_itemsize; - - while (N > 0) { - memcpy(dst, src, src_itemsize); - memset(dst + src_itemsize, 0, zero_size); - src += src_stride; - dst += dst_stride; - --N; - } -} - -NPY_NO_EXPORT int -PyArray_GetStridedZeroPadCopyFn(npy_intp aligned, - npy_intp src_stride, npy_intp dst_stride, - npy_intp src_itemsize, npy_intp dst_itemsize, - PyArray_StridedTransferFn *outstransfer, - void **outtransferdata) -{ - if (src_itemsize >= dst_itemsize) { - /* If the sizes are different, the alignment flag isn't trustworthy */ - if (src_itemsize != dst_itemsize) { - aligned = 0; - } - *outstransfer = PyArray_GetStridedCopyFn(aligned, src_stride, - dst_stride, dst_itemsize); - *outtransferdata = NULL; - return (*outstransfer == NULL) ? 
NPY_FAIL : NPY_SUCCEED; - } - else { - _strided_zero_pad_data *d = PyArray_malloc( - sizeof(_strided_zero_pad_data)); - if (d == NULL) { - PyErr_NoMemory(); - return NPY_FAIL; - } - d->dst_itemsize = dst_itemsize; - d->freefunc = &PyArray_free; - d->copyfunc = &_strided_zero_pad_data_copy; - - *outstransfer = &_strided_to_strided_zero_pad_copy; - *outtransferdata = d; - return NPY_SUCCEED; - } -} - -/***************** WRAP ALIGNED CONTIGUOUS TRANFSER FUNCTION **************/ - -/* Wraps a transfer function + data in alignment code */ -typedef struct { - void *freefunc, *copyfunc; - PyArray_StridedTransferFn wrapped, - tobuffer, frombuffer; - void *wrappeddata; - npy_intp src_itemsize, dst_itemsize; - char *bufferin, *bufferout; -} _align_wrap_data; - -/* transfer data free function */ -void _align_wrap_data_free(_align_wrap_data *data) -{ - PyArray_FreeStridedTransferData(data->wrappeddata); - PyArray_free(data); -} - -/* transfer data copy function */ -_align_wrap_data *_align_wrap_data_copy(_align_wrap_data *data) -{ - _align_wrap_data *newdata; - npy_intp basedatasize, datasize; - - /* Round up the structure size to 16-byte boundary */ - basedatasize = (sizeof(_align_wrap_data)+15)&(-0x10); - /* Add space for two low level buffers */ - datasize = basedatasize + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*data->src_itemsize + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*data->dst_itemsize; - - /* Allocate the data, and populate it */ - newdata = (_align_wrap_data *)PyArray_malloc(datasize); - if (newdata == NULL) { - return NULL; - } - memcpy(newdata, data, basedatasize); - newdata->bufferin = (char *)newdata + basedatasize; - newdata->bufferout = newdata->bufferin + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*newdata->src_itemsize; - if (newdata->wrappeddata != NULL) { - newdata->wrappeddata = - PyArray_CopyStridedTransferData(data->wrappeddata); - if (newdata->wrappeddata == NULL) { - PyArray_free(newdata); - return NULL; - } - } - - return newdata; -} - -static void 
-_strided_to_strided_contig_align_wrap(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - void *data) -{ - _align_wrap_data *d = (_align_wrap_data *)data; - PyArray_StridedTransferFn wrapped = d->wrapped, - tobuffer = d->tobuffer, - frombuffer = d->frombuffer; - npy_intp dst_itemsize = d->dst_itemsize; - void *wrappeddata = d->wrappeddata; - char *bufferin = d->bufferin, *bufferout = d->bufferout; - - for(;;) { - if (N > NPY_LOWLEVEL_BUFFER_BLOCKSIZE) { - tobuffer(bufferin, src_itemsize, src, src_stride, - NPY_LOWLEVEL_BUFFER_BLOCKSIZE, - src_itemsize, NULL); - wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, - NPY_LOWLEVEL_BUFFER_BLOCKSIZE, - src_itemsize, wrappeddata); - frombuffer(dst, dst_stride, bufferout, dst_itemsize, - NPY_LOWLEVEL_BUFFER_BLOCKSIZE, - dst_itemsize, NULL); - N -= NPY_LOWLEVEL_BUFFER_BLOCKSIZE; - src += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*src_stride; - dst += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_stride; - } - else { - tobuffer(bufferin, src_itemsize, src, src_stride, N, - src_itemsize, NULL); - wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, N, - src_itemsize, wrappeddata); - frombuffer(dst, dst_stride, bufferout, dst_itemsize, N, - dst_itemsize, NULL); - return; - } - } -} - -/* - * Wraps an aligned contig to contig transfer function between either - * copies or byte swaps to temporary buffers. - * - * src_itemsize/dst_itemsize - The sizes of the src and dst datatypes. - * tobuffer - copy/swap function from src to an aligned contiguous buffer. - * data passed to 'tobuffer' is NULL. - * frombuffer - copy/swap function from an aligned contiguous buffer to dst. - * data passed to 'frombuffer' is NULL. - * wrapped - contig to contig transfer function being wrapped - * wrappeddata - data for wrapped - * - * Returns NPY_SUCCEED or NPY_FAIL. 
- */ -NPY_NO_EXPORT int -PyArray_WrapAlignedContigTransferFunction( - npy_intp src_itemsize, npy_intp dst_itemsize, - PyArray_StridedTransferFn tobuffer, - PyArray_StridedTransferFn frombuffer, - PyArray_StridedTransferFn wrapped, void *wrappeddata, - PyArray_StridedTransferFn *outstransfer, - void **outtransferdata) -{ - _align_wrap_data *data; - npy_intp basedatasize, datasize; - - /* Round up the structure size to 16-byte boundary */ - basedatasize = (sizeof(_align_wrap_data)+15)&(-0x10); - /* Add space for two low level buffers */ - datasize = basedatasize + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*src_itemsize + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_itemsize; - - /* Allocate the data, and populate it */ - data = (_align_wrap_data *)PyArray_malloc(datasize); - if (data == NULL) { - PyErr_NoMemory(); - return NPY_FAIL; - } - data->freefunc = (void *)&_align_wrap_data_free; - data->copyfunc = (void *)&_align_wrap_data_copy; - data->tobuffer = tobuffer; - data->frombuffer = frombuffer; - data->wrapped = wrapped; - data->wrappeddata = wrappeddata; - data->src_itemsize = src_itemsize; - data->dst_itemsize = dst_itemsize; - data->bufferin = (char *)data + basedatasize; - data->bufferout = data->bufferin + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*src_itemsize; - - /* Set the function and data */ - *outstransfer = &_strided_to_strided_contig_align_wrap; - *outtransferdata = data; - - return NPY_SUCCEED; -} - -/*************************** DTYPE CAST FUNCTIONS *************************/ - -/* Does a simple aligned cast */ -typedef struct { - void *freefunc, *copyfunc; - PyArray_VectorUnaryFunc *castfunc; - PyArrayObject *aip, *aop; -} _strided_cast_data; - -/* strided cast data free function */ -void _strided_cast_data_free(_strided_cast_data *data) -{ - Py_DECREF(data->aip); - Py_DECREF(data->aop); - PyArray_free(data); -} - -/* strided cast data copy function */ -_strided_cast_data *_strided_cast_data_copy(_strided_cast_data *data) -{ - _strided_cast_data *newdata = - (_strided_cast_data 
*)PyArray_malloc(sizeof(_strided_cast_data)); - if (newdata == NULL) { - return NULL; - } - - memcpy(newdata, data, sizeof(_strided_cast_data)); - Py_INCREF(newdata->aip); - Py_INCREF(newdata->aop); - - return newdata; -} - -static void -_aligned_strided_to_strided_cast(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - void *data) -{ - _strided_cast_data *d = (_strided_cast_data *)data; - PyArray_VectorUnaryFunc *castfunc = d->castfunc; - PyArrayObject *aip = d->aip, *aop = d->aop; - - while (N > 0) { - castfunc(src, dst, 1, aip, aop); - dst += dst_stride; - src += src_stride; - --N; - } -} - -static void -_aligned_contig_to_contig_cast(char *dst, npy_intp NPY_UNUSED(dst_stride), - char *src, npy_intp NPY_UNUSED(src_stride), - npy_intp N, npy_intp NPY_UNUSED(itemsize), - void *data) -{ - _strided_cast_data *d = (_strided_cast_data *)data; - PyArray_VectorUnaryFunc *castfunc = d->castfunc; - PyArrayObject *aip = d->aip, *aop = d->aop; - - castfunc(src, dst, N, aip, aop); -} - -static int -get_cast_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - PyArray_StridedTransferFn *outstransfer, - void **outtransferdata) -{ - _strided_cast_data *data; - PyArray_VectorUnaryFunc *castfunc; - npy_intp shape = 1, src_itemsize = src_dtype->elsize, - dst_itemsize = dst_dtype->elsize; - - /* Get the cast function */ - castfunc = PyArray_GetCastFunc(src_dtype, dst_dtype->type_num); - if (!castfunc) { - *outstransfer = NULL; - *outtransferdata = NULL; - return NPY_FAIL; - } - - /* Allocate the data for the casting */ - data = (_strided_cast_data *)PyArray_malloc(sizeof(_strided_cast_data)); - if (data == NULL) { - PyErr_NoMemory(); - *outstransfer = NULL; - *outtransferdata = NULL; - return NPY_FAIL; - } - data->freefunc = (void*)&_strided_cast_data_free; - data->copyfunc = (void*)&_strided_cast_data_copy; - data->castfunc = 
castfunc; - Py_INCREF(src_dtype); - data->aip = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, src_dtype, - 1, &shape, NULL, NULL, 0, NULL); - if (data->aip == NULL) { - PyArray_free(data); - return NPY_FAIL; - } - Py_INCREF(dst_dtype); - data->aop = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dst_dtype, - 1, &shape, NULL, NULL, 0, NULL); - if (data->aop == NULL) { - Py_DECREF(data->aip); - PyArray_free(data); - return NPY_FAIL; - } - - - /* If it's aligned and all native byte order, we're all done */ - if (aligned && PyArray_ISNBO(src_dtype->byteorder) && - PyArray_ISNBO(dst_dtype->byteorder)) { - /* Choose the contiguous cast if we can */ - if (src_stride == src_itemsize && dst_stride == dst_itemsize) { - *outstransfer = _aligned_contig_to_contig_cast; - } - else { - *outstransfer = _aligned_strided_to_strided_cast; - } - *outtransferdata = data; - - return NPY_SUCCEED; - } - /* Otherwise, we have to copy and/or swap to aligned temporaries */ - else { - PyArray_StridedTransferFn tobuffer, frombuffer, casttransfer; - - /* Get the copy/swap operation from src */ - if (src_itemsize == 1 || PyArray_ISNBO(src_dtype->byteorder)) { - tobuffer = PyArray_GetStridedCopyFn(aligned, - src_stride, src_itemsize, - src_itemsize); - } - /* If it's not complex, one swap */ - else if(src_dtype->kind != 'c') { - tobuffer = PyArray_GetStridedCopySwapFn(aligned, - src_stride, src_itemsize, - src_itemsize); - } - /* If not complex, a paired swap */ - else { - tobuffer = PyArray_GetStridedCopySwapPairFn(aligned, - src_stride, src_itemsize, - src_itemsize); - } - - /* Get the copy/swap operation to dst */ - if (dst_itemsize == 1 || PyArray_ISNBO(dst_dtype->byteorder)) { - frombuffer = PyArray_GetStridedCopyFn(aligned, - dst_itemsize, dst_stride, - dst_itemsize); - } - /* If it's not complex, one swap */ - else if(dst_dtype->kind != 'c') { - frombuffer = PyArray_GetStridedCopySwapFn(aligned, - dst_itemsize, dst_stride, - dst_itemsize); - } - /* If not complex, a paired 
swap */ - else { - frombuffer = PyArray_GetStridedCopySwapPairFn(aligned, - dst_itemsize, dst_stride, - dst_itemsize); - } - - if (frombuffer == NULL || tobuffer == NULL) { - PyArray_FreeStridedTransferData(data); - return NPY_FAIL; - } - - /* Use the aligned contiguous cast */ - casttransfer = &_aligned_contig_to_contig_cast; - - /* Wrap it all up in a new transfer function + data */ - if (PyArray_WrapAlignedContigTransferFunction( - src_itemsize, dst_itemsize, - tobuffer, frombuffer, - casttransfer, data, - outstransfer, outtransferdata) != NPY_SUCCEED) { - PyArray_FreeStridedTransferData(data); - return NPY_FAIL; - } - - return NPY_SUCCEED; - } -} - -/**************************** COPY 1 TO N CONTIGUOUS ************************/ - -/* Copies 1 element to N contiguous elements */ -typedef struct { - void *freefunc, *copyfunc; - PyArray_StridedTransferFn stransfer; - void *data; - npy_intp N, dst_itemsize; - /* If this is non-NULL the source type has references needing a decref */ - PyArray_Descr *src_dtype; -} _one_to_n_data; - -/* transfer data free function */ -void _one_to_n_data_free(_one_to_n_data *data) -{ - PyArray_FreeStridedTransferData(data->data); - Py_XDECREF(data->src_dtype); - PyArray_free(data); -} - -/* transfer data copy function */ -_one_to_n_data *_one_to_n_data_copy(_one_to_n_data *data) -{ - _one_to_n_data *newdata; - - /* Allocate the data, and populate it */ - newdata = (_one_to_n_data *)PyArray_malloc(sizeof(_one_to_n_data)); - if (newdata == NULL) { - return NULL; - } - memcpy(newdata, data, sizeof(_one_to_n_data)); - newdata->data = PyArray_CopyStridedTransferData(data->data); - if (newdata->data == NULL) { - PyArray_free(newdata); - return NULL; - } - Py_XINCREF(newdata->src_dtype); - - return newdata; -} - -static void -_strided_to_strided_one_to_n(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - void *data) -{ - _one_to_n_data *d = (_one_to_n_data *)data; - PyArray_Descr 
*src_dtype = d->src_dtype; - PyArray_StridedTransferFn subtransfer = d->stransfer; - void *subdata = d->data; - npy_intp subN = d->N, dst_itemsize = d->dst_itemsize; - - if (src_dtype == NULL) { - while (N > 0) { - subtransfer(dst, dst_itemsize, - src, 0, - subN, src_itemsize, - subdata); - - src += src_stride; - dst += dst_stride; - --N; - } - } - else { - while (N > 0) { - subtransfer(dst, dst_itemsize, - src, 0, - subN, src_itemsize, - subdata); - - PyArray_Item_XDECREF(src, src_dtype); - - src += src_stride; - dst += dst_stride; - --N; - } - } -} - -static int -get_one_to_n_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - npy_intp N, - PyArray_StridedTransferFn *outstransfer, - void **outtransferdata) -{ - _one_to_n_data *data; - - - data = PyArray_malloc(sizeof(_one_to_n_data)); - if (data == NULL) { - PyErr_NoMemory(); - return NPY_FAIL; - } - - /* - * move_references is set to 0, handled in the wrapping transfer fn, - * src_stride is set to zero, because its 1 to N copying, - * and dst_stride is set to contiguous, because subarrays are always - * contiguous. 
- */ - if (PyArray_GetDTypeTransferFunction(aligned, - 0, dst_dtype->elsize, - src_dtype, dst_dtype, - 0, - &data->stransfer, &data->data) != NPY_SUCCEED) { - PyArray_free(data); - return NPY_FAIL; - } - data->freefunc = &_one_to_n_data_free; - data->copyfunc = &_one_to_n_data_copy; - data->N = N; - data->dst_itemsize = dst_dtype->elsize; - /* If the src object will need a DECREF, set src_dtype */ - if (move_references && PyDataType_REFCHK(src_dtype)) { - data->src_dtype = src_dtype; - Py_INCREF(src_dtype); - } - else { - data->src_dtype = NULL; - } - - *outstransfer = &_strided_to_strided_one_to_n; - *outtransferdata = data; - - return NPY_SUCCEED; -} - -/**************************** COPY N TO N CONTIGUOUS ************************/ - -/* Copies N contiguous elements to N contiguous elements */ -typedef struct { - void *freefunc, *copyfunc; - PyArray_StridedTransferFn stransfer; - void *data; - npy_intp N, src_itemsize, dst_itemsize; -} _n_to_n_data; - -/* transfer data free function */ -void _n_to_n_data_free(_n_to_n_data *data) -{ - PyArray_FreeStridedTransferData(data->data); - PyArray_free(data); -} - -/* transfer data copy function */ -_n_to_n_data *_n_to_n_data_copy(_n_to_n_data *data) -{ - _n_to_n_data *newdata; - - /* Allocate the data, and populate it */ - newdata = (_n_to_n_data *)PyArray_malloc(sizeof(_n_to_n_data)); - if (newdata == NULL) { - return NULL; - } - memcpy(newdata, data, sizeof(_n_to_n_data)); - newdata->data = PyArray_CopyStridedTransferData(data->data); - if (newdata->data == NULL) { - PyArray_free(newdata); - return NULL; - } - - return newdata; -} - -static void -_strided_to_strided_n_to_n(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - void *data) -{ - _n_to_n_data *d = (_n_to_n_data *)data; - PyArray_StridedTransferFn subtransfer = d->stransfer; - void *subdata = d->data; - npy_intp subN = d->N, src_subitemsize = d->src_itemsize, - dst_subitemsize = d->dst_itemsize; - - while (N 
> 0) { - subtransfer(dst, dst_subitemsize, - src, src_subitemsize, - subN, src_subitemsize, - subdata); - - src += src_stride; - dst += dst_stride; - --N; - } -} - -static void -_contig_to_contig_n_to_n(char *dst, npy_intp NPY_UNUSED(dst_stride), - char *src, npy_intp NPY_UNUSED(src_stride), - npy_intp N, npy_intp NPY_UNUSED(src_itemsize), - void *data) -{ - _n_to_n_data *d = (_n_to_n_data *)data; - PyArray_StridedTransferFn subtransfer = d->stransfer; - void *subdata = d->data; - npy_intp subN = d->N, src_subitemsize = d->src_itemsize, - dst_subitemsize = d->dst_itemsize; - - subtransfer(dst, dst_subitemsize, - src, src_subitemsize, - subN*N, src_subitemsize, - subdata); -} - -static int -get_n_to_n_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - npy_intp N, - PyArray_StridedTransferFn *outstransfer, - void **outtransferdata) -{ - _n_to_n_data *data; - - - data = PyArray_malloc(sizeof(_n_to_n_data)); - if (data == NULL) { - PyErr_NoMemory(); - return NPY_FAIL; - } - - /* - * src_stride and dst_stride are set to contiguous, because - * subarrays are always contiguous. - */ - if (PyArray_GetDTypeTransferFunction(aligned, - src_dtype->elsize, dst_dtype->elsize, - src_dtype, dst_dtype, - move_references, - &data->stransfer, &data->data) != NPY_SUCCEED) { - PyArray_free(data); - return NPY_FAIL; - } - data->freefunc = &_n_to_n_data_free; - data->copyfunc = &_n_to_n_data_copy; - data->N = N; - data->src_itemsize = src_dtype->elsize; - data->dst_itemsize = dst_dtype->elsize; - - /* - * If the N subarray elements exactly fit in the strides, - * then can do a faster contiguous transfer. 
- */ - if (src_stride == N * src_dtype->elsize && - dst_stride == N * dst_dtype->elsize) { - *outstransfer = &_contig_to_contig_n_to_n; - } - else { - *outstransfer = &_strided_to_strided_n_to_n; - } - *outtransferdata = data; - - return NPY_SUCCEED; -} - -/********************** COPY WITH SUBARRAY BROADCAST ************************/ - -/* Copies element with subarray broadcasting */ -typedef struct { - void *freefunc, *copyfunc; - PyArray_StridedTransferFn stransfer; - void *data; - npy_intp src_N, dst_N, src_itemsize, dst_itemsize; - /* If this is non-NULL the source type has references needing a decref */ - PyArray_Descr *src_dtype; - /* If this is non-NULL, the dest type has references needing a decref */ - PyArray_Descr *dst_dtype; - npy_intp offsets; -} _subarray_broadcast_data; - -/* transfer data free function */ -void _subarray_broadcast_data_free(_subarray_broadcast_data *data) -{ - PyArray_FreeStridedTransferData(data->data); - Py_XDECREF(data->src_dtype); - Py_XDECREF(data->dst_dtype); - PyArray_free(data); -} - -/* transfer data copy function */ -_subarray_broadcast_data *_subarray_broadcast_data_copy( - _subarray_broadcast_data *data) -{ - _subarray_broadcast_data *newdata; - npy_intp dst_N = data->dst_N, structsize; - - structsize = sizeof(_subarray_broadcast_data) + dst_N*NPY_SIZEOF_INTP; - - /* Allocate the data and populate it */ - newdata = (_subarray_broadcast_data *)PyArray_malloc(structsize); - if (newdata == NULL) { - return NULL; - } - memcpy(newdata, data, structsize); - newdata->data = PyArray_CopyStridedTransferData(data->data); - if (newdata->data == NULL) { - PyArray_free(newdata); - return NULL; - } - Py_XINCREF(newdata->src_dtype); - Py_XINCREF(newdata->dst_dtype); - - return newdata; -} - -static void -_strided_to_strided_subarray_broadcast(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp NPY_UNUSED(src_itemsize), - void *data) -{ - _subarray_broadcast_data *d = (_subarray_broadcast_data 
*)data; - PyArray_Descr *src_dtype = d->src_dtype, *dst_dtype = d->dst_dtype; - PyArray_StridedTransferFn subtransfer = d->stransfer; - void *subdata = d->data; - npy_intp i, dst_subN = d->dst_N, src_subN = d->src_N, - src_subitemsize = d->src_itemsize, - dst_subitemsize = d->dst_itemsize; - npy_intp *offsets = &d->offsets; - - if (src_dtype == NULL && dst_dtype == NULL) { - while (N > 0) { - for (i = 0; i < dst_subN; ++i) { - if (offsets[i] != -1) { - subtransfer(dst + i*dst_subitemsize, dst_subitemsize, - src + offsets[i], src_subitemsize, - 1, src_subitemsize, - subdata); - } - else { - char *tmp = dst + i*dst_subitemsize; - memset(tmp, 0, dst_subitemsize); - } - } - - src += src_stride; - dst += dst_stride; - --N; - } - } - else { - while (N > 0) { - for (i = 0; i < dst_subN; ++i) { - if (offsets[i] != -1) { - subtransfer(dst + i*dst_subitemsize, dst_subitemsize, - src + offsets[i], src_subitemsize, - 1, src_subitemsize, - subdata); - } - else { - char *tmp = dst + i*dst_subitemsize; - if (dst_dtype) { - PyArray_Item_XDECREF(tmp, dst_dtype); - } - memset(tmp, 0, dst_subitemsize); - } - } - - for (i = 0; i < src_subN; ++i) { - PyArray_Item_XDECREF(src + i*src_subitemsize, src_dtype); - } - - src += src_stride; - dst += dst_stride; - --N; - } - } -} - - -static int -get_subarray_broadcast_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - npy_intp src_size, npy_intp dst_size, - PyArray_Dims src_shape, PyArray_Dims dst_shape, - int move_references, - PyArray_StridedTransferFn *outstransfer, - void **outtransferdata) -{ - _subarray_broadcast_data *data; - npy_intp structsize, index, src_index, dst_index, i, ndim, *offsets; - - structsize = sizeof(_subarray_broadcast_data) + dst_size*NPY_SIZEOF_INTP; - - /* Allocate the data and populate it */ - data = (_subarray_broadcast_data *)PyArray_malloc(structsize); - if (data == NULL) { - PyErr_NoMemory(); - return NPY_FAIL; - } - - /* - * 
move_references is set to 0, handled in the wrapping transfer fn, - * src_stride and dst_stride are set to contiguous, as N will always - * be 1 when it's called. - */ - if (PyArray_GetDTypeTransferFunction(aligned, - src_dtype->elsize, dst_dtype->elsize, - src_dtype, dst_dtype, - 0, - &data->stransfer, &data->data) != NPY_SUCCEED) { - PyArray_free(data); - return NPY_FAIL; - } - data->freefunc = &_subarray_broadcast_data_free; - data->copyfunc = &_subarray_broadcast_data_copy; - data->src_N = src_size; - data->dst_N = dst_size; - data->src_itemsize = src_dtype->elsize; - data->dst_itemsize = dst_dtype->elsize; - - /* If the src object will need a DECREF, set src_dtype */ - if (move_references && PyDataType_REFCHK(src_dtype)) { - data->src_dtype = src_dtype; - Py_INCREF(src_dtype); - } - else { - data->src_dtype = NULL; - } - - /* If the dst object needs a DECREF to set it to NULL, set dst_dtype */ - if (PyDataType_REFCHK(dst_dtype)) { - data->dst_dtype = dst_dtype; - Py_INCREF(dst_dtype); - } - else { - data->dst_dtype = NULL; - } - - /* Calculate the broadcasting and set the offsets */ - offsets = &data->offsets; - ndim = (src_shape.len > dst_shape.len) ? 
src_shape.len : dst_shape.len; - for (index = 0; index < dst_size; ++index) { - dst_index = index; - src_index = 0; - for (i = ndim-1; i >= 0; --i) { - npy_intp coord = 0, shape; - - /* Get the dst coord of this index for dimension i */ - if (i >= ndim - dst_shape.len) { - shape = dst_shape.ptr[i-(ndim-dst_shape.len)]; - coord = dst_index % shape; - dst_index /= shape; - } - - /* Translate it into a src coord and update src_index */ - if (i >= ndim - src_shape.len) { - shape = src_shape.ptr[i-(ndim-src_shape.len)]; - if (shape == 1) { - coord = 0; - } - else { - if (coord < shape) { - src_index *= shape; - src_index += coord; - } - else { - /* Out of bounds, flag with -1 */ - src_index = -1; - break; - } - } - } - } - /* Set the offset */ - if (src_index == -1) { - offsets[index] = -1; - } - else { - offsets[index] = src_index * src_dtype->elsize; - } - } - - *outstransfer = &_strided_to_strided_subarray_broadcast; - *outtransferdata = data; - - return NPY_SUCCEED; -} - -/* - * Handles subarray transfer. 
To call this, at least one of the dtype's - * subarrays must be non-NULL - */ -static int -get_subarray_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - PyArray_StridedTransferFn *outstransfer, - void **outtransferdata) -{ - PyArray_Dims src_shape = {NULL, -1}, dst_shape = {NULL, -1}; - npy_intp src_size = 1, dst_size = 1; - - /* Get the subarray shapes and sizes */ - if (src_dtype->subarray != NULL) { - if (!(PyArray_IntpConverter(src_dtype->subarray->shape, - &src_shape))) { - PyErr_SetString(PyExc_ValueError, - "invalid shape in fixed-type tuple."); - return NPY_FAIL; - } - src_size = PyArray_MultiplyList(src_shape.ptr, src_shape.len); - src_dtype = src_dtype->subarray->base; - } - if (dst_dtype->subarray != NULL) { - if (!(PyArray_IntpConverter(dst_dtype->subarray->shape, - &dst_shape))) { - if (src_shape.ptr != NULL) { - PyDimMem_FREE(src_shape.ptr); - } - PyErr_SetString(PyExc_ValueError, - "invalid shape in fixed-type tuple."); - return NPY_FAIL; - } - dst_size = PyArray_MultiplyList(dst_shape.ptr, dst_shape.len); - dst_dtype = dst_dtype->subarray->base; - } - - /* - * Just a straight one-element copy. If the source size isn't 1, - * we copy the element at index 0. 
If the source data type is - * a reference and we're moving references, a DECREF for each - * source element would also be needed, so the general case will be - * used below - */ - if (dst_size == 1 && (src_size == 1 || - !move_references || - !PyDataType_REFCHK(src_dtype))) { - PyDimMem_FREE(src_shape.ptr); - PyDimMem_FREE(dst_shape.ptr); - - return PyArray_GetDTypeTransferFunction(aligned, - src_stride, dst_stride, - dst_dtype, dst_dtype, - move_references, - outstransfer, outtransferdata); - } - /* Copy the src value to all the dst values */ - else if (src_size == 1) { - PyDimMem_FREE(src_shape.ptr); - PyDimMem_FREE(dst_shape.ptr); - - return get_one_to_n_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - dst_size, - outstransfer, outtransferdata); - } - /* If the shapes match exactly, do an n to n copy */ - else if (src_shape.len == dst_shape.len && - PyArray_CompareLists(src_shape.ptr, dst_shape.ptr, - src_shape.len)) { - PyDimMem_FREE(src_shape.ptr); - PyDimMem_FREE(dst_shape.ptr); - - return get_n_to_n_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - src_size, - outstransfer, outtransferdata); - } - /* - * Copy the subarray with broadcasting, truncating, and zero-padding - * as necessary. - */ - else { - int ret = get_subarray_broadcast_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - src_size, dst_size, - src_shape, dst_shape, - move_references, - outstransfer, outtransferdata); - - PyDimMem_FREE(src_shape.ptr); - PyDimMem_FREE(dst_shape.ptr); - return ret; - } -} - -/* - * Handles subarray transfer. To call this, at least one of the dtype's - * fields must be non-NULL - */ -static int -get_fields_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - PyArray_StridedTransferFn *outstransfer, - void **outtransferdata) -{ - /* TODO! 
*/ - PyErr_SetString(PyExc_ValueError, - "fields not supported by transfer functions yet"); - return NPY_FAIL; -} - -NPY_NO_EXPORT int -PyArray_GetDTypeTransferFunction(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - PyArray_StridedTransferFn *outstransfer, - void **outtransferdata) -{ - npy_intp src_itemsize = src_dtype->elsize, - dst_itemsize = dst_dtype->elsize; - int src_type_num = src_dtype->type_num, - dst_type_num = dst_dtype->type_num; - - /* First look at the possibilities of just a copy or swap */ - if (src_itemsize == dst_itemsize && src_dtype->kind == dst_dtype->kind && - src_type_num < NPY_NTYPES && dst_type_num < NPY_NTYPES && - (src_dtype->fields == NULL || src_dtype->fields == Py_None) && - (dst_dtype->fields == NULL || dst_dtype->fields == Py_None) && - src_dtype->subarray == NULL && dst_dtype->subarray == NULL) { - /* The special types, which have no byte-order */ - switch (src_type_num) { - case NPY_VOID: - case NPY_STRING: - case NPY_UNICODE: - *outstransfer = PyArray_GetStridedCopyFn(0, - src_stride, dst_stride, - src_itemsize); - *outtransferdata = NULL; - return NPY_SUCCEED; - case NPY_OBJECT: - if (move_references) { - *outstransfer = &_strided_to_strided_move_references; - *outtransferdata = NULL; - } - else { - *outstransfer = &_strided_to_strided_copy_references; - *outtransferdata = NULL; - } - return NPY_SUCCEED; - } - - /* This is a straight copy */ - if (src_itemsize == 1 || PyArray_ISNBO(src_dtype->byteorder) == - PyArray_ISNBO(dst_dtype->byteorder)) { - *outstransfer = PyArray_GetStridedCopyFn(aligned, - src_stride, dst_stride, - src_itemsize); - *outtransferdata = NULL; - return (*outstransfer == NULL) ? 
NPY_FAIL : NPY_SUCCEED; - } - /* This is a straight copy + byte swap */ - else if (!PyTypeNum_ISCOMPLEX(src_type_num)) { - *outstransfer = PyArray_GetStridedCopySwapFn(aligned, - src_stride, dst_stride, - src_itemsize); - *outtransferdata = NULL; - return (*outstransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; - } - /* This is a straight copy + element pair byte swap */ - else { - *outstransfer = PyArray_GetStridedCopySwapPairFn(aligned, - src_stride, dst_stride, - src_itemsize); - *outtransferdata = NULL; - return (*outstransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; - } - } - - /* Handle subarrays */ - if (src_dtype->subarray != NULL || dst_dtype->subarray != NULL) { - return get_subarray_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - outstransfer, outtransferdata); - } - - /* Handle fields */ - if ((src_dtype->fields != NULL && src_dtype->fields != Py_None) || - (dst_dtype->fields != NULL && dst_dtype->fields != Py_None)) { - return get_fields_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - outstransfer, outtransferdata); - } - - /* Check for different-sized strings, unicodes, or voids */ - if (src_type_num == dst_type_num) switch (src_type_num) { - case NPY_STRING: - case NPY_UNICODE: - case NPY_VOID: - return PyArray_GetStridedZeroPadCopyFn(0, - src_stride, dst_stride, - src_dtype->elsize, dst_dtype->elsize, - outstransfer, outtransferdata); - } - - /* Otherwise a cast is necessary */ - return get_cast_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - outstransfer, outtransferdata); - -#if 0 - /* TODO check for fields & subarrays */ - printf("\n"); - PyObject_Print((PyObject *)src_dtype, stdout, 0); - printf(" -> "); - PyObject_Print((PyObject *)dst_dtype, stdout, 0); - printf("\n"); - - /* TODO: write the more complicated transfer code! 
*/ - *outstransfer = NULL; - *outtransferdata = NULL; - PyErr_SetString(PyExc_RuntimeError, - "General transfer function support has not been written yet"); - return NPY_FAIL; -#endif -} - typedef void (*_npy_stridedtransfer_dealloc)(void *); NPY_NO_EXPORT void PyArray_FreeStridedTransferData(void *transferdata) @@ -1892,7 +681,6 @@ PyArray_CopyStridedTransferData(void *transferdata) } - NPY_NO_EXPORT npy_intp PyArray_TransferNDimToStrided(npy_intp ndim, char *dst, npy_intp dst_stride, diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.h b/numpy/core/src/multiarray/lowlevel_strided_loops.h index 2d36f12f6..ef962038a 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.h +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.h @@ -87,13 +87,37 @@ PyArray_GetStridedCopySwapPairFn(npy_intp aligned, npy_intp src_stride, * Returns NPY_SUCCEED or NPY_FAIL */ NPY_NO_EXPORT int -PyArray_GetStridedZeroPadCopyFn(npy_intp aligned, +PyArray_GetStridedZeroPadCopyFn(int aligned, npy_intp src_stride, npy_intp dst_stride, npy_intp src_itemsize, npy_intp dst_itemsize, PyArray_StridedTransferFn *outstransfer, void **outtransferdata); /* + * Returns a transfer function which DECREFs any references in src_type. + * + * Returns NPY_SUCCEED or NPY_FAIL. + */ +NPY_NO_EXPORT int +PyArray_GetDecSrcRefTransferFunction(int aligned, + npy_intp src_stride, + PyArray_Descr *src_dtype, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata); + +/* + * Returns a transfer function which zeros out the dest values. + * + * Returns NPY_SUCCEED or NPY_FAIL. + */ +NPY_NO_EXPORT int +PyArray_GetSetDstZeroTransferFunction(int aligned, + npy_intp dst_stride, + PyArray_Descr *dst_dtype, + PyArray_StridedTransferFn *outstransfer, + void **outtransferdata); + +/* * If it's possible, gives back a transfer function which casts and/or * byte swaps data with the dtype 'src_dtype' into data with the dtype * 'dst_dtype'. 
If the outtransferdata is populated with a non-NULL value, diff --git a/numpy/core/src/multiarray/multiarraymodule_onefile.c b/numpy/core/src/multiarray/multiarraymodule_onefile.c index 89f6dab32..f29cf3244 100644 --- a/numpy/core/src/multiarray/multiarraymodule_onefile.c +++ b/numpy/core/src/multiarray/multiarraymodule_onefile.c @@ -38,7 +38,7 @@ #include "new_iterator.c" #include "new_iterator_pywrap.c" #include "lowlevel_strided_loops.c" - +#include "dtype_transfer.c" #ifndef Py_UNICODE_WIDE diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src index b518fb01b..85eaee7fb 100644 --- a/numpy/core/src/multiarray/new_iterator.c.src +++ b/numpy/core/src/multiarray/new_iterator.c.src @@ -4484,6 +4484,9 @@ npyiter_copy_from_buffers(NpyIter *iter) PyArray_Item_XDECREF(buffer, dtypes[iiter]); buffer += itemsize; } + + /* Also set the values to zero for safety */ + memset(buffers[iiter], 0, itemsize*size); } } } @@ -4585,13 +4588,13 @@ npyiter_copy_to_buffers(NpyIter *iter) } if (stransfer != NULL) { - npy_intp itemsize = PyArray_DESCR(operands[iiter])->elsize; + npy_intp src_itemsize = PyArray_DESCR(operands[iiter])->elsize; any_buffered = 1; /* If the data type requires zero-inititialization */ if (PyDataType_FLAGCHK(dtypes[iiter], NPY_NEEDS_INIT)) { - memset(ptrs[iiter], 0, itemsize*transfersize); + memset(ptrs[iiter], 0, dtypes[iiter]->elsize*transfersize); } PyArray_TransferNDimToStrided(ndim, @@ -4599,7 +4602,7 @@ npyiter_copy_to_buffers(NpyIter *iter) ad_ptrs[iiter], &ad_strides[iiter], axisdata_incr, &NAD_COORD(axisdata), axisdata_incr, &NAD_SHAPE(axisdata), axisdata_incr, - transfersize, itemsize, + transfersize, src_itemsize, stransfer, transferdata); } diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 8c98db6b7..492364a17 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -2037,7 +2037,9 @@ OBJECT_@kind@(char **args, intp *dimensions, 
intp *steps, void *NPY_UNUSED(func) BINARY_LOOP { PyObject *in1 = *(PyObject **)ip1; PyObject *in2 = *(PyObject **)ip2; - int ret = PyObject_RichCompareBool(in1, in2, Py_@OP@); + int ret = PyObject_RichCompareBool( + in1 ? in1 : Py_None, + in2 ? in2 : Py_None, Py_@OP@); if (ret == -1) { return; } diff --git a/numpy/core/tests/test_new_iterator.py b/numpy/core/tests/test_new_iterator.py index d83515948..87c9ce14e 100644 --- a/numpy/core/tests/test_new_iterator.py +++ b/numpy/core/tests/test_new_iterator.py @@ -1,7 +1,7 @@ import numpy as np from numpy import array, arange, newiter from numpy.testing import * -import sys +import sys, warnings import warnings @@ -919,6 +919,44 @@ def test_iter_object_arrays(): assert_equal(sys.getrefcount(obj), rc-1) assert_equal(a, np.array([None]*4, dtype='O')) + # Conversions to/from objects + a = np.arange(6, dtype='O') + i = newiter(a, ['refs_ok','buffered'], ['readwrite'], + casting='unsafe', op_dtypes='i4') + for x in i: + x[()] += 1 + assert_equal(a, np.arange(6)+1) + + a = np.arange(6, dtype='i4') + i = newiter(a, ['refs_ok','buffered'], ['readwrite'], + casting='unsafe', op_dtypes='O') + for x in i: + x[()] += 1 + assert_equal(a, np.arange(6)+1) + + # Non-contiguous object array + a = np.zeros((6,), dtype=[('p','i1'),('a','O')]) + a = a['a'] + a[:] = np.arange(6) + i = newiter(a, ['refs_ok','buffered'], ['readwrite'], + casting='unsafe', op_dtypes='i4') + for x in i: + x[()] += 1 + assert_equal(a, np.arange(6)+1) + + #Non-contiguous value array + a = np.zeros((6,), dtype=[('p','i1'),('a','i4')]) + a = a['a'] + a[:] = np.arange(6) + 98172488 + i = newiter(a, ['refs_ok','buffered'], ['readwrite'], + casting='unsafe', op_dtypes='O') + ob = i[0][()] + rc = sys.getrefcount(ob) + for x in i: + x[()] += 1 + assert_equal(sys.getrefcount(ob), rc-1) + assert_equal(a, np.arange(6)+98172489) + def test_iter_common_dtype(): # Check that the iterator finds a common data type correctly @@ -1449,7 +1487,8 @@ def 
test_iter_buffered_cast_byteswapped(): a = np.arange(10, dtype='f4').newbyteorder().byteswap() i = newiter(a, ['buffered','no_inner_iteration'], - [['readwrite','nbo','aligned','same_kind_casts']], + [['readwrite','nbo','aligned']], + casting='same_kind', op_dtypes=[np.dtype('f8').newbyteorder()], buffersize=3) for v in i: @@ -1457,17 +1496,23 @@ def test_iter_buffered_cast_byteswapped(): assert_equal(a, 2*np.arange(10, dtype='f4')) - a = np.arange(10, dtype='f8').newbyteorder().byteswap() - i = newiter(a, ['buffered','no_inner_iteration'], - [['readwrite','nbo','aligned','unsafe_casts']], - op_dtypes=[np.dtype('c8').newbyteorder()], - buffersize=3) - for v in i: - v[()] *= 2 - - assert_equal(a, 2*np.arange(10, dtype='f8')) - -def test_iter_buffered_cast_byteswapped(): + try: + warnings.simplefilter("ignore", np.ComplexWarning) + + a = np.arange(10, dtype='f8').newbyteorder().byteswap() + i = newiter(a, ['buffered','no_inner_iteration'], + [['readwrite','nbo','aligned']], + casting='unsafe', + op_dtypes=[np.dtype('c8').newbyteorder()], + buffersize=3) + for v in i: + v[()] *= 2 + + assert_equal(a, 2*np.arange(10, dtype='f8')) + finally: + warnings.simplefilter("default", np.ComplexWarning) + +def test_iter_buffered_cast_byteswapped_complex(): # Test that buffering can handle a cast which requires swap->cast->copy a = np.arange(10, dtype='c8').newbyteorder().byteswap() @@ -1513,6 +1558,296 @@ def test_iter_buffered_cast_byteswapped(): v[()] *= 2 assert_equal(a, 2*np.arange(10, dtype=np.longdouble)) +def test_iter_buffered_cast_structured_type(): + # Tests buffering of structured types + + # simple -> struct type (duplicates the value) + sdt = [('a', 'f4'), ('b', 'i8'), ('c', 'c8', (2,3)), ('d', 'O')] + a = np.arange(3, dtype='f4') + 0.5 + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt) + vals = [np.array(x) for x in i] + assert_equal(vals[0]['a'], 0.5) + assert_equal(vals[0]['b'], 0) + assert_equal(vals[0]['c'], 
[[(0.5)]*3]*2) + assert_equal(vals[0]['d'], 0.5) + assert_equal(vals[1]['a'], 1.5) + assert_equal(vals[1]['b'], 1) + assert_equal(vals[1]['c'], [[(1.5)]*3]*2) + assert_equal(vals[1]['d'], 1.5) + assert_equal(vals[0].dtype, np.dtype(sdt)) + + # object -> struct type + sdt = [('a', 'f4'), ('b', 'i8'), ('c', 'c8', (2,3)), ('d', 'O')] + a = np.arange(3, dtype='O') + 0.5 + rc = sys.getrefcount(a[0]) + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt) + vals = [np.array(x) for x in i] + assert_equal(vals[0]['a'], 0.5) + assert_equal(vals[0]['b'], 0) + assert_equal(vals[0]['c'], [[(0.5)]*3]*2) + assert_equal(vals[0]['d'], 0.5) + assert_equal(vals[1]['a'], 1.5) + assert_equal(vals[1]['b'], 1) + assert_equal(vals[1]['c'], [[(1.5)]*3]*2) + assert_equal(vals[1]['d'], 1.5) + assert_equal(vals[0].dtype, np.dtype(sdt)) + vals, i, x = [None]*3 + assert_equal(sys.getrefcount(a[0]), rc) + + # struct type -> simple (takes the first value) + sdt = [('a', 'f4'), ('b', 'i8'), ('d', 'O')] + a = np.array([(5.5,7,'test'),(8,10,11)], dtype=sdt) + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes='i4') + assert_equal([x[()] for x in i], [5, 8]) + + # struct type -> struct type (field-wise copy) + sdt1 = [('a', 'f4'), ('b', 'i8'), ('d', 'O')] + sdt2 = [('d', 'u2'), ('a', 'O'), ('b', 'f8')] + a = np.array([(1,2,3),(4,5,6)], dtype=sdt1) + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + assert_equal([np.array(x) for x in i], + [np.array((3,1,2), dtype=sdt2), + np.array((6,4,5), dtype=sdt2)]) + + # struct type -> struct type (field gets discarded) + sdt1 = [('a', 'f4'), ('b', 'i8'), ('d', 'O')] + sdt2 = [('b', 'O'), ('a', 'f8')] + a = np.array([(1,2,3),(4,5,6)], dtype=sdt1) + i = newiter(a, ['buffered','refs_ok'], ['readwrite'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + vals = [] + for x in i: + 
vals.append(np.array(x)) + x['a'] = x['b']+3 + assert_equal(vals, [np.array((2,1), dtype=sdt2), + np.array((5,4), dtype=sdt2)]) + assert_equal(a, np.array([(5,2,None),(8,5,None)], dtype=sdt1)) + + # struct type -> struct type (structured field gets discarded) + sdt1 = [('a', 'f4'), ('b', 'i8'), ('d', [('a', 'i2'),('b','i4')])] + sdt2 = [('b', 'O'), ('a', 'f8')] + a = np.array([(1,2,(0,9)),(4,5,(20,21))], dtype=sdt1) + i = newiter(a, ['buffered','refs_ok'], ['readwrite'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + vals = [] + for x in i: + vals.append(np.array(x)) + x['a'] = x['b']+3 + assert_equal(vals, [np.array((2,1), dtype=sdt2), + np.array((5,4), dtype=sdt2)]) + assert_equal(a, np.array([(5,2,(0,0)),(8,5,(0,0))], dtype=sdt1)) + + # struct type -> struct type (structured field w/ ref gets discarded) + sdt1 = [('a', 'f4'), ('b', 'i8'), ('d', [('a', 'i2'),('b','O')])] + sdt2 = [('b', 'O'), ('a', 'f8')] + a = np.array([(1,2,(0,9)),(4,5,(20,21))], dtype=sdt1) + i = newiter(a, ['buffered','refs_ok'], ['readwrite'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + vals = [] + for x in i: + vals.append(np.array(x)) + x['a'] = x['b']+3 + assert_equal(vals, [np.array((2,1), dtype=sdt2), + np.array((5,4), dtype=sdt2)]) + assert_equal(a, np.array([(5,2,(0,None)),(8,5,(0,None))], dtype=sdt1)) + + # struct type -> struct type back (structured field w/ ref gets discarded) + sdt1 = [('b', 'O'), ('a', 'f8')] + sdt2 = [('a', 'f4'), ('b', 'i8'), ('d', [('a', 'i2'),('b','O')])] + a = np.array([(1,2),(4,5)], dtype=sdt1) + i = newiter(a, ['buffered','refs_ok'], ['readwrite'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + vals = [] + for x in i: + vals.append(np.array(x)) + assert_equal(x['d'], np.array((0, None), dtype=[('a','i2'),('b','O')])) + x['a'] = x['b']+3 + assert_equal(vals, [np.array((2,1,(0,None)), dtype=sdt2), + np.array((5,4,(0,None)), dtype=sdt2)]) + 
assert_equal(a, np.array([(1,4),(4,7)], dtype=sdt1)) + +def test_iter_buffered_cast_subarray(): + # Tests buffering of subarrays + + # one element -> many (copies it to all) + sdt1 = [('a', 'f4')] + sdt2 = [('a', 'f8', (3,2,2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6) + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + for x, count in zip(i, range(6)): + assert_(np.all(x['a'] == count)) + + # one element -> many -> back (copies it to all) + sdt1 = [('a', 'O', (1,1))] + sdt2 = [('a', 'O', (3,2,2))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:,0,0] = np.arange(6) + i = newiter(a, ['buffered','refs_ok'], ['readwrite'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_(np.all(x['a'] == count)) + x['a'][0] += 2 + count += 1 + assert_equal(a['a'], np.arange(6).reshape(6,1,1)+2) + + # many -> one element -> back (copies just element 0) + sdt1 = [('a', 'O', (3,2,2))] + sdt2 = [('a', 'O', (1,))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:,0,0,0] = np.arange(6) + i = newiter(a, ['buffered','refs_ok'], ['readwrite'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], count) + x['a'] += 2 + count += 1 + assert_equal(a['a'], np.arange(6).reshape(6,1,1,1)*np.ones((1,3,2,2))+2) + + # many -> one element -> back (copies just element 0) + sdt1 = [('a', 'f8', (3,2,2))] + sdt2 = [('a', 'O', (1,))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:,0,0,0] = np.arange(6) + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], count) + count += 1 + + # many -> one element (copies just element 0) + sdt1 = [('a', 'O', (3,2,2))] + sdt2 = [('a', 'f4', (1,))] + a = np.zeros((6,), dtype=sdt1) + a['a'][:,0,0,0] = np.arange(6) + i 
= newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], count) + count += 1 + + # many -> matching shape (straightforward copy) + sdt1 = [('a', 'O', (3,2,2))] + sdt2 = [('a', 'f4', (3,2,2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*3*2*2).reshape(6,3,2,2) + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], a[count]['a']) + count += 1 + + # vector -> smaller vector (truncates) + sdt1 = [('a', 'f8', (6,))] + sdt2 = [('a', 'f4', (2,))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*6).reshape(6,6) + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'], a[count]['a'][:2]) + count += 1 + + # vector -> bigger vector (pads with zeros) + sdt1 = [('a', 'f8', (2,))] + sdt2 = [('a', 'f4', (6,))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*2).reshape(6,2) + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][:2], a[count]['a']) + assert_equal(x['a'][2:], [0,0,0,0]) + count += 1 + + # vector -> matrix (broadcasts) + sdt1 = [('a', 'f8', (2,))] + sdt2 = [('a', 'f4', (2,2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*2).reshape(6,2) + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][0], a[count]['a']) + assert_equal(x['a'][1], a[count]['a']) + count += 1 + + # vector -> matrix (broadcasts and zero-pads) + sdt1 = [('a', 'f8', (2,1))] + sdt2 = [('a', 'f4', (3,2))] + a = np.zeros((6,), dtype=sdt1) 
+ a['a'] = np.arange(6*2).reshape(6,2,1) + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][:2,0], a[count]['a'][:,0]) + assert_equal(x['a'][:2,1], a[count]['a'][:,0]) + assert_equal(x['a'][2,:], [0,0]) + count += 1 + + # matrix -> matrix (truncates and zero-pads) + sdt1 = [('a', 'f8', (2,3))] + sdt2 = [('a', 'f4', (3,2))] + a = np.zeros((6,), dtype=sdt1) + a['a'] = np.arange(6*2*3).reshape(6,2,3) + i = newiter(a, ['buffered','refs_ok'], ['readonly'], + casting='unsafe', + op_dtypes=sdt2) + assert_equal(i[0].dtype, np.dtype(sdt2)) + count = 0 + for x in i: + assert_equal(x['a'][:2,0], a[count]['a'][:,0]) + assert_equal(x['a'][:2,1], a[count]['a'][:,1]) + assert_equal(x['a'][2,:], [0,0]) + count += 1 + def test_iter_buffering_badwriteback(): # Writing back from a buffer cannot combine elements |