diff options
-rw-r--r-- | numpy/core/src/multiarray/dtype_transfer.c | 221 | ||||
-rw-r--r-- | numpy/core/src/multiarray/item_selection.c | 6 | ||||
-rw-r--r-- | numpy/core/src/multiarray/na_mask.c | 26 | ||||
-rw-r--r-- | numpy/core/src/private/lowlevel_strided_loops.h | 53 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 2 |
5 files changed, 211 insertions, 97 deletions
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index 023037abe..aaea290b1 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -3889,10 +3889,10 @@ PyArray_CastRawArrays(npy_intp count, * Returns 0 on success, -1 on failure. */ NPY_NO_EXPORT int -PyArray_PrepareOneRawArrayIter(int ndim, char *data, - npy_intp *shape, npy_intp *strides, - int *out_ndim, char **out_data, - npy_intp *out_shape, npy_intp *out_strides) +PyArray_PrepareOneRawArrayIter(int ndim, npy_intp *shape, + char *data, npy_intp *strides, + int *out_ndim, npy_intp *out_shape, + char **out_data, npy_intp *out_strides) { _npy_stride_sort_item strideperm[NPY_MAXDIMS]; int i, j; @@ -3975,82 +3975,112 @@ PyArray_PrepareOneRawArrayIter(int ndim, char *data, } /* - * Casts the elements from one n-dimensional array to another n-dimensional - * array with identical shape but possibly different strides and dtypes. - * Does not account for overlap. + * The same as PyArray_PrepareOneRawArrayIter, but for two + * operands instead of one. Any broadcasting of the two operands + * should have already been done before calling this function, + * as the ndim and shape is only specified once for both operands. * - * Returns NPY_SUCCEED or NPY_FAIL. + * Only the strides of the first operand are used to reorder + * the dimensions, no attempt to consider all the strides together + * is made, as is done in the NpyIter object. + * + * You can use this together with NPY_RAW_ITER_START and + * NPY_RAW_ITER_TWO_NEXT to handle the looping boilerplate of everything + * but the innermost loop (which is for idim == 0). + * + * Returns 0 on success, -1 on failure. */ NPY_NO_EXPORT int -PyArray_CastRawNDimArrays(int ndim, npy_intp *shape, - char *src, char *dst, - npy_intp *src_strides, npy_intp *dst_strides, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references) +PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape, + char *dataA, npy_intp *stridesA, + char *dataB, npy_intp *stridesB, + int *out_ndim, npy_intp *out_shape, + char **out_dataA, npy_intp *out_stridesA, + char **out_dataB, npy_intp *out_stridesB) { - PyArray_StridedTransferFn *stransfer = NULL; - NpyAuxData *transferdata = NULL; - int i, j; - npy_intp src_align, dst_align; - int aligned, needs_api = 0; - npy_intp coords[NPY_MAXDIMS]; - npy_intp shape_copy[NPY_MAXDIMS]; - npy_intp src_strides_copy[NPY_MAXDIMS]; - npy_intp dst_strides_copy[NPY_MAXDIMS]; _npy_stride_sort_item strideperm[NPY_MAXDIMS]; + int i, j; - /* Use the simpler function for 0 and 1 dimensional transfers */ - if (ndim <= 1) { - if (ndim == 0) { - return PyArray_CastRawArrays(1, src, dst, 0, 0, - src_dtype, dst_dtype, move_references); + /* Special case 0 and 1 dimensions */ + if (ndim == 0) { + *out_ndim = 1; + *out_dataA = dataA; + *out_dataB = dataB; + out_shape[0] = 1; + out_stridesA[0] = 0; + out_stridesB[0] = 0; + return 0; + } + else if (ndim == 1) { + npy_intp stride_entryA = stridesA[0], stride_entryB = stridesB[0]; + npy_intp shape_entry = shape[0]; + *out_ndim = 1; + out_shape[0] = shape[0]; + /* Always make a positive stride for the first operand */ + if (stride_entryA >= 0) { + *out_dataA = dataA; + *out_dataB = dataB; + out_stridesA[0] = stride_entryA; + out_stridesB[0] = stride_entryB; } else { - return PyArray_CastRawArrays(shape[0], src, dst, - src_strides[0], dst_strides[0], - src_dtype, dst_dtype, move_references); + *out_dataA = dataA + stride_entryA * (shape_entry - 1); + *out_dataB = dataB + stride_entryB * (shape_entry - 1); + out_stridesA[0] = -stride_entryA; + out_stridesB[0] = -stride_entryB; } + return 0; } - /* Determine data alignment */ - src_align = (npy_intp)src; - for (i = 0; i < ndim; ++i) { - src_align |= src_strides[i]; - } - dst_align = (npy_intp)dst; + /* Sort the axes based on the destination strides */ + PyArray_CreateSortedStridePerm(ndim, stridesA, strideperm); for (i = 0; i < ndim; ++i) { - dst_align |= dst_strides[i]; + int iperm = ndim - strideperm[i].perm - 1; + out_shape[i] = shape[iperm]; + out_stridesA[i] = stridesA[iperm]; + out_stridesB[i] = stridesB[iperm]; } - aligned = (src_align & (src_dtype->alignment - 1)) == 0 && - (dst_align & (dst_dtype->alignment - 1)) == 0; - /* Sort the axes based on the destination strides */ - PyArray_CreateSortedStridePerm(ndim, dst_strides, strideperm); + /* Reverse any negative strides of operand A */ for (i = 0; i < ndim; ++i) { - int iperm = strideperm[i].perm; - shape_copy[i] = shape[iperm]; - src_strides_copy[i] = src_strides[iperm]; - dst_strides_copy[i] = dst_strides[iperm]; + npy_intp stride_entryA = out_stridesA[i]; + npy_intp stride_entryB = out_stridesB[i]; + npy_intp shape_entry = out_shape[i]; + + if (stride_entryA < 0) { + dataA += stride_entryA * (shape_entry - 1); + dataB += stride_entryB * (shape_entry - 1); + out_stridesA[i] = -stride_entryA; + out_stridesB[i] = -stride_entryB; + } + /* Detect 0-size arrays here */ + if (shape_entry == 0) { + *out_ndim = 1; + *out_dataA = dataA; + *out_dataB = dataB; + out_shape[0] = 0; + out_stridesA[0] = 0; + out_stridesB[0] = 0; + return 0; + } } - /* Coalesce dimensions where it's possible */ + /* Coalesce any dimensions where possible */ i = 0; for (j = 1; j < ndim; ++j) { - if (shape[i] == 1) { + if (out_shape[i] == 1) { /* Drop axis i */ - shape[i] = shape[j]; - src_strides_copy[i] = src_strides_copy[j]; - dst_strides_copy[i] = dst_strides_copy[j]; + out_shape[i] = out_shape[j]; + out_stridesA[i] = out_stridesA[j]; + out_stridesB[i] = out_stridesB[j]; } - else if (shape[j] == 1) { + else if (out_shape[j] == 1) { /* Drop axis j */ } - else if (src_strides_copy[i] == src_strides_copy[j] * shape[j] && - dst_strides_copy[i] == dst_strides_copy[j] * shape[j]) { + else if (out_stridesA[i] * out_shape[i] == out_stridesA[j] && + out_stridesB[i] * out_shape[i] == out_stridesB[j]) { /* Coalesce axes i and j */ - shape[i] *= shape[j]; - src_strides_copy[i] = src_strides_copy[j]; - dst_strides_copy[i] = dst_strides_copy[j]; + out_shape[i] *= out_shape[j]; } else { /* Can't coalesce, go to next i */ @@ -4059,9 +4089,61 @@ PyArray_CastRawNDimArrays(int ndim, npy_intp *shape, } ndim = i+1; + *out_dataA = dataA; + *out_dataB = dataB; + *out_ndim = ndim; + return 0; + +} + +/* + * Casts the elements from one n-dimensional array to another n-dimensional + * array with identical shape but possibly different strides and dtypes. + * Does not account for overlap. + * + * Returns NPY_SUCCEED or NPY_FAIL. + */ +NPY_NO_EXPORT int +PyArray_CastRawNDimArrays(int ndim, npy_intp *shape, + char *src, char *dst, + npy_intp *src_strides, npy_intp *dst_strides, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references) +{ + PyArray_StridedTransferFn *stransfer = NULL; + NpyAuxData *transferdata = NULL; + int idim; + npy_intp src_align, dst_align; + int aligned, needs_api = 0; + npy_intp coords[NPY_MAXDIMS]; + npy_intp shape_copy[NPY_MAXDIMS]; + npy_intp src_strides_copy[NPY_MAXDIMS]; + npy_intp dst_strides_copy[NPY_MAXDIMS]; + + /* Determine data alignment */ + src_align = (npy_intp)src; + for (idim = 0; idim < ndim; ++idim) { + src_align |= src_strides[idim]; + } + dst_align = (npy_intp)dst; + for (idim = 0; idim < ndim; ++idim) { + dst_align |= dst_strides[idim]; + } + aligned = (src_align & (src_dtype->alignment - 1)) == 0 && + (dst_align & (dst_dtype->alignment - 1)) == 0; + + if (PyArray_PrepareTwoRawArrayIter(ndim, shape, + dst, dst_strides, + src, src_strides, + &ndim, shape_copy, + &dst, dst_strides_copy, + &src, src_strides_copy) < 0) { + return NPY_FAIL; + } + /* Get the function to do the casting */ if (PyArray_GetDTypeTransferFunction(aligned, - src_strides[ndim-1], dst_strides[ndim-1], + src_strides[0], dst_strides[0], src_dtype, dst_dtype, move_references, &stransfer, &transferdata, @@ -4069,26 +4151,13 @@ PyArray_CastRawNDimArrays(int ndim, npy_intp *shape, return NPY_FAIL; } - /* Do the copying */ - memset(coords, 0, ndim * sizeof(npy_intp)); - do { - /* Copy along the last dimension */ - i = ndim - 1; - stransfer(dst, dst_strides_copy[i], src, src_strides_copy[i], shape[i], + NPY_RAW_ITER_START(idim, ndim, coords, shape_copy) { + stransfer(dst, dst_strides_copy[0], + src, src_strides_copy[0], shape_copy[0], src_dtype->elsize, transferdata); - --i; - /* Increment to the next n-dimensional coordinate */ - for (;i > 0; --i) { - if (++coords[i] == shape[i]) { - coords[i] = 0; - src -= (shape[i] - 1) * src_strides_copy[i]; - } - else { - src += src_strides_copy[i]; - break; - } - } - } while (i > 0); + } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coords, shape_copy, + src, src_strides, + dst, dst_strides); /* Cleanup */ NPY_AUXDATA_FREE(transferdata); diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index c0290174f..08098d0b1 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -1707,8 +1707,10 @@ count_boolean_trues(int ndim, char *data, npy_intp *ashape, npy_intp *astrides) /* Use raw iteration with no heap memory allocation */ if (PyArray_PrepareOneRawArrayIter( - ndim, data, ashape, astrides, - &ndim, &data, shape, strides) < 0) { + ndim, ashape, + data, astrides, + &ndim, shape, + &data, strides) < 0) { return -1; } diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c index ef80c310c..ecb54bc7e 100644 --- a/numpy/core/src/multiarray/na_mask.c +++ b/numpy/core/src/multiarray/na_mask.c @@ -56,15 +56,16 @@ PyArray_ContainsNA(PyArrayObject *arr) /* Use raw iteration with no heap memory allocation */ if (PyArray_PrepareOneRawArrayIter( - PyArray_NDIM(arr), PyArray_MASKNA_DATA(arr), - PyArray_DIMS(arr), PyArray_MASKNA_STRIDES(arr), - &ndim, &data, shape, strides) < 0) { + PyArray_NDIM(arr), PyArray_DIMS(arr), + PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr), + &ndim, shape, + &data, strides) < 0) { PyErr_Clear(); return 1; } /* Do the iteration */ - NPY_RAW_ITER_START(idim, ndim, coord, shape, strides, data) { + NPY_RAW_ITER_START(idim, ndim, coord, shape) { char *d = data; /* Process the innermost dimension */ for (i = 0; i < shape[0]; ++i, d += strides[0]) { @@ -72,7 +73,7 @@ PyArray_ContainsNA(PyArrayObject *arr) return 1; } } - } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, strides, data); + } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides); } return 0; @@ -108,29 +109,30 @@ PyArray_AssignMaskNA(PyArrayObject *arr, npy_mask maskvalue) /* Use raw iteration with no heap memory allocation */ if (PyArray_PrepareOneRawArrayIter( - PyArray_NDIM(arr), PyArray_MASKNA_DATA(arr), - PyArray_DIMS(arr), PyArray_MASKNA_STRIDES(arr), - &ndim, &data, shape, strides) < 0) { + PyArray_NDIM(arr), PyArray_DIMS(arr), + PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr), + &ndim, shape, + &data, strides) < 0) { PyErr_Clear(); return 1; } /* Special case contiguous inner stride */ if (strides[0] == 1) { - NPY_RAW_ITER_START(idim, ndim, coord, shape, strides, data) { + NPY_RAW_ITER_START(idim, ndim, coord, shape) { /* Process the innermost dimension */ memset(data, maskvalue, shape[0]); - } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, strides, data); + } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides); } /* General inner stride */ else { - NPY_RAW_ITER_START(idim, ndim, coord, shape, strides, data) { + NPY_RAW_ITER_START(idim, ndim, coord, shape) { char *d = data; /* Process the innermost dimension */ for (i = 0; i < shape[0]; ++i, d += strides[0]) { *(npy_mask *)d = maskvalue; } - } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, strides, data); + } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides); } return 0; diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h index 711239f35..840f4ec04 100644 --- a/numpy/core/src/private/lowlevel_strided_loops.h +++ b/numpy/core/src/private/lowlevel_strided_loops.h @@ -334,18 +334,42 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim, * Returns 0 on success, -1 on failure. */ NPY_NO_EXPORT int -PyArray_PrepareOneRawArrayIter(int ndim, char *data, - npy_intp *shape, npy_intp *strides, - int *out_ndim, char **out_data, - npy_intp *out_shape, npy_intp *out_strides); +PyArray_PrepareOneRawArrayIter(int ndim, npy_intp *shape, + char *data, npy_intp *strides, + int *out_ndim, npy_intp *out_shape, + char **out_data, npy_intp *out_strides); + +/* + * The same as PyArray_PrepareOneRawArrayIter, but for two + * operands instead of one. Any broadcasting of the two operands + * should have already been done before calling this function, + * as the ndim and shape is only specified once for both operands. + * + * Only the strides of the first operand are used to reorder + * the dimensions, no attempt to consider all the strides together + * is made, as is done in the NpyIter object. + * + * You can use this together with NPY_RAW_ITER_START and + * NPY_RAW_ITER_TWO_NEXT to handle the looping boilerplate of everything + * but the innermost loop (which is for idim == 0). + * + * Returns 0 on success, -1 on failure. + */ +NPY_NO_EXPORT int +PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape, + char *dataA, npy_intp *stridesA, + char *dataB, npy_intp *stridesB, + int *out_ndim, npy_intp *out_shape, + char **out_dataA, npy_intp *out_stridesA, + char **out_dataB, npy_intp *out_stridesB); /* Start raw iteration */ -#define NPY_RAW_ITER_START(idim, ndim, coord, shape, strides, data) \ +#define NPY_RAW_ITER_START(idim, ndim, coord, shape) \ memset((coord), 0, (ndim) * sizeof(coord[0])); \ do { /* Increment to the next n-dimensional coordinate for one raw array */ -#define NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, strides, data) \ +#define NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides) \ for ((idim) = 1; (idim) < (ndim); ++(idim)) { \ if (++(coord)[idim] == (shape)[idim]) { \ (coord)[idim] = 0; \ @@ -358,6 +382,23 @@ PyArray_PrepareOneRawArrayIter(int ndim, char *data, } \ } while ((idim) < (ndim)); \ +/* Increment to the next n-dimensional coordinate for two raw arrays */ +#define NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape, \ + dataA, stridesA, dataB, stridesB) \ + for ((idim) = 1; (idim) < (ndim); ++(idim)) { \ + if (++(coord)[idim] == (shape)[idim]) { \ + (coord)[idim] = 0; \ + (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \ + (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \ + } \ + else { \ + (dataA) += (stridesA)[idim]; \ + (dataB) += (stridesB)[idim]; \ + break; \ + } \ + } \ + } while ((idim) < (ndim)); \ + /* diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 643394d6a..eb1c231d9 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -3528,7 +3528,7 @@ PyUFunc_GenericReduction(PyUFuncObject *self, PyObject *args, } if (self->nout != 1) { PyErr_Format(PyExc_ValueError, - "%s only supported for functions " \ + "%s only supported for functions " "returning a single value", _reduce_type[operation]); return NULL; |