diff options
author | Mark Wiebe <mwiebe@enthought.com> | 2011-08-01 11:50:56 -0500 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2011-08-27 07:26:49 -0600 |
commit | 9aa599c90f49cd3b435b0ce9a637fee5fa94321a (patch) | |
tree | d27ec7d43c9a4213b402d2da4a4dd74a9ea40ab9 /numpy | |
parent | 3441a3e7b0314488a697a839357e453f21de0577 (diff) | |
download | numpy-9aa599c90f49cd3b435b0ce9a637fee5fa94321a.tar.gz |
ENH: missingdata: Write function for reducing the NA mask array
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/code_generators/numpy_api.py | 1 | ||||
-rw-r--r-- | numpy/core/src/multiarray/dtype_transfer.c | 26 | ||||
-rw-r--r-- | numpy/core/src/multiarray/item_selection.c | 42 | ||||
-rw-r--r-- | numpy/core/src/multiarray/na_mask.c | 182 | ||||
-rw-r--r-- | numpy/core/src/private/lowlevel_strided_loops.h | 5 |
5 files changed, 173 insertions, 83 deletions
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py index 49d38c32f..7b1fa26c3 100644 --- a/numpy/core/code_generators/numpy_api.py +++ b/numpy/core/code_generators/numpy_api.py @@ -326,6 +326,7 @@ multiarray_funcs_api = { 'PyArray_AllocateMaskNA': 286, 'NpyIter_GetFirstMaskNAOp': 288, 'NpyIter_GetMaskNAIndices': 289, + 'PyArray_ReduceMaskArray': 290, } ufunc_types_api = { diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index aaea290b1..23c1f4e76 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -4115,10 +4115,10 @@ PyArray_CastRawNDimArrays(int ndim, npy_intp *shape, int idim; npy_intp src_align, dst_align; int aligned, needs_api = 0; - npy_intp coords[NPY_MAXDIMS]; - npy_intp shape_copy[NPY_MAXDIMS]; - npy_intp src_strides_copy[NPY_MAXDIMS]; - npy_intp dst_strides_copy[NPY_MAXDIMS]; + npy_intp coord[NPY_MAXDIMS]; + npy_intp shape_it[NPY_MAXDIMS]; + npy_intp src_strides_it[NPY_MAXDIMS]; + npy_intp dst_strides_it[NPY_MAXDIMS]; /* Determine data alignment */ src_align = (npy_intp)src; @@ -4135,9 +4135,9 @@ PyArray_CastRawNDimArrays(int ndim, npy_intp *shape, if (PyArray_PrepareTwoRawArrayIter(ndim, shape, dst, dst_strides, src, src_strides, - &ndim, shape_copy, - &dst, dst_strides_copy, - &src, src_strides_copy) < 0) { + &ndim, shape_it, + &dst, dst_strides_it, + &src, src_strides_it) < 0) { return NPY_FAIL; } @@ -4151,13 +4151,13 @@ PyArray_CastRawNDimArrays(int ndim, npy_intp *shape, return NPY_FAIL; } - NPY_RAW_ITER_START(idim, ndim, coords, shape_copy) { - stransfer(dst, dst_strides_copy[0], - src, src_strides_copy[0], shape_copy[0], + NPY_RAW_ITER_START(idim, ndim, coord, shape_it) { + stransfer(dst, dst_strides_it[0], + src, src_strides_it[0], shape_it[0], src_dtype->elsize, transferdata); - } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coords, shape_copy, - src, src_strides, - dst, dst_strides); + } 
NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it, + src, src_strides_it, + dst, dst_strides_it); /* Cleanup */ NPY_AUXDATA_FREE(transferdata); diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 08098d0b1..5be85aaef 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -19,8 +19,6 @@ #include "lowlevel_strided_loops.h" #include "item_selection.h" -#define _check_axis PyArray_CheckAxis - /*NUMPY_API * Take */ @@ -37,7 +35,7 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis, int err; indices = NULL; - self = (PyArrayObject *)_check_axis(self0, &axis, NPY_ARRAY_CARRAY); + self = (PyArrayObject *)PyArray_CheckAxis(self0, &axis, NPY_ARRAY_CARRAY); if (self == NULL) { return NULL; } @@ -507,7 +505,7 @@ PyArray_Repeat(PyArrayObject *aop, PyObject *op, int axis) nd = PyArray_NDIM(repeats); counts = (npy_intp *)PyArray_DATA(repeats); - if ((ap=_check_axis(aop, &axis, NPY_ARRAY_CARRAY))==NULL) { + if ((ap=PyArray_CheckAxis(aop, &axis, NPY_ARRAY_CARRAY))==NULL) { Py_DECREF(repeats); return NULL; } @@ -1080,7 +1078,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) } /* Creates new reference op2 */ - if ((op2=(PyArrayObject *)_check_axis(op, &axis, 0)) == NULL) { + if ((op2=(PyArrayObject *)PyArray_CheckAxis(op, &axis, 0)) == NULL) { return NULL; } /* Determine if we should use new algorithm or not */ @@ -1719,51 +1717,25 @@ count_boolean_trues(int ndim, char *data, npy_intp *ashape, npy_intp *astrides) return 0; } - /* Do the iteration */ - memset(coord, 0, ndim * sizeof(npy_intp)); /* Special case for contiguous inner loop */ if (strides[0] == 1) { - do { + NPY_RAW_ITER_START(idim, ndim, coord, shape) { char *d = data; /* Process the innermost dimension */ for (i = 0; i < shape[0]; ++i, ++d) { count += (*d != 0); } - - /* Increment to the next n-dimensional coordinate */ - for (idim = 1; idim < ndim; ++idim) { - if 
(++coord[idim] == shape[idim]) { - coord[idim] = 0; - data -= (shape[idim] - 1) * strides[idim]; - } - else { - data += strides[idim]; - break; - } - } - } while (i < ndim); + } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides); } /* General inner loop */ else { - do { + NPY_RAW_ITER_START(idim, ndim, coord, shape) { char *d = data; /* Process the innermost dimension */ for (i = 0; i < shape[0]; ++i, d += strides[0]) { count += (*d != 0); } - - /* Increment to the next n-dimensional coordinate */ - for (idim = 1; idim < ndim; ++idim) { - if (++coord[idim] == shape[idim]) { - coord[idim] = 0; - data -= (shape[idim] - 1) * strides[idim]; - } - else { - data += strides[idim]; - break; - } - } - } while (i < ndim); + } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides); } return count; diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c index ecb54bc7e..6406dd82e 100644 --- a/numpy/core/src/multiarray/na_mask.c +++ b/numpy/core/src/multiarray/na_mask.c @@ -80,6 +80,50 @@ PyArray_ContainsNA(PyArrayObject *arr) } /* + * Fills a raw array whose dtype has size one with the specified byte + * + * Returns 0 on success, -1 on failure. 
+ */
+static int
+fill_raw_byte_array(int ndim, npy_intp *shape,
+                char *data, npy_intp *strides, char fillvalue)
+{
+    int idim;
+    npy_intp shape_it[NPY_MAXDIMS], strides_it[NPY_MAXDIMS];
+    npy_intp i, coord[NPY_MAXDIMS];
+
+    /* Use raw iteration with no heap memory allocation */
+    if (PyArray_PrepareOneRawArrayIter(
+                    ndim, shape,
+                    data, strides,
+                    &ndim, shape_it,
+                    &data, strides_it) < 0) {
+        /* Keep any pending exception set so the caller sees the failure */
+        return -1;
+    }
+
+    /* Special case contiguous inner stride: one memset per inner run */
+    if (strides_it[0] == 1) {
+        NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+            /* Process the innermost dimension */
+            memset(data, fillvalue, shape_it[0]);
+        } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape_it, data, strides_it);
+    }
+    /* General inner stride: step byte by byte using the inner stride */
+    else {
+        NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+            char *d = data;
+            /* Process the innermost dimension */
+            for (i = 0; i < shape_it[0]; ++i, d += strides_it[0]) {
+                *d = fillvalue;
+            }
+        } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape_it, data, strides_it);
+    }
+
+    return 0;
+}
+
+/*
  * Assigns the mask value to all the NA mask elements of
  * the array.
* @@ -88,11 +132,6 @@ PyArray_ContainsNA(PyArrayObject *arr) NPY_NO_EXPORT int PyArray_AssignMaskNA(PyArrayObject *arr, npy_mask maskvalue) { - int idim, ndim; - char *data; - npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS]; - npy_intp i, coord[NPY_MAXDIMS]; - /* Need NA support to fill the NA mask */ if (!PyArray_HASMASKNA(arr)) { PyErr_SetString(PyExc_ValueError, @@ -107,35 +146,10 @@ PyArray_AssignMaskNA(PyArrayObject *arr, npy_mask maskvalue) return -1; } - /* Use raw iteration with no heap memory allocation */ - if (PyArray_PrepareOneRawArrayIter( + return fill_raw_byte_array( PyArray_NDIM(arr), PyArray_DIMS(arr), PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr), - &ndim, shape, - &data, strides) < 0) { - PyErr_Clear(); - return 1; - } - - /* Special case contiguous inner stride */ - if (strides[0] == 1) { - NPY_RAW_ITER_START(idim, ndim, coord, shape) { - /* Process the innermost dimension */ - memset(data, maskvalue, shape[0]); - } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides); - } - /* General inner stride */ - else { - NPY_RAW_ITER_START(idim, ndim, coord, shape) { - char *d = data; - /* Process the innermost dimension */ - for (i = 0; i < shape[0]; ++i, d += strides[0]) { - *(npy_mask *)d = maskvalue; - } - } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides); - } - - return 0; + (char)maskvalue); } /*NUMPY_API @@ -432,3 +446,107 @@ PyArray_IsNA(PyObject *obj) return (PyObject *)ret; } } + +/*NUMPY_API + * + * This function performs a reduction on the masks for an array. + * The masks are provided in raw form, with their strides conformed + * for the reduction. + * + * This is for use with a reduction where 'skipna=False'. + * + * ndim, shape: The geometry of the arrays + * src_dtype, dst_dtype: The NA mask dtypes. + * src_data, dst_data: The NA mask data pointers. + * src_strides, dst_strides: The NA mask strides, matching the geometry. + * + * Returns 0 on success, -1 on failure. 
+ */
+NPY_NO_EXPORT int
+PyArray_ReduceMaskArray(int ndim, npy_intp *shape,
+            PyArray_Descr *src_dtype, char *src_data, npy_intp *src_strides,
+            PyArray_Descr *dst_dtype, char *dst_data, npy_intp *dst_strides)
+{
+    int idim;
+    npy_intp i, coord[NPY_MAXDIMS];
+    npy_intp shape_it[NPY_MAXDIMS];
+    npy_intp src_strides_it[NPY_MAXDIMS];
+    npy_intp dst_strides_it[NPY_MAXDIMS];
+
+    /* Confirm that dst is not larger than src (dst may only drop axes) */
+    for (idim = 0; idim < ndim; ++idim) {
+        if (src_strides[idim] == 0 && dst_strides[idim] != 0) {
+            PyErr_SetString(PyExc_RuntimeError,
+                    "ReduceMaskArray cannot reduce into a larger array");
+            return -1;
+        }
+    }
+
+    if (src_dtype->type_num != NPY_BOOL || dst_dtype->type_num != NPY_BOOL) {
+        PyErr_SetString(PyExc_ValueError,
+                "multi-NA and field-NA are not yet supported");
+        return -1;
+    }
+
+    /* Initialize the destination mask to all ones, exposed data */
+    if (fill_raw_byte_array(ndim, shape, dst_data, dst_strides, 1) < 0) {
+        return -1;
+    }
+
+    /*
+     * Sort axes based on 'src', which has more non-zero strides,
+     * by making it the first operand here
+     */
+    if (PyArray_PrepareTwoRawArrayIter(ndim, shape,
+                                    src_data, src_strides,
+                                    dst_data, dst_strides,
+                                    &ndim, shape_it,
+                                    &src_data, src_strides_it,
+                                    &dst_data, dst_strides_it) < 0) {
+        return -1;
+    }
+
+    /* Special case a reduction in the inner loop */
+    if (dst_strides_it[0] == 0) {
+        /* Special case a contiguous reduction in the inner loop */
+        if (src_strides_it[0] == 1) {
+            NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+                /* If there's a zero in src, set dst to zero */
+                if (memchr(src_data, 0, shape_it[0]) != NULL) {
+                    *dst_data = 0;
+                }
+            } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it,
+                                    src_data, src_strides_it,
+                                    dst_data, dst_strides_it);
+        }
+        else {
+            NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+                char *src_d = src_data;
+                /* If there's a zero in src, set dst to zero */
+                for (i = 0; i < shape_it[0]; ++i) {
+                    if (*src_d == 0) {
+                        *dst_data = 0;
+                        break;
+                    }
+                    src_d += src_strides_it[0];
+                }
+            } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it,
+                                    src_data, src_strides_it,
+                                    dst_data, dst_strides_it);
+        }
+    }
+    else { /* dst advances along the inner axis: AND src into dst per element */
+        NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+            char *src_d = src_data, *dst_d = dst_data;
+            for (i = 0; i < shape_it[0]; ++i) {
+                *dst_d &= *src_d;
+                src_d += src_strides_it[0];
+                dst_d += dst_strides_it[0];
+            }
+        } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it,
+                                    src_data, src_strides_it,
+                                    dst_data, dst_strides_it);
+    }
+
+    return 0;
+}
diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h index 840f4ec04..a4ba0526d 100644 --- a/numpy/core/src/private/lowlevel_strided_loops.h +++ b/numpy/core/src/private/lowlevel_strided_loops.h @@ -380,7 +380,7 @@ PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape, break; \ } \ } \ - } while ((idim) < (ndim)); \ + } while ((idim) < (ndim)) /* Increment to the next n-dimensional coordinate for two raw arrays */ #define NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape, \ @@ -397,8 +397,7 @@ PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape, break; \ } \ } \ - } while ((idim) < (ndim)); \ - + } while ((idim) < (ndim)) /* |