diff options
-rw-r--r-- | doc/neps/missing-data.rst | 32 | ||||
-rw-r--r-- | numpy/core/include/numpy/ndarraytypes.h | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/dtype_transfer.c | 70 | ||||
-rw-r--r-- | numpy/core/src/multiarray/item_selection.c | 84 | ||||
-rw-r--r-- | numpy/core/src/multiarray/item_selection.h | 13 | ||||
-rw-r--r-- | numpy/core/src/multiarray/iterators.c | 31 | ||||
-rw-r--r-- | numpy/core/src/multiarray/mapping.c | 228 | ||||
-rw-r--r-- | numpy/core/src/multiarray/na_mask.c | 47 | ||||
-rw-r--r-- | numpy/core/src/private/lowlevel_strided_loops.h | 19 | ||||
-rw-r--r-- | numpy/core/tests/test_na.py | 7 |
10 files changed, 509 insertions, 24 deletions
diff --git a/doc/neps/missing-data.rst b/doc/neps/missing-data.rst index 038ea9370..e83bd2189 100644 --- a/doc/neps/missing-data.rst +++ b/doc/neps/missing-data.rst @@ -743,6 +743,38 @@ to be consistent with the result of np.sum([]):: >>> np.sum([]) 0.0 +Boolean Indexing +================ + +Indexing using a boolean array containing NAs does not have a consistent +interpretation according to the NA abstraction. For example:: + + >>> a = np.array([1, 2]) + >>> mask = np.array([np.NA, True], maskna=True) + >>> a[mask] + What should happen here? + +Since the NA represents a valid but unknown value, and it is a boolean, +it has two possible underlying values:: + + >>> a[np.array([True, True])] + array([1, 2]) + >>> a[np.array([False, True])] + array([2]) + +The thing which changes is the length of the output array, nothing which +itself can be substituted for NA. For this reason, at least initially, +NumPy will raise an exception for this case. + +Another possibility is to add an inconsistency, and follow the approach +R uses. That is, to produce the following:: + + >>> a[mask] + array([NA, 2], maskna=True) + +If, in user testing, this is found necessary for pragmatic reasons, +the feature should be added even though it is inconsistent. + PEP 3118 ======== diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index fe086c997..62cc12233 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -1058,6 +1058,8 @@ typedef void (NpyIter_GetMultiIndexFunc)(NpyIter *iter, #define NPY_ITER_ARRAYMASK 0x20000000 /* Split this operand up into data and an NA mask */ #define NPY_ITER_USE_MASKNA 0x40000000 +/* Iterate over the data, even if it has an NA mask and without USE_MASKNA */ +#define NPY_ITER_IGNORE_MASKNA 0x80000000 #define NPY_ITER_GLOBAL_FLAGS 0x0000ffff #define NPY_ITER_PER_OP_FLAGS 0xffff0000 diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index a14c7de36..5ffd93886 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -3873,6 +3873,76 @@ PyArray_CastRawArrays(npy_intp count, } /* + * Prepares shape and strides for a simple raw array iteration. + * This sorts the strides into FORTRAN order, reverses any negative + * strides, then coalesces axes where possible. The results are + * filled in the output parameters. + * + * This is intended for simple, lightweight iteration over arrays + * where no buffering of any kind is needed, and the array may + * not be stored as a PyArrayObject. For example, to iterate over + * the NA mask of an array. + * + * The arrays shape, out_shape, strides, and out_strides must all + * point to different data. + * + * Returns 0 on success, -1 on failure. + */ +NPY_NO_EXPORT int +PyArray_PrepareOneRawArrayIter(int ndim, char *data, + npy_intp *shape, npy_intp *strides, + int *out_ndim, char **out_data, + npy_intp *out_shape, npy_intp *out_strides) +{ + _npy_stride_sort_item strideperm[NPY_MAXDIMS]; + int i, j; + + /* Sort the axes based on the destination strides */ + PyArray_CreateSortedStridePerm(ndim, strides, strideperm); + for (i = 0; i < ndim; ++i) { + int iperm = ndim - strideperm[i].perm - 1; + out_shape[i] = shape[iperm]; + out_strides[i] = strides[iperm]; + } + + /* Reverse any negative strides */ + for (i = 0; i < ndim; ++i) { + npy_intp stride_entry = out_strides[i], shape_entry = out_shape[i]; + + if (stride_entry < 0) { + data += stride_entry * (shape_entry - 1); + out_strides[i] = -stride_entry; + } + } + + /* Coalesce any dimensions where possible */ + i = 0; + for (j = 1; j < ndim; ++j) { + if (out_shape[i] == 1) { + /* Drop axis i */ + out_shape[i] = out_shape[j]; + out_strides[i] = out_strides[j]; + } + else if (out_shape[j] == 1) { + /* Drop axis j */ + } + else if (out_strides[i] * out_shape[i] == out_strides[j]) { + /* Coalesce axes i and j */ + out_shape[i] *= out_shape[j]; + } + else { + /* Can't coalesce, go to next i */ + ++i; + } + } + ndim = i+1; + + *out_data = data; + *out_ndim = ndim; + return 0; +} + +/* * Casts the elements from one n-dimensional array to another n-dimensional * array with identical shape but possibly different strides and dtypes. * Does not account for overlap. diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 54de27e05..4ba7b7881 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -17,6 +17,7 @@ #include "common.h" #include "ctors.h" #include "lowlevel_strided_loops.h" +#include "item_selection.h" #define _check_axis PyArray_CheckAxis @@ -1690,6 +1691,77 @@ PyArray_Compress(PyArrayObject *self, PyObject *condition, int axis, return ret; } +/* + * Counts the number of True values in a raw boolean array. This + * is a low-overhead function which does no heap allocations. + * + * Returns -1 on error. + */ +NPY_NO_EXPORT npy_intp +count_boolean_trues(int ndim, char *data, npy_intp *ashape, npy_intp *astrides) +{ + int idim; + npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS]; + npy_intp i, coord[NPY_MAXDIMS]; + npy_intp count = 0; + + /* Use raw iteration with no heap memory allocation */ + if (PyArray_PrepareOneRawArrayIter( + ndim, data, ashape, astrides, + &ndim, &data, shape, strides) < 0) { + return -1; + } + + /* Do the iteration */ + memset(coord, 0, ndim * sizeof(npy_intp)); + /* Special case for contiguous inner loop */ + if (strides[0] == 1) { + do { + char *d = data; + /* Process the innermost dimension */ + for (i = 0; i < shape[0]; ++i, ++d) { + count += (*d != 0); + } + + /* Increment to the next n-dimensional coordinate */ + for (idim = 1; idim < ndim; ++idim) { + if (++coord[idim] == shape[idim]) { + coord[idim] = 0; + data -= (shape[idim] - 1) * strides[idim]; + } + else { + data += strides[idim]; + break; + } + } + } while (i < ndim); + } + /* General inner loop */ + else { + do { + char *d = data; + /* Process the innermost dimension */ + for (i = 0; i < shape[0]; ++i, d += strides[0]) { + count += (*d != 0); + } + + /* Increment to the next n-dimensional coordinate */ + for (idim = 1; idim < ndim; ++idim) { + if (++coord[idim] == shape[idim]) { + coord[idim] = 0; + data -= (shape[idim] - 1) * strides[idim]; + } + else { + data += strides[idim]; + break; + } + } + } while (i < ndim); + } + + return count; +} + /*NUMPY_API * Counts the number of non-zero elements in the array * @@ -1698,7 +1770,7 @@ PyArray_Compress(PyArrayObject *self, PyObject *condition, int axis, NPY_NO_EXPORT npy_intp PyArray_CountNonzero(PyArrayObject *self) { - PyArray_NonzeroFunc *nonzero = PyArray_DESCR(self)->f->nonzero; + PyArray_NonzeroFunc *nonzero; char *data; npy_intp stride, count; npy_intp nonzero_count = 0; @@ -1708,6 +1780,14 @@ PyArray_CountNonzero(PyArrayObject *self) char **dataptr; npy_intp *strideptr, *innersizeptr; + /* Special low-overhead version specific to the boolean type */ + if (PyArray_DESCR(self)->type_num == NPY_BOOL) { + return count_boolean_trues(PyArray_NDIM(self), PyArray_DATA(self), + PyArray_DIMS(self), PyArray_STRIDES(self)); + } + + nonzero = PyArray_DESCR(self)->f->nonzero; + /* If it's a trivial one-dimensional loop, don't use an iterator */ if (PyArray_TRIVIALLY_ITERABLE(self)) { PyArray_PREPARE_TRIVIAL_ITERATION(self, count, data, stride); @@ -1767,7 +1847,7 @@ PyArray_CountNonzero(PyArrayObject *self) NpyIter_Deallocate(iter); - return nonzero_count; + return PyErr_Occurred() ? -1 : nonzero_count; } /*NUMPY_API diff --git a/numpy/core/src/multiarray/item_selection.h b/numpy/core/src/multiarray/item_selection.h new file mode 100644 index 000000000..9023908ca --- /dev/null +++ b/numpy/core/src/multiarray/item_selection.h @@ -0,0 +1,13 @@ +#ifndef _NPY_PRIVATE__ITEM_SELECTION_H_ +#define _NPY_PRIVATE__ITEM_SELECTION_H_ + +/* + * Counts the number of True values in a raw boolean array. This + * is a low-overhead function which does no heap allocations. + * + * Returns -1 on error. + */ +NPY_NO_EXPORT npy_intp +count_boolean_trues(int ndim, char *data, npy_intp *ashape, npy_intp *astrides); + +#endif diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c index 8371702f9..69988f553 100644 --- a/numpy/core/src/multiarray/iterators.c +++ b/numpy/core/src/multiarray/iterators.c @@ -397,14 +397,22 @@ NPY_NO_EXPORT PyObject * PyArray_IterNew(PyObject *obj) { PyArrayIterObject *it; - PyArrayObject *ao = (PyArrayObject *)obj; + PyArrayObject *ao; - if (!PyArray_Check(ao)) { + if (!PyArray_Check(obj)) { PyErr_BadInternalCall(); return NULL; } + ao = (PyArrayObject *)obj; + + if (PyArray_HASMASKNA(ao)) { + PyErr_SetString(PyExc_ValueError, + "Old-style NumPy iterators do not support NA masks, " + "use numpy.nditer instead"); + return NULL; + } - it = (PyArrayIterObject *)_pya_malloc(sizeof(PyArrayIterObject)); + it = (PyArrayIterObject *)PyArray_malloc(sizeof(PyArrayIterObject)); PyObject_Init((PyObject *)it, &PyArrayIter_Type); /* it = PyObject_New(PyArrayIterObject, &PyArrayIter_Type);*/ if (it == NULL) { @@ -425,6 +433,13 @@ PyArray_BroadcastToShape(PyObject *obj, npy_intp *dims, int nd) int i, diff, j, compat, k; PyArrayObject *ao = (PyArrayObject *)obj; + if (PyArray_HASMASKNA(ao)) { + PyErr_SetString(PyExc_ValueError, + "Old-style NumPy iterators do not support NA masks, " + "use numpy.nditer instead"); + return NULL; + } + if (PyArray_NDIM(ao) > nd) { goto err; } @@ -442,7 +457,7 @@ PyArray_BroadcastToShape(PyObject *obj, npy_intp *dims, int nd) if (!compat) { goto err; } - it = (PyArrayIterObject *)_pya_malloc(sizeof(PyArrayIterObject)); + it = (PyArrayIterObject *)PyArray_malloc(sizeof(PyArrayIterObject)); PyObject_Init((PyObject *)it, &PyArrayIter_Type); if (it == NULL) { @@ -1506,7 +1521,7 @@ PyArray_MultiIterFromObjects(PyObject **mps, int n, int nadd, ...) "array objects (inclusive).", NPY_MAXARGS); return NULL; } - multi = _pya_malloc(sizeof(PyArrayMultiIterObject)); + multi = PyArray_malloc(sizeof(PyArrayMultiIterObject)); if (multi == NULL) { return PyErr_NoMemory(); } @@ -1571,7 +1586,7 @@ PyArray_MultiIterNew(int n, ...) /* fprintf(stderr, "multi new...");*/ - multi = _pya_malloc(sizeof(PyArrayMultiIterObject)); + multi = PyArray_malloc(sizeof(PyArrayMultiIterObject)); if (multi == NULL) { return PyErr_NoMemory(); } @@ -1634,7 +1649,7 @@ arraymultiter_new(PyTypeObject *NPY_UNUSED(subtype), PyObject *args, PyObject *k return NULL; } - multi = _pya_malloc(sizeof(PyArrayMultiIterObject)); + multi = PyArray_malloc(sizeof(PyArrayMultiIterObject)); if (multi == NULL) { return PyErr_NoMemory(); } @@ -2031,7 +2046,7 @@ PyArray_NeighborhoodIterNew(PyArrayIterObject *x, npy_intp *bounds, int i; PyArrayNeighborhoodIterObject *ret; - ret = _pya_malloc(sizeof(*ret)); + ret = PyArray_malloc(sizeof(*ret)); if (ret == NULL) { return NULL; } diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 396f0a9e4..10e3f82b2 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -16,6 +16,8 @@ #include "iterators.h" #include "mapping.h" #include "na_singleton.h" +#include "lowlevel_strided_loops.h" +#include "item_selection.h" #define SOBJ_NOTFANCY 0 #define SOBJ_ISFANCY 1 @@ -496,7 +498,6 @@ static int fancy_indexing_check(PyObject *args) { int i, n; - PyObject *obj; int retval = SOBJ_NOTFANCY; if (PyTuple_Check(args)) { @@ -505,10 +506,11 @@ fancy_indexing_check(PyObject *args) return SOBJ_TOOMANY; } for (i = 0; i < n; i++) { - obj = PyTuple_GET_ITEM(args,i); + PyObject *obj = PyTuple_GET_ITEM(args,i); if (PyArray_Check(obj)) { - if (PyArray_ISINTEGER((PyArrayObject *)obj) || - PyArray_ISBOOL((PyArrayObject *)obj)) { + int type_num = PyArray_DESCR((PyArrayObject *)obj)->type_num; + if (PyTypeNum_ISINTEGER(type_num) || + PyTypeNum_ISBOOL(type_num)) { retval = SOBJ_ISFANCY; } else { @@ -522,8 +524,8 @@ fancy_indexing_check(PyObject *args) } } else if (PyArray_Check(args)) { - if ((PyArray_TYPE((PyArrayObject *)args)==NPY_BOOL) || - (PyArray_ISINTEGER((PyArrayObject *)args))) { + int type_num = PyArray_DESCR((PyArrayObject *)args)->type_num; + if (PyTypeNum_ISINTEGER(type_num) || PyTypeNum_ISBOOL(type_num)) { return SOBJ_ISFANCY; } else { @@ -543,13 +545,14 @@ fancy_indexing_check(PyObject *args) return SOBJ_ISFANCY; } for (i = 0; i < n; i++) { - obj = PySequence_GetItem(args, i); + PyObject *obj = PySequence_GetItem(args, i); if (obj == NULL) { return SOBJ_ISFANCY; } if (PyArray_Check(obj)) { - if (PyArray_ISINTEGER((PyArrayObject *)obj) || - PyArray_ISBOOL((PyArrayObject *)obj)) { + int type_num = PyArray_DESCR((PyArrayObject *)obj)->type_num; + if (PyTypeNum_ISINTEGER(type_num) || + PyTypeNum_ISBOOL(type_num)) { retval = SOBJ_LISTTUP; } else { @@ -560,7 +563,7 @@ fancy_indexing_check(PyObject *args) retval = SOBJ_LISTTUP; } else if (PySlice_Check(obj) || obj == Py_Ellipsis || - obj == Py_None) { + obj == Py_None) { retval = SOBJ_NOTFANCY; } Py_DECREF(obj); @@ -672,6 +675,211 @@ array_subscript_simple(PyArrayObject *self, PyObject *op) return (PyObject *)ret; } +/* + * Implements boolean indexing. This produces a one-dimensional + * array which picks out all of the elements of 'self' for which + * the corresponding element of 'op' is True. + * + * This operation is somewhat unfortunate, because to produce + * a one-dimensional output array, it has to choose a particular + * iteration order, in the case of NumPy that is always C order even + * though this function allows different choices. + */ +NPY_NO_EXPORT PyArrayObject * +array_boolean_subscript(PyArrayObject *self, PyArrayObject *bmask, NPY_ORDER order) +{ + npy_intp size, itemsize; + char *ret_data, *ret_maskna_data = NULL; + PyArray_Descr *dtype; + PyArrayObject *ret; + int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0; + + if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) { + PyErr_SetString(PyExc_TypeError, + "NumPy boolean array indexing requires a boolean index"); + return NULL; + } + + /* See the Boolean Indexing section of the missing data NEP */ + if (PyArray_ContainsNA(bmask)) { + PyErr_SetString(PyExc_ValueError, + "The boolean mask array may not contain any NA values"); + return NULL; + } + + /* + * Since we've checked that the mask contains no NAs, we + * can do a straightforward count of the boolean True values + * in the raw mask data array. + */ + size = count_boolean_trues(PyArray_NDIM(bmask), PyArray_DATA(bmask), + PyArray_DIMS(bmask), PyArray_STRIDES(bmask)); + /* Correction factor for broadcasting 'bmask' to 'self' */ + size *= PyArray_SIZE(self) / PyArray_SIZE(bmask); + + /* Allocate the output of the boolean indexing */ + dtype = PyArray_DESCR(self); + Py_INCREF(dtype); + ret = (PyArrayObject *)PyArray_NewFromDescr(Py_TYPE(self), dtype, 1, &size, + NULL, NULL, 0, (PyObject *)self); + if (ret == NULL) { + return NULL; + } + + /* Allocate an NA mask for ret if required */ + if (self_has_maskna) { + if (PyArray_AllocateMaskNA(ret, 1, 0) < 0) { + Py_DECREF(ret); + return NULL; + } + ret_maskna_data = PyArray_MASKNA_DATA(ret); + } + + itemsize = dtype->elsize; + ret_data = PyArray_DATA(ret); + + /* Create an iterator for the data */ + if (size > 0) { + NpyIter *iter; + PyArrayObject *op[2] = {self, bmask}; + npy_uint32 flags, op_flags[2]; + npy_intp fixed_strides[3]; + PyArray_StridedTransferFn *stransfer = NULL; + NpyAuxData *transferdata = NULL; + + NpyIter_IterNextFunc *iternext; + npy_intp innersize, *innerstrides; + char **dataptrs; + + /* Set up the iterator */ + flags = NPY_ITER_EXTERNAL_LOOP; + if (self_has_maskna) { + op_flags[0] = NPY_ITER_READONLY | NPY_ITER_USE_MASKNA; + } + else { + op_flags[0] = NPY_ITER_READONLY; + } + /* + * Since we already checked PyArray_ContainsNA(bmask), can + * ignore any MASKNA of bmask. + */ + op_flags[1] = NPY_ITER_READONLY | NPY_ITER_IGNORE_MASKNA; + + iter = NpyIter_MultiNew(2, op, flags, order, NPY_NO_CASTING, + op_flags, NULL); + if (iter == NULL) { + Py_DECREF(ret); + return NULL; + } + + /* Get a dtype transfer function */ + NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + if (PyArray_GetDTypeTransferFunction(PyArray_ISALIGNED(self), + fixed_strides[0], itemsize, + dtype, dtype, + 0, + &stransfer, &transferdata, + &needs_api) != NPY_SUCCEED) { + Py_DECREF(ret); + return NULL; + } + + /* Get the values needed for the inner loop */ + iternext = NpyIter_GetIterNext(iter, NULL); + if (iternext == NULL) { + Py_DECREF(ret); + NPY_AUXDATA_FREE(transferdata); + return NULL; + } + innerstrides = NpyIter_GetInnerStrideArray(iter); + dataptrs = NpyIter_GetDataPtrArray(iter); + + /* Regular inner loop */ + if (!self_has_maskna) { + npy_intp self_stride = innerstrides[0]; + npy_intp bmask_stride = innerstrides[1]; + npy_intp subloopsize; + do { + innersize = *NpyIter_GetInnerLoopSizePtr(iter); + char *self_data = dataptrs[0]; + char *bmask_data = dataptrs[1]; + + while (innersize > 0) { + /* Skip masked values */ + subloopsize = 0; + while (subloopsize < innersize && *bmask_data == 0) { + ++subloopsize; + bmask_data += bmask_stride; + } + innersize -= subloopsize; + self_data += subloopsize * self_stride; + /* Process unmasked values */ + subloopsize = 0; + while (subloopsize < innersize && *bmask_data != 0) { + ++subloopsize; + bmask_data += bmask_stride; + } + stransfer(ret_data, itemsize, self_data, self_stride, + subloopsize, itemsize, transferdata); + innersize -= subloopsize; + self_data += subloopsize * self_stride; + } + } while (iternext(iter)); + } + /* NA masked inner loop */ + else { + npy_intp i; + npy_intp self_stride = innerstrides[0]; + npy_intp bmask_stride = innerstrides[1]; + npy_intp maskna_stride = innerstrides[2]; + npy_intp subloopsize; + do { + innersize = *NpyIter_GetInnerLoopSizePtr(iter); + char *self_data = dataptrs[0]; + char *bmask_data = dataptrs[1]; + char *maskna_data = dataptrs[2]; + + while (innersize > 0) { + /* Skip masked values */ + subloopsize = 0; + while (subloopsize < innersize && *bmask_data == 0) { + ++subloopsize; + bmask_data += bmask_stride; + } + innersize -= subloopsize; + self_data += subloopsize * self_stride; + /* Process unmasked values */ + subloopsize = 0; + while (subloopsize < innersize && *bmask_data != 0) { + ++subloopsize; + bmask_data += bmask_stride; + } + /* + * Because it's a newly allocated array, we + * don't have to be careful about not overwriting + * NA masked values. If 'ret' were an output parameter, + * we would have to avoid that. + */ + stransfer(ret_data, itemsize, self_data, self_stride, + subloopsize, itemsize, transferdata); + /* Copy the mask */ + for (i = 0; i < subloopsize; ++i) { + *ret_maskna_data++ = *maskna_data; + maskna_data += maskna_stride; + } + innersize -= subloopsize; + self_data += subloopsize * self_stride; + } + } while (iternext(iter)); + } + + NpyIter_Deallocate(iter); + NPY_AUXDATA_FREE(transferdata); + } + + return ret; +} + NPY_NO_EXPORT PyObject * array_subscript(PyArrayObject *self, PyObject *op) { diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c index 92eab30e5..b7af2c824 100644 --- a/numpy/core/src/multiarray/na_mask.c +++ b/numpy/core/src/multiarray/na_mask.c @@ -43,11 +43,50 @@ NPY_NO_EXPORT npy_bool PyArray_ContainsNA(PyArrayObject *arr) { /* Need NA support to contain NA */ - if (!PyArray_HasNASupport(arr)) { - return 0; - } + if (PyArray_HASMASKNA(arr)) { + int idim, ndim; + char *data; + npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS]; + npy_intp i, coord[NPY_MAXDIMS]; + + if (PyArray_HASFIELDS(arr)) { + /* TODO: need to add field-NA support */ + return 1; + } + + /* Use raw iteration with no heap memory allocation */ + if (PyArray_PrepareOneRawArrayIter( + PyArray_NDIM(arr), PyArray_MASKNA_DATA(arr), + PyArray_DIMS(arr), PyArray_MASKNA_STRIDES(arr), + &ndim, &data, shape, strides) < 0) { + PyErr_Clear(); + return 1; + } + + /* Do the iteration */ + memset(coord, 0, ndim * sizeof(npy_intp)); + do { + char *d = data; + /* Process the innermost dimension */ + for (i = 0; i < shape[0]; ++i, d += strides[0]) { + if (!NpyMaskValue_IsExposed((npy_mask)(*d))) { + return 1; + } + } - /* TODO: Loop through NA mask */ + /* Increment to the next n-dimensional coordinate */ + for (idim = 1; idim < ndim; ++idim) { + if (++coord[idim] == shape[idim]) { + coord[idim] = 0; + data -= (shape[idim] - 1) * strides[idim]; + } + else { + data += strides[idim]; + break; + } + } + } while (i < ndim); + } return 0; } diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h index c606e58f3..e9a1dd40d 100644 --- a/numpy/core/src/private/lowlevel_strided_loops.h +++ b/numpy/core/src/private/lowlevel_strided_loops.h @@ -316,6 +316,25 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim, PyArray_MaskedStridedTransferFn *stransfer, NpyAuxData *data); +/* + * Prepares shape and strides for a simple raw array iteration. + * This sorts the strides into FORTRAN order, reverses any negative + * strides, then coalesces axes where possible. The results are + * filled in the output parameters. + * + * This is intended for simple, lightweight iteration over arrays + * where no buffering of any kind is needed, and the array may + * not be stored as a PyArrayObject. For example, to iterate over + * the NA mask of an array. + * + * Returns 0 on success, -1 on failure. + */ +NPY_NO_EXPORT int +PyArray_PrepareOneRawArrayIter(int ndim, char *data, + npy_intp *shape, npy_intp *strides, + int *out_ndim, char **out_data, + npy_intp *out_shape, npy_intp *out_strides); + /* * TRIVIAL ITERATION diff --git a/numpy/core/tests/test_na.py b/numpy/core/tests/test_na.py index 3b179b126..98ad67f27 100644 --- a/numpy/core/tests/test_na.py +++ b/numpy/core/tests/test_na.py @@ -225,6 +225,13 @@ def test_array_maskna_isna_1D(): a[2:10:3] = np.NA assert_equal(np.isna(a), [0,0,1,1,0,1,1,0,1,0]) + # Checking isna of a boolean mask index + mask = np.array([1,1,0,0,0,1,0,1,1,0], dtype='?') + assert_equal(np.isna(a[mask]), [0,0,1,0,1]) + # Assigning NA to a boolean masked index + a[mask] = np.NA + assert_equal(np.isna(a), [1,1,1,1,0,1,1,1,1,0]) + # TODO: fancy indexing is next... |