summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authorMark Wiebe <mwiebe@enthought.com>2011-07-28 12:19:16 -0500
committerCharles Harris <charlesr.harris@gmail.com>2011-08-27 07:26:48 -0600
commit69e0ed8c58a284ddfd305f2b1ff17cb8d04dd432 (patch)
tree0c5ea20c1e8d4c104fd793a880f2b4877abfd2c0 /numpy
parent30bb48840646c6905d127e6d4c90e78ce197db5d (diff)
downloadnumpy-69e0ed8c58a284ddfd305f2b1ff17cb8d04dd432.tar.gz
ENH: missingdata: Rewrote boolean indexing to support NA masks
Note I haven't hooked it up to the arr[] operator yet...
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/include/numpy/ndarraytypes.h2
-rw-r--r--numpy/core/src/multiarray/dtype_transfer.c70
-rw-r--r--numpy/core/src/multiarray/item_selection.c84
-rw-r--r--numpy/core/src/multiarray/item_selection.h13
-rw-r--r--numpy/core/src/multiarray/iterators.c31
-rw-r--r--numpy/core/src/multiarray/mapping.c228
-rw-r--r--numpy/core/src/multiarray/na_mask.c47
-rw-r--r--numpy/core/src/private/lowlevel_strided_loops.h19
-rw-r--r--numpy/core/tests/test_na.py7
9 files changed, 477 insertions, 24 deletions
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index fe086c997..62cc12233 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1058,6 +1058,8 @@ typedef void (NpyIter_GetMultiIndexFunc)(NpyIter *iter,
#define NPY_ITER_ARRAYMASK 0x20000000
/* Split this operand up into data and an NA mask */
#define NPY_ITER_USE_MASKNA 0x40000000
+/* Iterate over the data, even if it has an NA mask and without USE_MASKNA */
+#define NPY_ITER_IGNORE_MASKNA 0x80000000
#define NPY_ITER_GLOBAL_FLAGS 0x0000ffff
#define NPY_ITER_PER_OP_FLAGS 0xffff0000
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index a14c7de36..5ffd93886 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -3873,6 +3873,76 @@ PyArray_CastRawArrays(npy_intp count,
}
/*
+ * Prepares shape and strides for a simple raw array iteration.
+ * This sorts the strides into FORTRAN order, reverses any negative
+ * strides, then coalesces axes where possible. The results are
+ * filled in the output parameters.
+ *
+ * This is intended for simple, lightweight iteration over arrays
+ * where no buffering of any kind is needed, and the array may
+ * not be stored as a PyArrayObject. For example, to iterate over
+ * the NA mask of an array.
+ *
+ * The arrays shape, out_shape, strides, and out_strides must all
+ * point to different data.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+NPY_NO_EXPORT int
+PyArray_PrepareOneRawArrayIter(int ndim, char *data,
+ npy_intp *shape, npy_intp *strides,
+ int *out_ndim, char **out_data,
+ npy_intp *out_shape, npy_intp *out_strides)
+{
+ _npy_stride_sort_item strideperm[NPY_MAXDIMS];
+ int i, j;
+
+ /* Sort the axes based on the destination strides */
+ PyArray_CreateSortedStridePerm(ndim, strides, strideperm);
+ for (i = 0; i < ndim; ++i) {
+ int iperm = ndim - strideperm[i].perm - 1;
+ out_shape[i] = shape[iperm];
+ out_strides[i] = strides[iperm];
+ }
+
+ /* Reverse any negative strides */
+ for (i = 0; i < ndim; ++i) {
+ npy_intp stride_entry = out_strides[i], shape_entry = out_shape[i];
+
+ if (stride_entry < 0) {
+ data += stride_entry * (shape_entry - 1);
+ out_strides[i] = -stride_entry;
+ }
+ }
+
+ /* Coalesce any dimensions where possible */
+ i = 0;
+ for (j = 1; j < ndim; ++j) {
+ if (out_shape[i] == 1) {
+ /* Drop axis i */
+ out_shape[i] = out_shape[j];
+ out_strides[i] = out_strides[j];
+ }
+ else if (out_shape[j] == 1) {
+ /* Drop axis j */
+ }
+ else if (out_strides[i] * out_shape[i] == out_strides[j]) {
+ /* Coalesce axes i and j */
+ out_shape[i] *= out_shape[j];
+ }
+ else {
+ /* Can't coalesce, go to next i */
+ ++i;
+ }
+ }
+ ndim = i+1;
+
+ *out_data = data;
+ *out_ndim = ndim;
+ return 0;
+}
+
+/*
* Casts the elements from one n-dimensional array to another n-dimensional
* array with identical shape but possibly different strides and dtypes.
* Does not account for overlap.
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 54de27e05..4ba7b7881 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -17,6 +17,7 @@
#include "common.h"
#include "ctors.h"
#include "lowlevel_strided_loops.h"
+#include "item_selection.h"
#define _check_axis PyArray_CheckAxis
@@ -1690,6 +1691,77 @@ PyArray_Compress(PyArrayObject *self, PyObject *condition, int axis,
return ret;
}
+/*
+ * Counts the number of True values in a raw boolean array. This
+ * is a low-overhead function which does no heap allocations.
+ *
+ * Returns -1 on error.
+ */
+NPY_NO_EXPORT npy_intp
+count_boolean_trues(int ndim, char *data, npy_intp *ashape, npy_intp *astrides)
+{
+ int idim;
+ npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
+ npy_intp i, coord[NPY_MAXDIMS];
+ npy_intp count = 0;
+
+ /* Use raw iteration with no heap memory allocation */
+ if (PyArray_PrepareOneRawArrayIter(
+ ndim, data, ashape, astrides,
+ &ndim, &data, shape, strides) < 0) {
+ return -1;
+ }
+
+ /* Do the iteration */
+ memset(coord, 0, ndim * sizeof(npy_intp));
+ /* Special case for contiguous inner loop */
+ if (strides[0] == 1) {
+ do {
+ char *d = data;
+ /* Process the innermost dimension */
+ for (i = 0; i < shape[0]; ++i, ++d) {
+ count += (*d != 0);
+ }
+
+ /* Increment to the next n-dimensional coordinate */
+ for (idim = 1; idim < ndim; ++idim) {
+ if (++coord[idim] == shape[idim]) {
+ coord[idim] = 0;
+ data -= (shape[idim] - 1) * strides[idim];
+ }
+ else {
+ data += strides[idim];
+ break;
+ }
+ }
+ } while (i < ndim);
+ }
+ /* General inner loop */
+ else {
+ do {
+ char *d = data;
+ /* Process the innermost dimension */
+ for (i = 0; i < shape[0]; ++i, d += strides[0]) {
+ count += (*d != 0);
+ }
+
+ /* Increment to the next n-dimensional coordinate */
+ for (idim = 1; idim < ndim; ++idim) {
+ if (++coord[idim] == shape[idim]) {
+ coord[idim] = 0;
+ data -= (shape[idim] - 1) * strides[idim];
+ }
+ else {
+ data += strides[idim];
+ break;
+ }
+ }
+ } while (i < ndim);
+ }
+
+ return count;
+}
+
/*NUMPY_API
* Counts the number of non-zero elements in the array
*
@@ -1698,7 +1770,7 @@ PyArray_Compress(PyArrayObject *self, PyObject *condition, int axis,
NPY_NO_EXPORT npy_intp
PyArray_CountNonzero(PyArrayObject *self)
{
- PyArray_NonzeroFunc *nonzero = PyArray_DESCR(self)->f->nonzero;
+ PyArray_NonzeroFunc *nonzero;
char *data;
npy_intp stride, count;
npy_intp nonzero_count = 0;
@@ -1708,6 +1780,14 @@ PyArray_CountNonzero(PyArrayObject *self)
char **dataptr;
npy_intp *strideptr, *innersizeptr;
+ /* Special low-overhead version specific to the boolean type */
+ if (PyArray_DESCR(self)->type_num == NPY_BOOL) {
+ return count_boolean_trues(PyArray_NDIM(self), PyArray_DATA(self),
+ PyArray_DIMS(self), PyArray_STRIDES(self));
+ }
+
+ nonzero = PyArray_DESCR(self)->f->nonzero;
+
/* If it's a trivial one-dimensional loop, don't use an iterator */
if (PyArray_TRIVIALLY_ITERABLE(self)) {
PyArray_PREPARE_TRIVIAL_ITERATION(self, count, data, stride);
@@ -1767,7 +1847,7 @@ PyArray_CountNonzero(PyArrayObject *self)
NpyIter_Deallocate(iter);
- return nonzero_count;
+ return PyErr_Occurred() ? -1 : nonzero_count;
}
/*NUMPY_API
diff --git a/numpy/core/src/multiarray/item_selection.h b/numpy/core/src/multiarray/item_selection.h
new file mode 100644
index 000000000..9023908ca
--- /dev/null
+++ b/numpy/core/src/multiarray/item_selection.h
@@ -0,0 +1,13 @@
+#ifndef _NPY_PRIVATE__ITEM_SELECTION_H_
+#define _NPY_PRIVATE__ITEM_SELECTION_H_
+
+/*
+ * Counts the number of True values in a raw boolean array. This
+ * is a low-overhead function which does no heap allocations.
+ *
+ * Returns -1 on error.
+ */
+NPY_NO_EXPORT npy_intp
+count_boolean_trues(int ndim, char *data, npy_intp *ashape, npy_intp *astrides);
+
+#endif
diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c
index 8371702f9..69988f553 100644
--- a/numpy/core/src/multiarray/iterators.c
+++ b/numpy/core/src/multiarray/iterators.c
@@ -397,14 +397,22 @@ NPY_NO_EXPORT PyObject *
PyArray_IterNew(PyObject *obj)
{
PyArrayIterObject *it;
- PyArrayObject *ao = (PyArrayObject *)obj;
+ PyArrayObject *ao;
- if (!PyArray_Check(ao)) {
+ if (!PyArray_Check(obj)) {
PyErr_BadInternalCall();
return NULL;
}
+ ao = (PyArrayObject *)obj;
+
+ if (PyArray_HASMASKNA(ao)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Old-style NumPy iterators do not support NA masks, "
+ "use numpy.nditer instead");
+ return NULL;
+ }
- it = (PyArrayIterObject *)_pya_malloc(sizeof(PyArrayIterObject));
+ it = (PyArrayIterObject *)PyArray_malloc(sizeof(PyArrayIterObject));
PyObject_Init((PyObject *)it, &PyArrayIter_Type);
/* it = PyObject_New(PyArrayIterObject, &PyArrayIter_Type);*/
if (it == NULL) {
@@ -425,6 +433,13 @@ PyArray_BroadcastToShape(PyObject *obj, npy_intp *dims, int nd)
int i, diff, j, compat, k;
PyArrayObject *ao = (PyArrayObject *)obj;
+ if (PyArray_HASMASKNA(ao)) {
+ PyErr_SetString(PyExc_ValueError,
+ "Old-style NumPy iterators do not support NA masks, "
+ "use numpy.nditer instead");
+ return NULL;
+ }
+
if (PyArray_NDIM(ao) > nd) {
goto err;
}
@@ -442,7 +457,7 @@ PyArray_BroadcastToShape(PyObject *obj, npy_intp *dims, int nd)
if (!compat) {
goto err;
}
- it = (PyArrayIterObject *)_pya_malloc(sizeof(PyArrayIterObject));
+ it = (PyArrayIterObject *)PyArray_malloc(sizeof(PyArrayIterObject));
PyObject_Init((PyObject *)it, &PyArrayIter_Type);
if (it == NULL) {
@@ -1506,7 +1521,7 @@ PyArray_MultiIterFromObjects(PyObject **mps, int n, int nadd, ...)
"array objects (inclusive).", NPY_MAXARGS);
return NULL;
}
- multi = _pya_malloc(sizeof(PyArrayMultiIterObject));
+ multi = PyArray_malloc(sizeof(PyArrayMultiIterObject));
if (multi == NULL) {
return PyErr_NoMemory();
}
@@ -1571,7 +1586,7 @@ PyArray_MultiIterNew(int n, ...)
/* fprintf(stderr, "multi new...");*/
- multi = _pya_malloc(sizeof(PyArrayMultiIterObject));
+ multi = PyArray_malloc(sizeof(PyArrayMultiIterObject));
if (multi == NULL) {
return PyErr_NoMemory();
}
@@ -1634,7 +1649,7 @@ arraymultiter_new(PyTypeObject *NPY_UNUSED(subtype), PyObject *args, PyObject *k
return NULL;
}
- multi = _pya_malloc(sizeof(PyArrayMultiIterObject));
+ multi = PyArray_malloc(sizeof(PyArrayMultiIterObject));
if (multi == NULL) {
return PyErr_NoMemory();
}
@@ -2031,7 +2046,7 @@ PyArray_NeighborhoodIterNew(PyArrayIterObject *x, npy_intp *bounds,
int i;
PyArrayNeighborhoodIterObject *ret;
- ret = _pya_malloc(sizeof(*ret));
+ ret = PyArray_malloc(sizeof(*ret));
if (ret == NULL) {
return NULL;
}
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 396f0a9e4..10e3f82b2 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -16,6 +16,8 @@
#include "iterators.h"
#include "mapping.h"
#include "na_singleton.h"
+#include "lowlevel_strided_loops.h"
+#include "item_selection.h"
#define SOBJ_NOTFANCY 0
#define SOBJ_ISFANCY 1
@@ -496,7 +498,6 @@ static int
fancy_indexing_check(PyObject *args)
{
int i, n;
- PyObject *obj;
int retval = SOBJ_NOTFANCY;
if (PyTuple_Check(args)) {
@@ -505,10 +506,11 @@ fancy_indexing_check(PyObject *args)
return SOBJ_TOOMANY;
}
for (i = 0; i < n; i++) {
- obj = PyTuple_GET_ITEM(args,i);
+ PyObject *obj = PyTuple_GET_ITEM(args,i);
if (PyArray_Check(obj)) {
- if (PyArray_ISINTEGER((PyArrayObject *)obj) ||
- PyArray_ISBOOL((PyArrayObject *)obj)) {
+ int type_num = PyArray_DESCR((PyArrayObject *)obj)->type_num;
+ if (PyTypeNum_ISINTEGER(type_num) ||
+ PyTypeNum_ISBOOL(type_num)) {
retval = SOBJ_ISFANCY;
}
else {
@@ -522,8 +524,8 @@ fancy_indexing_check(PyObject *args)
}
}
else if (PyArray_Check(args)) {
- if ((PyArray_TYPE((PyArrayObject *)args)==NPY_BOOL) ||
- (PyArray_ISINTEGER((PyArrayObject *)args))) {
+ int type_num = PyArray_DESCR((PyArrayObject *)args)->type_num;
+ if (PyTypeNum_ISINTEGER(type_num) || PyTypeNum_ISBOOL(type_num)) {
return SOBJ_ISFANCY;
}
else {
@@ -543,13 +545,14 @@ fancy_indexing_check(PyObject *args)
return SOBJ_ISFANCY;
}
for (i = 0; i < n; i++) {
- obj = PySequence_GetItem(args, i);
+ PyObject *obj = PySequence_GetItem(args, i);
if (obj == NULL) {
return SOBJ_ISFANCY;
}
if (PyArray_Check(obj)) {
- if (PyArray_ISINTEGER((PyArrayObject *)obj) ||
- PyArray_ISBOOL((PyArrayObject *)obj)) {
+ int type_num = PyArray_DESCR((PyArrayObject *)obj)->type_num;
+ if (PyTypeNum_ISINTEGER(type_num) ||
+ PyTypeNum_ISBOOL(type_num)) {
retval = SOBJ_LISTTUP;
}
else {
@@ -560,7 +563,7 @@ fancy_indexing_check(PyObject *args)
retval = SOBJ_LISTTUP;
}
else if (PySlice_Check(obj) || obj == Py_Ellipsis ||
- obj == Py_None) {
+ obj == Py_None) {
retval = SOBJ_NOTFANCY;
}
Py_DECREF(obj);
@@ -672,6 +675,211 @@ array_subscript_simple(PyArrayObject *self, PyObject *op)
return (PyObject *)ret;
}
+/*
+ * Implements boolean indexing. This produces a one-dimensional
+ * array which picks out all of the elements of 'self' for which
+ * the corresponding element of 'op' is True.
+ *
+ * This operation is somewhat unfortunate, because to produce
+ * a one-dimensional output array, it has to choose a particular
+ * iteration order, in the case of NumPy that is always C order even
+ * though this function allows different choices.
+ */
+NPY_NO_EXPORT PyArrayObject *
+array_boolean_subscript(PyArrayObject *self, PyArrayObject *bmask, NPY_ORDER order)
+{
+ npy_intp size, itemsize;
+ char *ret_data, *ret_maskna_data = NULL;
+ PyArray_Descr *dtype;
+ PyArrayObject *ret;
+ int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0;
+
+ if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) {
+ PyErr_SetString(PyExc_TypeError,
+ "NumPy boolean array indexing requires a boolean index");
+ return NULL;
+ }
+
+ /* See the Boolean Indexing section of the missing data NEP */
+ if (PyArray_ContainsNA(bmask)) {
+ PyErr_SetString(PyExc_ValueError,
+ "The boolean mask array may not contain any NA values");
+ return NULL;
+ }
+
+ /*
+ * Since we've checked that the mask contains no NAs, we
+ * can do a straightforward count of the boolean True values
+ * in the raw mask data array.
+ */
+ size = count_boolean_trues(PyArray_NDIM(bmask), PyArray_DATA(bmask),
+ PyArray_DIMS(bmask), PyArray_STRIDES(bmask));
+ /* Correction factor for broadcasting 'bmask' to 'self' */
+ size *= PyArray_SIZE(self) / PyArray_SIZE(bmask);
+
+ /* Allocate the output of the boolean indexing */
+ dtype = PyArray_DESCR(self);
+ Py_INCREF(dtype);
+ ret = (PyArrayObject *)PyArray_NewFromDescr(Py_TYPE(self), dtype, 1, &size,
+ NULL, NULL, 0, (PyObject *)self);
+ if (ret == NULL) {
+ return NULL;
+ }
+
+ /* Allocate an NA mask for ret if required */
+ if (self_has_maskna) {
+ if (PyArray_AllocateMaskNA(ret, 1, 0) < 0) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+ ret_maskna_data = PyArray_MASKNA_DATA(ret);
+ }
+
+ itemsize = dtype->elsize;
+ ret_data = PyArray_DATA(ret);
+
+ /* Create an iterator for the data */
+ if (size > 0) {
+ NpyIter *iter;
+ PyArrayObject *op[2] = {self, bmask};
+ npy_uint32 flags, op_flags[2];
+ npy_intp fixed_strides[3];
+ PyArray_StridedTransferFn *stransfer = NULL;
+ NpyAuxData *transferdata = NULL;
+
+ NpyIter_IterNextFunc *iternext;
+ npy_intp innersize, *innerstrides;
+ char **dataptrs;
+
+ /* Set up the iterator */
+ flags = NPY_ITER_EXTERNAL_LOOP;
+ if (self_has_maskna) {
+ op_flags[0] = NPY_ITER_READONLY | NPY_ITER_USE_MASKNA;
+ }
+ else {
+ op_flags[0] = NPY_ITER_READONLY;
+ }
+ /*
+ * Since we already checked PyArray_ContainsNA(bmask), can
+ * ignore any MASKNA of bmask.
+ */
+ op_flags[1] = NPY_ITER_READONLY | NPY_ITER_IGNORE_MASKNA;
+
+ iter = NpyIter_MultiNew(2, op, flags, order, NPY_NO_CASTING,
+ op_flags, NULL);
+ if (iter == NULL) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+
+ /* Get a dtype transfer function */
+ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
+ if (PyArray_GetDTypeTransferFunction(PyArray_ISALIGNED(self),
+ fixed_strides[0], itemsize,
+ dtype, dtype,
+ 0,
+ &stransfer, &transferdata,
+ &needs_api) != NPY_SUCCEED) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+
+ /* Get the values needed for the inner loop */
+ iternext = NpyIter_GetIterNext(iter, NULL);
+ if (iternext == NULL) {
+ Py_DECREF(ret);
+ NPY_AUXDATA_FREE(transferdata);
+ return NULL;
+ }
+ innerstrides = NpyIter_GetInnerStrideArray(iter);
+ dataptrs = NpyIter_GetDataPtrArray(iter);
+
+ /* Regular inner loop */
+ if (!self_has_maskna) {
+ npy_intp self_stride = innerstrides[0];
+ npy_intp bmask_stride = innerstrides[1];
+ npy_intp subloopsize;
+ do {
+ innersize = *NpyIter_GetInnerLoopSizePtr(iter);
+ char *self_data = dataptrs[0];
+ char *bmask_data = dataptrs[1];
+
+ while (innersize > 0) {
+ /* Skip masked values */
+ subloopsize = 0;
+ while (subloopsize < innersize && *bmask_data == 0) {
+ ++subloopsize;
+ bmask_data += bmask_stride;
+ }
+ innersize -= subloopsize;
+ self_data += subloopsize * self_stride;
+ /* Process unmasked values */
+ subloopsize = 0;
+ while (subloopsize < innersize && *bmask_data != 0) {
+ ++subloopsize;
+ bmask_data += bmask_stride;
+ }
+ stransfer(ret_data, itemsize, self_data, self_stride,
+ subloopsize, itemsize, transferdata);
+ innersize -= subloopsize;
+ self_data += subloopsize * self_stride;
+ }
+ } while (iternext(iter));
+ }
+ /* NA masked inner loop */
+ else {
+ npy_intp i;
+ npy_intp self_stride = innerstrides[0];
+ npy_intp bmask_stride = innerstrides[1];
+ npy_intp maskna_stride = innerstrides[2];
+ npy_intp subloopsize;
+ do {
+ innersize = *NpyIter_GetInnerLoopSizePtr(iter);
+ char *self_data = dataptrs[0];
+ char *bmask_data = dataptrs[1];
+ char *maskna_data = dataptrs[2];
+
+ while (innersize > 0) {
+ /* Skip masked values */
+ subloopsize = 0;
+ while (subloopsize < innersize && *bmask_data == 0) {
+ ++subloopsize;
+ bmask_data += bmask_stride;
+ }
+ innersize -= subloopsize;
+ self_data += subloopsize * self_stride;
+ /* Process unmasked values */
+ subloopsize = 0;
+ while (subloopsize < innersize && *bmask_data != 0) {
+ ++subloopsize;
+ bmask_data += bmask_stride;
+ }
+ /*
+ * Because it's a newly allocated array, we
+ * don't have to be careful about not overwriting
+ * NA masked values. If 'ret' were an output parameter,
+ * we would have to avoid that.
+ */
+ stransfer(ret_data, itemsize, self_data, self_stride,
+ subloopsize, itemsize, transferdata);
+ /* Copy the mask */
+ for (i = 0; i < subloopsize; ++i) {
+ *ret_maskna_data++ = *maskna_data;
+ maskna_data += maskna_stride;
+ }
+ innersize -= subloopsize;
+ self_data += subloopsize * self_stride;
+ }
+ } while (iternext(iter));
+ }
+
+ NpyIter_Deallocate(iter);
+ NPY_AUXDATA_FREE(transferdata);
+ }
+
+ return ret;
+}
+
NPY_NO_EXPORT PyObject *
array_subscript(PyArrayObject *self, PyObject *op)
{
diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c
index 92eab30e5..b7af2c824 100644
--- a/numpy/core/src/multiarray/na_mask.c
+++ b/numpy/core/src/multiarray/na_mask.c
@@ -43,11 +43,50 @@ NPY_NO_EXPORT npy_bool
PyArray_ContainsNA(PyArrayObject *arr)
{
/* Need NA support to contain NA */
- if (!PyArray_HasNASupport(arr)) {
- return 0;
- }
+ if (PyArray_HASMASKNA(arr)) {
+ int idim, ndim;
+ char *data;
+ npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
+ npy_intp i, coord[NPY_MAXDIMS];
+
+ if (PyArray_HASFIELDS(arr)) {
+ /* TODO: need to add field-NA support */
+ return 1;
+ }
+
+ /* Use raw iteration with no heap memory allocation */
+ if (PyArray_PrepareOneRawArrayIter(
+ PyArray_NDIM(arr), PyArray_MASKNA_DATA(arr),
+ PyArray_DIMS(arr), PyArray_MASKNA_STRIDES(arr),
+ &ndim, &data, shape, strides) < 0) {
+ PyErr_Clear();
+ return 1;
+ }
+
+ /* Do the iteration */
+ memset(coord, 0, ndim * sizeof(npy_intp));
+ do {
+ char *d = data;
+ /* Process the innermost dimension */
+ for (i = 0; i < shape[0]; ++i, d += strides[0]) {
+ if (!NpyMaskValue_IsExposed((npy_mask)(*d))) {
+ return 1;
+ }
+ }
- /* TODO: Loop through NA mask */
+ /* Increment to the next n-dimensional coordinate */
+ for (idim = 1; idim < ndim; ++idim) {
+ if (++coord[idim] == shape[idim]) {
+ coord[idim] = 0;
+ data -= (shape[idim] - 1) * strides[idim];
+ }
+ else {
+ data += strides[idim];
+ break;
+ }
+ }
+ } while (i < ndim);
+ }
return 0;
}
diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h
index c606e58f3..e9a1dd40d 100644
--- a/numpy/core/src/private/lowlevel_strided_loops.h
+++ b/numpy/core/src/private/lowlevel_strided_loops.h
@@ -316,6 +316,25 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim,
PyArray_MaskedStridedTransferFn *stransfer,
NpyAuxData *data);
+/*
+ * Prepares shape and strides for a simple raw array iteration.
+ * This sorts the strides into FORTRAN order, reverses any negative
+ * strides, then coalesces axes where possible. The results are
+ * filled in the output parameters.
+ *
+ * This is intended for simple, lightweight iteration over arrays
+ * where no buffering of any kind is needed, and the array may
+ * not be stored as a PyArrayObject. For example, to iterate over
+ * the NA mask of an array.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+NPY_NO_EXPORT int
+PyArray_PrepareOneRawArrayIter(int ndim, char *data,
+ npy_intp *shape, npy_intp *strides,
+ int *out_ndim, char **out_data,
+ npy_intp *out_shape, npy_intp *out_strides);
+
/*
* TRIVIAL ITERATION
diff --git a/numpy/core/tests/test_na.py b/numpy/core/tests/test_na.py
index 3b179b126..98ad67f27 100644
--- a/numpy/core/tests/test_na.py
+++ b/numpy/core/tests/test_na.py
@@ -225,6 +225,13 @@ def test_array_maskna_isna_1D():
a[2:10:3] = np.NA
assert_equal(np.isna(a), [0,0,1,1,0,1,1,0,1,0])
+ # Checking isna of a boolean mask index
+ mask = np.array([1,1,0,0,0,1,0,1,1,0], dtype='?')
+ assert_equal(np.isna(a[mask]), [0,0,1,0,1])
+ # Assigning NA to a boolean masked index
+ a[mask] = np.NA
+ assert_equal(np.isna(a), [1,1,1,1,0,1,1,1,1,0])
+
# TODO: fancy indexing is next...