summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/release/2.0.0-notes.rst5
-rw-r--r--numpy/core/src/multiarray/array_assign_array.c13
-rw-r--r--numpy/core/src/multiarray/array_assign_scalar.c6
-rw-r--r--numpy/core/src/multiarray/common.c6
-rw-r--r--numpy/core/src/multiarray/ctors.c6
-rw-r--r--numpy/core/src/multiarray/item_selection.c26
-rw-r--r--numpy/core/src/multiarray/mapping.c24
-rw-r--r--numpy/core/src/multiarray/na_mask.c146
-rw-r--r--numpy/core/src/multiarray/reduction.c29
-rw-r--r--numpy/core/src/umath/ufunc_object.c10
-rw-r--r--numpy/core/tests/test_maskna.py29
11 files changed, 246 insertions, 54 deletions
diff --git a/doc/release/2.0.0-notes.rst b/doc/release/2.0.0-notes.rst
index e8e3ae4c5..d1ebd1b75 100644
--- a/doc/release/2.0.0-notes.rst
+++ b/doc/release/2.0.0-notes.rst
@@ -14,8 +14,9 @@ New features
Mask-based NA missing values
----------------------------
-Support for NA missing values similar to those in R has been implemented.
-This was done by adding optional NA masks to the core array object.
+Preliminary support for NA missing values similar to those in R has
+been implemented. This was done by adding optional NA masks to the core
+array object.
While a significant amount of the NumPy functionality has been extended to
support NA masks, not everything is yet supported. Here is an (incomplete)
diff --git a/numpy/core/src/multiarray/array_assign_array.c b/numpy/core/src/multiarray/array_assign_array.c
index 6734d845a..eae1d10d1 100644
--- a/numpy/core/src/multiarray/array_assign_array.c
+++ b/numpy/core/src/multiarray/array_assign_array.c
@@ -480,8 +480,11 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
}
if (src_has_maskna && !dst_has_maskna) {
- /* TODO: add 'wheremask' as a parameter to ContainsNA */
- if (PyArray_ContainsNA(src)) {
+ int containsna = PyArray_ContainsNA(src, wheremask, NULL);
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
"does not support NAs");
@@ -655,8 +658,12 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
}
else {
npy_intp wheremask_strides[NPY_MAXDIMS];
+ int containsna = PyArray_ContainsNA(wheremask, NULL, NULL);
- if (PyArray_ContainsNA(wheremask)) {
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
if (!dst_has_maskna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c
index 9200c856e..a1e2192c1 100644
--- a/numpy/core/src/multiarray/array_assign_scalar.c
+++ b/numpy/core/src/multiarray/array_assign_scalar.c
@@ -431,8 +431,12 @@ PyArray_AssignRawScalar(PyArrayObject *dst,
}
else {
npy_intp wheremask_strides[NPY_MAXDIMS];
+ int containsna = PyArray_ContainsNA(wheremask, NULL, NULL);
- if (PyArray_ContainsNA(wheremask)) {
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
if (!dst_has_maskna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 45c7558de..d146b2a51 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -101,7 +101,11 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na,
/* Check if it's an ndarray */
if (PyArray_Check(obj)) {
/* Check for any NAs in the array */
- if (PyArray_ContainsNA((PyArrayObject *)obj)) {
+ int containsna = PyArray_ContainsNA((PyArrayObject *)obj, NULL, NULL);
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
*out_contains_na = 1;
}
dtype = PyArray_DESCR((PyArrayObject *)obj);
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 2b1edd000..183f90e9b 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -2620,7 +2620,11 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
baseflags |= NPY_ITER_USE_MASKNA;
}
else {
- if (PyArray_ContainsNA(src)) {
+ int containsna = PyArray_ContainsNA(src, NULL, NULL);
+ if (containsna == -1) {
+ return -1;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
"does not support NAs");
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 32f190a4d..472f254ed 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -123,11 +123,17 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis,
if (PyArray_HASMASKNA(obj)) {
use_maskna = 1;
}
- else if (PyArray_ContainsNA(self)) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot assign NA to an array which "
- "does not support NAs");
- goto fail;
+ else {
+ int containsna = PyArray_ContainsNA(self, NULL, NULL);
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot assign NA to an array which "
+ "does not support NAs");
+ goto fail;
+ }
}
}
}
@@ -2009,10 +2015,10 @@ PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out,
return NULL;
}
- result = PyArray_ReduceWrapper(arr, out,
+ result = PyArray_ReduceWrapper(arr, out, NULL,
PyArray_DESCR(arr), dtype,
NPY_SAME_KIND_CASTING,
- axis_flags, 1, skipna, keepdims,
+ axis_flags, 1, skipna, NULL, keepdims,
&assign_reduce_identity_zero,
&reduce_count_nonzero_loop,
&reduce_count_nonzero_masked_loop,
@@ -2047,7 +2053,11 @@ PyArray_CountNonzero(PyArrayObject *self)
/* If 'self' has an NA mask, make sure it has no NA values */
if (PyArray_HASMASKNA(self)) {
- if (PyArray_ContainsNA(self)) {
+ int containsna = PyArray_ContainsNA(self, NULL, NULL);
+ if (containsna == -1) {
+ return -1;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot count the number of nonzeros in an array "
"which contains an NA");
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index b1dd6b8bb..8ab685c6f 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -717,7 +717,7 @@ array_boolean_subscript(PyArrayObject *self,
char *ret_data, *ret_maskna_data = NULL;
PyArray_Descr *dtype;
PyArrayObject *ret;
- int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0;
+ int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0, containsna;
npy_intp bmask_size;
if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) {
@@ -728,10 +728,12 @@ array_boolean_subscript(PyArrayObject *self,
/*
* See the Boolean Indexing section of the missing data NEP.
- *
- * TODO: Add 'wheremask' as a parameter to ContainsNA.
*/
- if (PyArray_ContainsNA(bmask)) {
+ containsna = PyArray_ContainsNA(bmask, NULL, NULL);
+ if (containsna == -1) {
+ return NULL;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"The boolean mask indexing array "
"may not contain any NA values");
@@ -957,7 +959,7 @@ array_ass_boolean_subscript(PyArrayObject *self,
char *v_data, *v_maskna_data = NULL;
int self_has_maskna = PyArray_HASMASKNA(self);
int v_has_maskna = PyArray_HASMASKNA(v);
- int needs_api = 0;
+ int needs_api = 0, containsna;
npy_intp bmask_size;
char constant_valid_mask = 1;
@@ -985,7 +987,11 @@ array_ass_boolean_subscript(PyArrayObject *self,
}
/* See the Boolean Indexing section of the missing data NEP */
- if (PyArray_ContainsNA(bmask)) {
+ containsna = PyArray_ContainsNA(bmask, NULL, NULL);
+ if (containsna == -1) {
+ return -1;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"The boolean mask assignment indexing array "
"may not contain any NA values");
@@ -994,7 +1000,11 @@ array_ass_boolean_subscript(PyArrayObject *self,
/* Can't assign an NA to an array which doesn't support it */
if (v_has_maskna && !self_has_maskna) {
- if (PyArray_ContainsNA(v)) {
+ containsna = PyArray_ContainsNA(v, NULL, NULL);
+ if (containsna == -1) {
+ return -1;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
"does not support NAs");
diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c
index a57d530ab..f6267beac 100644
--- a/numpy/core/src/multiarray/na_mask.c
+++ b/numpy/core/src/multiarray/na_mask.c
@@ -39,42 +39,136 @@ PyArray_HasNASupport(PyArrayObject *arr)
* Returns false if the array has no NA support. Returns
* true if the array has NA support AND there is an
* NA anywhere in the array.
+ *
+ * If 'wheremask' is non-NULL, only positions with True
+ * in 'wheremask' are checked for NA.
+ *
+ * The parameter 'whichna' is not yet supported, but is
+ * provided for future multi-NA support. It should be set
+ * to NULL.
+ *
+ * Returns -1 on failure, otherwise 0 for False and 1 for True.
*/
-NPY_NO_EXPORT npy_bool
-PyArray_ContainsNA(PyArrayObject *arr)
+NPY_NO_EXPORT int
+PyArray_ContainsNA(PyArrayObject *arr, PyArrayObject *wheremask,
+ npy_bool *whichna)
{
- /* Need NA support to contain NA */
- if (PyArray_HASMASKNA(arr)) {
- int idim, ndim;
- char *data;
- npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
- npy_intp i, coord[NPY_MAXDIMS];
-
- if (PyArray_HASFIELDS(arr)) {
- /* TODO: need to add field-NA support */
- return 1;
- }
+ /* Validate that the parameter for future expansion is NULL */
+ if (whichna != NULL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "multi-NA is not yet supported in PyArray_ContainsNA");
+ return -1;
+ }
- /* Use raw iteration with no heap memory allocation */
- if (PyArray_PrepareOneRawArrayIter(
+ if (wheremask == NULL) {
+ /* Need NA support to contain NA */
+ if (PyArray_HASMASKNA(arr)) {
+ int idim, ndim;
+ char *data;
+ npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
+ npy_intp i, coord[NPY_MAXDIMS];
+
+ if (PyArray_HASFIELDS(arr)) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "field-NA is not yet supported");
+ return -1;
+ }
+
+ /* Use raw iteration with no heap memory allocation */
+ if (PyArray_PrepareOneRawArrayIter(
PyArray_NDIM(arr), PyArray_DIMS(arr),
PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr),
&ndim, shape,
&data, strides) < 0) {
- PyErr_Clear();
- return 1;
- }
+ return -1;
+ }
- /* Do the iteration */
- NPY_RAW_ITER_START(idim, ndim, coord, shape) {
- char *d = data;
- /* Process the innermost dimension */
- for (i = 0; i < shape[0]; ++i, d += strides[0]) {
- if (!NpyMaskValue_IsExposed((npy_mask)(*d))) {
- return 1;
+ /* Do the iteration */
+ NPY_RAW_ITER_START(idim, ndim, coord, shape) {
+ char *d = data;
+ /* Process the innermost dimension */
+ for (i = 0; i < shape[0]; ++i, d += strides[0]) {
+ if (!NpyMaskValue_IsExposed((npy_mask)(*d))) {
+ return 1;
+ }
}
+ } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides);
+ }
+ }
+ else {
+ npy_intp wheremask_strides_bcast[NPY_MAXDIMS];
+ int containsna;
+
+ containsna = PyArray_ContainsNA(wheremask, NULL, NULL);
+ if (containsna != 0) {
+ if (containsna == -1) {
+ return -1;
+ }
+ else {
+ PyErr_SetString(PyExc_ValueError,
+ "the where mask may not contain any NA values");
+ return -1;
}
- } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides);
+ }
+
+ /*
+ * Broadcast the where-mask onto arr. Note that this
+ * is before checking if 'arr' has an NA mask, to
+ * catch any broadcasting errors.
+ */
+ if (broadcast_strides(PyArray_NDIM(arr), PyArray_DIMS(arr),
+ PyArray_NDIM(wheremask), PyArray_DIMS(wheremask),
+ PyArray_STRIDES(wheremask), "where mask",
+ wheremask_strides_bcast) < 0) {
+ return -1;
+ }
+
+ if (PyArray_DTYPE(wheremask)->type_num != NPY_BOOL) {
+ PyErr_SetString(PyExc_ValueError,
+ "the where mask must have a 'bool' dtype");
+ return -1;
+ }
+
+ if (PyArray_HASMASKNA(arr)) {
+ int idim, ndim;
+ char *data, *wheremask_data;
+ npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
+ npy_intp wheremask_strides[NPY_MAXDIMS];
+ npy_intp i, coord[NPY_MAXDIMS];
+
+ if (PyArray_HASFIELDS(arr)) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "field-NA is not yet supported");
+ return -1;
+ }
+
+ /* Use raw iteration with no heap memory allocation */
+ if (PyArray_PrepareTwoRawArrayIter(
+ PyArray_NDIM(arr), PyArray_DIMS(arr),
+ PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr),
+ PyArray_DATA(wheremask), wheremask_strides_bcast,
+ &ndim, shape,
+ &data, strides,
+ &wheremask_data, wheremask_strides) < 0) {
+ return -1;
+ }
+
+ /* Do the iteration */
+ NPY_RAW_ITER_START(idim, ndim, coord, shape) {
+ char *d = data, *where_d = wheremask_data;
+ /* Process the innermost dimension */
+ for (i = 0; i < shape[0]; ++i) {
+ if (*where_d && !NpyMaskValue_IsExposed((npy_mask)(*d))) {
+ return 1;
+ }
+
+ d += strides[0];
+ where_d += wheremask_strides[0];
+ }
+ } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape,
+ data, strides,
+ wheremask_data, wheremask_strides);
+ }
}
return 0;
diff --git a/numpy/core/src/multiarray/reduction.c b/numpy/core/src/multiarray/reduction.c
index 3034a8e2d..aafe370ab 100644
--- a/numpy/core/src/multiarray/reduction.c
+++ b/numpy/core/src/multiarray/reduction.c
@@ -509,6 +509,9 @@ PyArray_InitializeReduceResult(
*
* operand : The array to be reduced.
* out : NULL, or the array into which to place the result.
+ * wheremask : NOT YET SUPPORTED, but this parameter is placed here
+ * so that support can be added in the future without breaking
+ * API compatibility. Pass in NULL.
* operand_dtype : The dtype the inner loop expects for the operand.
* result_dtype : The dtype the inner loop expects for the result.
* casting : The casting rule to apply to the operands.
@@ -519,6 +522,9 @@ PyArray_InitializeReduceResult(
* arbitrary order. The calculation may be reordered because
* of cache behavior or multithreading requirements.
* skipna : If true, NAs are skipped instead of propagating.
+ * skipwhichna : NOT YET SUPPORTED, but this parameter is placed here
+ * so that support can be added for multi-NA without
+ * breaking API compatibility. Pass in NULL.
* keepdims : If true, leaves the reduction dimensions in the result
* with size one.
* assign_identity : If NULL, PyArray_InitializeReduceResult is used, otherwise
@@ -532,11 +538,12 @@ PyArray_InitializeReduceResult(
*/
NPY_NO_EXPORT PyArrayObject *
PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
+ PyArrayObject *wheremask,
PyArray_Descr *operand_dtype,
PyArray_Descr *result_dtype,
NPY_CASTING casting,
npy_bool *axis_flags, int reorderable,
- int skipna, int keepdims,
+ int skipna, npy_bool *skipwhichna, int keepdims,
PyArray_AssignReduceIdentityFunc *assign_identity,
PyArray_ReduceLoopFunc *loop,
PyArray_ReduceLoopFunc *masked_loop,
@@ -552,6 +559,20 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
PyArray_Descr *op_dtypes[2];
npy_uint32 flags, op_flags[2];
+ /* Validate that the parameters for future expansion are NULL */
+ if (wheremask != NULL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Reduce operations in NumPy do not yet support "
+ "a where mask");
+ return NULL;
+ }
+ if (skipwhichna != NULL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "multi-NA support is not yet implemented in "
+ "reduce operations");
+ return NULL;
+ }
+
use_maskna = PyArray_HASMASKNA(operand);
/*
@@ -559,7 +580,11 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
* contains no NA values so we can ignore the mask entirely.
*/
if (use_maskna && !skipna && out != NULL && !PyArray_HASMASKNA(out)) {
- if (PyArray_ContainsNA(operand)) {
+ int containsna = PyArray_ContainsNA(operand, wheremask, NULL);
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
"does not support NAs");
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index a9a3bc8b8..c1ab4055d 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -987,7 +987,11 @@ static int get_ufunc_arguments(PyUFuncObject *ufunc,
/* Check all the inputs for NA */
for(i = 0; i < nin; ++i) {
if (PyArray_HASMASKNA(out_op[i])) {
- if (PyArray_ContainsNA(out_op[i])) {
+ int containsna = PyArray_ContainsNA(out_op[i], NULL, NULL);
+ if (containsna == -1) {
+ return -1;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA value to an array which "
"does not support NAs");
@@ -2848,10 +2852,10 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
return NULL;
}
- result = PyArray_ReduceWrapper(arr, out, dtype, dtype,
+ result = PyArray_ReduceWrapper(arr, out, NULL, dtype, dtype,
NPY_UNSAFE_CASTING,
axis_flags, reorderable,
- skipna, keepdims,
+ skipna, NULL, keepdims,
assign_identity,
reduce_loop,
masked_reduce_loop,
diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py
index 134a48e3a..69a6cfbdd 100644
--- a/numpy/core/tests/test_maskna.py
+++ b/numpy/core/tests/test_maskna.py
@@ -312,6 +312,35 @@ def test_array_maskna_isna_2D():
# TODO: fancy indexing is next...
+def test_array_maskna_to_nomask():
+ # Assignment from an array with NAs to a non-masked array,
+ # excluding the NAs with a mask
+ a = np.array([[2,np.NA,5],[1,6,np.NA]], maskna=True)
+ mask = np.array([[1,0,0],[1,1,0]], dtype='?')
+ badmask = np.array([[1,0,0],[0,1,1]], dtype='?')
+ expected = np.array([[2,1,2],[1,6,5]])
+
+ # With masked indexing
+ b = np.arange(6).reshape(2,3)
+ b[mask] = a[mask]
+ assert_array_equal(b, expected)
+
+ # With copyto
+ b = np.arange(6).reshape(2,3)
+ np.copyto(b, a, where=mask)
+ assert_array_equal(b, expected)
+
+ # With masked indexing, using a mask that selects an NA (must fail)
+ b = np.arange(6).reshape(2,3)
+ def asn():
+ b[badmask] = a[badmask]
+ assert_raises(ValueError, asn)
+
+ # With copyto, using a mask that selects an NA (must fail)
+ b = np.arange(6).reshape(2,3)
+ assert_raises(ValueError, np.copyto, b, a, where=badmask)
+
+
def test_array_maskna_view_function():
a = np.arange(10)