diff options
-rw-r--r-- | doc/release/2.0.0-notes.rst | 5 | ||||
-rw-r--r-- | numpy/core/src/multiarray/array_assign_array.c | 13 | ||||
-rw-r--r-- | numpy/core/src/multiarray/array_assign_scalar.c | 6 | ||||
-rw-r--r-- | numpy/core/src/multiarray/common.c | 6 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 6 | ||||
-rw-r--r-- | numpy/core/src/multiarray/item_selection.c | 26 | ||||
-rw-r--r-- | numpy/core/src/multiarray/mapping.c | 24 | ||||
-rw-r--r-- | numpy/core/src/multiarray/na_mask.c | 146 | ||||
-rw-r--r-- | numpy/core/src/multiarray/reduction.c | 29 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 10 | ||||
-rw-r--r-- | numpy/core/tests/test_maskna.py | 29 |
11 files changed, 246 insertions, 54 deletions
diff --git a/doc/release/2.0.0-notes.rst b/doc/release/2.0.0-notes.rst index e8e3ae4c5..d1ebd1b75 100644 --- a/doc/release/2.0.0-notes.rst +++ b/doc/release/2.0.0-notes.rst @@ -14,8 +14,9 @@ New features Mask-based NA missing values ---------------------------- -Support for NA missing values similar to those in R has been implemented. -This was done by adding optional NA masks to the core array object. +Preliminary support for NA missing values similar to those in R has +been implemented. This was done by adding optional NA masks to the core +array object. While a significant amount of the NumPy functionality has been extended to support NA masks, not everything is yet supported. Here is an (incomplete) diff --git a/numpy/core/src/multiarray/array_assign_array.c b/numpy/core/src/multiarray/array_assign_array.c index 6734d845a..eae1d10d1 100644 --- a/numpy/core/src/multiarray/array_assign_array.c +++ b/numpy/core/src/multiarray/array_assign_array.c @@ -480,8 +480,11 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src, } if (src_has_maskna && !dst_has_maskna) { - /* TODO: add 'wheremask' as a parameter to ContainsNA */ - if (PyArray_ContainsNA(src)) { + int containsna = PyArray_ContainsNA(src, wheremask, NULL); + if (containsna == -1) { + goto fail; + } + else if (containsna) { PyErr_SetString(PyExc_ValueError, "Cannot assign NA to an array which " "does not support NAs"); @@ -655,8 +658,12 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src, } else { npy_intp wheremask_strides[NPY_MAXDIMS]; + int containsna = PyArray_ContainsNA(wheremask, NULL, NULL); - if (PyArray_ContainsNA(wheremask)) { + if (containsna == -1) { + goto fail; + } + else if (containsna) { if (!dst_has_maskna) { PyErr_SetString(PyExc_ValueError, "Cannot assign NA to an array which " diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c index 9200c856e..a1e2192c1 100644 --- a/numpy/core/src/multiarray/array_assign_scalar.c 
+++ b/numpy/core/src/multiarray/array_assign_scalar.c @@ -431,8 +431,12 @@ PyArray_AssignRawScalar(PyArrayObject *dst, } else { npy_intp wheremask_strides[NPY_MAXDIMS]; + int containsna = PyArray_ContainsNA(wheremask, NULL, NULL); - if (PyArray_ContainsNA(wheremask)) { + if (containsna == -1) { + goto fail; + } + else if (containsna) { if (!dst_has_maskna) { PyErr_SetString(PyExc_ValueError, "Cannot assign NA to an array which " diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 45c7558de..d146b2a51 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -101,7 +101,11 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na, /* Check if it's an ndarray */ if (PyArray_Check(obj)) { /* Check for any NAs in the array */ - if (PyArray_ContainsNA((PyArrayObject *)obj)) { + int containsna = PyArray_ContainsNA((PyArrayObject *)obj, NULL, NULL); + if (containsna == -1) { + goto fail; + } + else if (containsna) { *out_contains_na = 1; } dtype = PyArray_DESCR((PyArrayObject *)obj); diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 2b1edd000..183f90e9b 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -2620,7 +2620,11 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) baseflags |= NPY_ITER_USE_MASKNA; } else { - if (PyArray_ContainsNA(src)) { + int containsna = PyArray_ContainsNA(src, NULL, NULL); + if (containsna == -1) { + return -1; + } + else if (containsna) { PyErr_SetString(PyExc_ValueError, "Cannot assign NA to an array which " "does not support NAs"); diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 32f190a4d..472f254ed 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -123,11 +123,17 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis, 
if (PyArray_HASMASKNA(obj)) { use_maskna = 1; } - else if (PyArray_ContainsNA(self)) { - PyErr_SetString(PyExc_ValueError, - "Cannot assign NA to an array which " - "does not support NAs"); - goto fail; + else { + int containsna = PyArray_ContainsNA(self, NULL, NULL); + if (containsna == -1) { + goto fail; + } + else if (containsna) { + PyErr_SetString(PyExc_ValueError, + "Cannot assign NA to an array which " + "does not support NAs"); + goto fail; + } } } } @@ -2009,10 +2015,10 @@ PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out, return NULL; } - result = PyArray_ReduceWrapper(arr, out, + result = PyArray_ReduceWrapper(arr, out, NULL, PyArray_DESCR(arr), dtype, NPY_SAME_KIND_CASTING, - axis_flags, 1, skipna, keepdims, + axis_flags, 1, skipna, NULL, keepdims, &assign_reduce_identity_zero, &reduce_count_nonzero_loop, &reduce_count_nonzero_masked_loop, @@ -2047,7 +2053,11 @@ PyArray_CountNonzero(PyArrayObject *self) /* If 'self' has an NA mask, make sure it has no NA values */ if (PyArray_HASMASKNA(self)) { - if (PyArray_ContainsNA(self)) { + int containsna = PyArray_ContainsNA(self, NULL, NULL); + if (containsna == -1) { + return -1; + } + else if (containsna) { PyErr_SetString(PyExc_ValueError, "Cannot count the number of nonzeros in an array " "which contains an NA"); diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index b1dd6b8bb..8ab685c6f 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -717,7 +717,7 @@ array_boolean_subscript(PyArrayObject *self, char *ret_data, *ret_maskna_data = NULL; PyArray_Descr *dtype; PyArrayObject *ret; - int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0; + int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0, containsna; npy_intp bmask_size; if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) { @@ -728,10 +728,12 @@ array_boolean_subscript(PyArrayObject *self, /* * See the Boolean Indexing section of the missing data 
NEP. - * - * TODO: Add 'wheremask' as a parameter to ContainsNA. */ - if (PyArray_ContainsNA(bmask)) { + containsna = PyArray_ContainsNA(bmask, NULL, NULL); + if (containsna == -1) { + return NULL; + } + else if (containsna) { PyErr_SetString(PyExc_ValueError, "The boolean mask indexing array " "may not contain any NA values"); @@ -957,7 +959,7 @@ array_ass_boolean_subscript(PyArrayObject *self, char *v_data, *v_maskna_data = NULL; int self_has_maskna = PyArray_HASMASKNA(self); int v_has_maskna = PyArray_HASMASKNA(v); - int needs_api = 0; + int needs_api = 0, containsna; npy_intp bmask_size; char constant_valid_mask = 1; @@ -985,7 +987,11 @@ array_ass_boolean_subscript(PyArrayObject *self, } /* See the Boolean Indexing section of the missing data NEP */ - if (PyArray_ContainsNA(bmask)) { + containsna = PyArray_ContainsNA(bmask, NULL, NULL); + if (containsna == -1) { + return -1; + } + else if (containsna) { PyErr_SetString(PyExc_ValueError, "The boolean mask assignment indexing array " "may not contain any NA values"); @@ -994,7 +1000,11 @@ array_ass_boolean_subscript(PyArrayObject *self, /* Can't assign an NA to an array which doesn't support it */ if (v_has_maskna && !self_has_maskna) { - if (PyArray_ContainsNA(v)) { + containsna = PyArray_ContainsNA(v, NULL, NULL); + if (containsna == -1) { + return -1; + } + else if (containsna) { PyErr_SetString(PyExc_ValueError, "Cannot assign NA to an array which " "does not support NAs"); diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c index a57d530ab..f6267beac 100644 --- a/numpy/core/src/multiarray/na_mask.c +++ b/numpy/core/src/multiarray/na_mask.c @@ -39,42 +39,136 @@ PyArray_HasNASupport(PyArrayObject *arr) * Returns false if the array has no NA support. Returns * true if the array has NA support AND there is an * NA anywhere in the array. + * + * If 'wheremask' is non-NULL, only positions with True + * in 'wheremask' are checked for NA. 
+ * + * The parameter 'whichna' is not yet supported, but is + * provided for future multi-NA support. It should be set + * to NULL. + * + * Returns -1 on failure, otherwise 0 for False and 1 for True. */ -NPY_NO_EXPORT npy_bool -PyArray_ContainsNA(PyArrayObject *arr) +NPY_NO_EXPORT int +PyArray_ContainsNA(PyArrayObject *arr, PyArrayObject *wheremask, + npy_bool *whichna) { - /* Need NA support to contain NA */ - if (PyArray_HASMASKNA(arr)) { - int idim, ndim; - char *data; - npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS]; - npy_intp i, coord[NPY_MAXDIMS]; - - if (PyArray_HASFIELDS(arr)) { - /* TODO: need to add field-NA support */ - return 1; - } + /* Validate that the parameter for future expansion is NULL */ + if (whichna != NULL) { + PyErr_SetString(PyExc_RuntimeError, + "multi-NA is not yet supported in PyArray_ContainsNA"); + return -1; + } - /* Use raw iteration with no heap memory allocation */ - if (PyArray_PrepareOneRawArrayIter( + if (wheremask == NULL) { + /* Need NA support to contain NA */ + if (PyArray_HASMASKNA(arr)) { + int idim, ndim; + char *data; + npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS]; + npy_intp i, coord[NPY_MAXDIMS]; + + if (PyArray_HASFIELDS(arr)) { + PyErr_SetString(PyExc_RuntimeError, + "field-NA is not yet supported"); + return -1; + } + + /* Use raw iteration with no heap memory allocation */ + if (PyArray_PrepareOneRawArrayIter( PyArray_NDIM(arr), PyArray_DIMS(arr), PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr), &ndim, shape, &data, strides) < 0) { - PyErr_Clear(); - return 1; - } + return -1; + } - /* Do the iteration */ - NPY_RAW_ITER_START(idim, ndim, coord, shape) { - char *d = data; - /* Process the innermost dimension */ - for (i = 0; i < shape[0]; ++i, d += strides[0]) { - if (!NpyMaskValue_IsExposed((npy_mask)(*d))) { - return 1; + /* Do the iteration */ + NPY_RAW_ITER_START(idim, ndim, coord, shape) { + char *d = data; + /* Process the innermost dimension */ + for (i = 0; i < shape[0]; ++i, d += 
strides[0]) { + if (!NpyMaskValue_IsExposed((npy_mask)(*d))) { + return 1; + } } + } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides); + } + } + else { + npy_intp wheremask_strides_bcast[NPY_MAXDIMS]; + int containsna; + + containsna = PyArray_ContainsNA(wheremask, NULL, NULL); + if (containsna != 0) { + if (containsna == -1) { + return -1; + } + else { + PyErr_SetString(PyExc_ValueError, + "the where mask may not contain any NA values"); + return -1; } - } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides); + } + + /* + * Broadcast the where-mask onto arr. Note that this + * is before checking if 'arr' has an NA mask, to + * catch any broadcasting errors. + */ + if (broadcast_strides(PyArray_NDIM(arr), PyArray_DIMS(arr), + PyArray_NDIM(wheremask), PyArray_DIMS(wheremask), + PyArray_STRIDES(wheremask), "where mask", + wheremask_strides_bcast) < 0) { + return -1; + } + + if (PyArray_DTYPE(wheremask)->type_num != NPY_BOOL) { + PyErr_SetString(PyExc_ValueError, + "the where mask must have a 'bool' dtype"); + return -1; + } + + if (PyArray_HASMASKNA(arr)) { + int idim, ndim; + char *data, *wheremask_data; + npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS]; + npy_intp wheremask_strides[NPY_MAXDIMS]; + npy_intp i, coord[NPY_MAXDIMS]; + + if (PyArray_HASFIELDS(arr)) { + PyErr_SetString(PyExc_RuntimeError, + "field-NA is not yet supported"); + return -1; + } + + /* Use raw iteration with no heap memory allocation */ + if (PyArray_PrepareTwoRawArrayIter( + PyArray_NDIM(arr), PyArray_DIMS(arr), + PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr), + PyArray_DATA(wheremask), wheremask_strides_bcast, + &ndim, shape, + &data, strides, + &wheremask_data, wheremask_strides) < 0) { + return -1; + } + + /* Do the iteration */ + NPY_RAW_ITER_START(idim, ndim, coord, shape) { + char *d = data, *where_d = wheremask_data; + /* Process the innermost dimension */ + for (i = 0; i < shape[0]; ++i) { + if (*where_d && !NpyMaskValue_IsExposed((npy_mask)(*d))) { 
+ return 1; + } + + d += strides[0]; + where_d += wheremask_strides[0]; + } + } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape, + data, strides, + wheremask_data, wheremask_strides); + } } return 0; diff --git a/numpy/core/src/multiarray/reduction.c b/numpy/core/src/multiarray/reduction.c index 3034a8e2d..aafe370ab 100644 --- a/numpy/core/src/multiarray/reduction.c +++ b/numpy/core/src/multiarray/reduction.c @@ -509,6 +509,9 @@ PyArray_InitializeReduceResult( * * operand : The array to be reduced. * out : NULL, or the array into which to place the result. + * wheremask : NOT YET SUPPORTED, but this parameter is placed here + * so that support can be added in the future without breaking + * API compatibility. Pass in NULL. * operand_dtype : The dtype the inner loop expects for the operand. * result_dtype : The dtype the inner loop expects for the result. * casting : The casting rule to apply to the operands. @@ -519,6 +522,9 @@ PyArray_InitializeReduceResult( * arbitrary order. The calculation may be reordered because * of cache behavior or multithreading requirements. * skipna : If true, NAs are skipped instead of propagating. + * skipwhichna : NOT YET SUPPORTED, but this parameter is placed here + * so that support can be added for multi-NA without + * breaking API compatibility. Pass in NULL. * keepdims : If true, leaves the reduction dimensions in the result * with size one. 
* assign_identity : If NULL, PyArray_InitializeReduceResult is used, otherwise @@ -532,11 +538,12 @@ PyArray_InitializeReduceResult( */ NPY_NO_EXPORT PyArrayObject * PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, + PyArrayObject *wheremask, PyArray_Descr *operand_dtype, PyArray_Descr *result_dtype, NPY_CASTING casting, npy_bool *axis_flags, int reorderable, - int skipna, int keepdims, + int skipna, npy_bool *skipwhichna, int keepdims, PyArray_AssignReduceIdentityFunc *assign_identity, PyArray_ReduceLoopFunc *loop, PyArray_ReduceLoopFunc *masked_loop, @@ -552,6 +559,20 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, PyArray_Descr *op_dtypes[2]; npy_uint32 flags, op_flags[2]; + /* Validate that the parameters for future expansion are NULL */ + if (wheremask != NULL) { + PyErr_SetString(PyExc_RuntimeError, + "Reduce operations in NumPy do not yet support " + "a where mask"); + return NULL; + } + if (skipwhichna != NULL) { + PyErr_SetString(PyExc_RuntimeError, + "multi-NA support is not yet implemented in " + "reduce operations"); + return NULL; + } + use_maskna = PyArray_HASMASKNA(operand); /* @@ -559,7 +580,11 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, * contains no NA values so we can ignore the mask entirely. 
*/ if (use_maskna && !skipna && out != NULL && !PyArray_HASMASKNA(out)) { - if (PyArray_ContainsNA(operand)) { + int containsna = PyArray_ContainsNA(operand, wheremask, NULL); + if (containsna == -1) { + goto fail; + } + else if (containsna) { PyErr_SetString(PyExc_ValueError, "Cannot assign NA to an array which " "does not support NAs"); diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index a9a3bc8b8..c1ab4055d 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -987,7 +987,11 @@ static int get_ufunc_arguments(PyUFuncObject *ufunc, /* Check all the inputs for NA */ for(i = 0; i < nin; ++i) { if (PyArray_HASMASKNA(out_op[i])) { - if (PyArray_ContainsNA(out_op[i])) { + int containsna = PyArray_ContainsNA(out_op[i], NULL, NULL); + if (containsna == -1) { + return -1; + } + else if (containsna) { PyErr_SetString(PyExc_ValueError, "Cannot assign NA value to an array which " "does not support NAs"); @@ -2848,10 +2852,10 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, return NULL; } - result = PyArray_ReduceWrapper(arr, out, dtype, dtype, + result = PyArray_ReduceWrapper(arr, out, NULL, dtype, dtype, NPY_UNSAFE_CASTING, axis_flags, reorderable, - skipna, keepdims, + skipna, NULL, keepdims, assign_identity, reduce_loop, masked_reduce_loop, diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py index 134a48e3a..69a6cfbdd 100644 --- a/numpy/core/tests/test_maskna.py +++ b/numpy/core/tests/test_maskna.py @@ -312,6 +312,35 @@ def test_array_maskna_isna_2D(): # TODO: fancy indexing is next... 
+def test_array_maskna_to_nomask(): + # Assignment from an array with NAs to a non-masked array, + # excluding the NAs with a mask + a = np.array([[2,np.NA,5],[1,6,np.NA]], maskna=True) + mask = np.array([[1,0,0],[1,1,0]], dtype='?') + badmask = np.array([[1,0,0],[0,1,1]], dtype='?') + expected = np.array([[2,1,2],[1,6,5]]) + + # With masked indexing + b = np.arange(6).reshape(2,3) + b[mask] = a[mask] + assert_array_equal(b, expected) + + # With copyto + b = np.arange(6).reshape(2,3) + np.copyto(b, a, where=mask) + assert_array_equal(b, expected) + + # With masked indexing + b = np.arange(6).reshape(2,3) + def asn(): + b[badmask] = a[badmask] + assert_raises(ValueError, asn) + + # With copyto + b = np.arange(6).reshape(2,3) + assert_raises(ValueError, np.copyto, b, a, where=badmask) + + def test_array_maskna_view_function(): a = np.arange(10) |