summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authorMark Wiebe <mwwiebe@gmail.com>2011-08-22 22:27:01 -0700
committerCharles Harris <charlesr.harris@gmail.com>2011-08-27 07:26:59 -0600
commit9ca27aecb17baee83a58d61507250d9aaa5ca34c (patch)
treef3855e8a7cfb447b06e4bac037ab8a43c404a7b6 /numpy
parentc8c262a1be42c3989994bdc557c7b25e22d80d83 (diff)
downloadnumpy-9ca27aecb17baee83a58d61507250d9aaa5ca34c.tar.gz
ENH: missingdata: Add wheremask to PyArray_ContainsNA
Use this to make masked assignment check only the elements it's copying for NA, so that the source array can contain NAs, just not at the positions the mask selects.
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/src/multiarray/array_assign_array.c13
-rw-r--r--numpy/core/src/multiarray/array_assign_scalar.c6
-rw-r--r--numpy/core/src/multiarray/common.c6
-rw-r--r--numpy/core/src/multiarray/ctors.c6
-rw-r--r--numpy/core/src/multiarray/item_selection.c26
-rw-r--r--numpy/core/src/multiarray/mapping.c24
-rw-r--r--numpy/core/src/multiarray/na_mask.c146
-rw-r--r--numpy/core/src/multiarray/reduction.c29
-rw-r--r--numpy/core/src/umath/ufunc_object.c10
-rw-r--r--numpy/core/tests/test_maskna.py29
10 files changed, 243 insertions, 52 deletions
diff --git a/numpy/core/src/multiarray/array_assign_array.c b/numpy/core/src/multiarray/array_assign_array.c
index 6734d845a..eae1d10d1 100644
--- a/numpy/core/src/multiarray/array_assign_array.c
+++ b/numpy/core/src/multiarray/array_assign_array.c
@@ -480,8 +480,11 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
}
if (src_has_maskna && !dst_has_maskna) {
- /* TODO: add 'wheremask' as a parameter to ContainsNA */
- if (PyArray_ContainsNA(src)) {
+ int containsna = PyArray_ContainsNA(src, wheremask, NULL);
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
"does not support NAs");
@@ -655,8 +658,12 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
}
else {
npy_intp wheremask_strides[NPY_MAXDIMS];
+ int containsna = PyArray_ContainsNA(wheremask, NULL, NULL);
- if (PyArray_ContainsNA(wheremask)) {
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
if (!dst_has_maskna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c
index 9200c856e..a1e2192c1 100644
--- a/numpy/core/src/multiarray/array_assign_scalar.c
+++ b/numpy/core/src/multiarray/array_assign_scalar.c
@@ -431,8 +431,12 @@ PyArray_AssignRawScalar(PyArrayObject *dst,
}
else {
npy_intp wheremask_strides[NPY_MAXDIMS];
+ int containsna = PyArray_ContainsNA(wheremask, NULL, NULL);
- if (PyArray_ContainsNA(wheremask)) {
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
if (!dst_has_maskna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 45c7558de..d146b2a51 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -101,7 +101,11 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na,
/* Check if it's an ndarray */
if (PyArray_Check(obj)) {
/* Check for any NAs in the array */
- if (PyArray_ContainsNA((PyArrayObject *)obj)) {
+ int containsna = PyArray_ContainsNA((PyArrayObject *)obj, NULL, NULL);
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
*out_contains_na = 1;
}
dtype = PyArray_DESCR((PyArrayObject *)obj);
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 2b1edd000..183f90e9b 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -2620,7 +2620,11 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
baseflags |= NPY_ITER_USE_MASKNA;
}
else {
- if (PyArray_ContainsNA(src)) {
+ int containsna = PyArray_ContainsNA(src, NULL, NULL);
+ if (containsna == -1) {
+ return -1;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
"does not support NAs");
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 32f190a4d..472f254ed 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -123,11 +123,17 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis,
if (PyArray_HASMASKNA(obj)) {
use_maskna = 1;
}
- else if (PyArray_ContainsNA(self)) {
- PyErr_SetString(PyExc_ValueError,
- "Cannot assign NA to an array which "
- "does not support NAs");
- goto fail;
+ else {
+ int containsna = PyArray_ContainsNA(self, NULL, NULL);
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
+ PyErr_SetString(PyExc_ValueError,
+ "Cannot assign NA to an array which "
+ "does not support NAs");
+ goto fail;
+ }
}
}
}
@@ -2009,10 +2015,10 @@ PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out,
return NULL;
}
- result = PyArray_ReduceWrapper(arr, out,
+ result = PyArray_ReduceWrapper(arr, out, NULL,
PyArray_DESCR(arr), dtype,
NPY_SAME_KIND_CASTING,
- axis_flags, 1, skipna, keepdims,
+ axis_flags, 1, skipna, NULL, keepdims,
&assign_reduce_identity_zero,
&reduce_count_nonzero_loop,
&reduce_count_nonzero_masked_loop,
@@ -2047,7 +2053,11 @@ PyArray_CountNonzero(PyArrayObject *self)
/* If 'self' has an NA mask, make sure it has no NA values */
if (PyArray_HASMASKNA(self)) {
- if (PyArray_ContainsNA(self)) {
+ int containsna = PyArray_ContainsNA(self, NULL, NULL);
+ if (containsna == -1) {
+ return -1;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot count the number of nonzeros in an array "
"which contains an NA");
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index b1dd6b8bb..8ab685c6f 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -717,7 +717,7 @@ array_boolean_subscript(PyArrayObject *self,
char *ret_data, *ret_maskna_data = NULL;
PyArray_Descr *dtype;
PyArrayObject *ret;
- int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0;
+ int self_has_maskna = PyArray_HASMASKNA(self), needs_api = 0, containsna;
npy_intp bmask_size;
if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) {
@@ -728,10 +728,12 @@ array_boolean_subscript(PyArrayObject *self,
/*
* See the Boolean Indexing section of the missing data NEP.
- *
- * TODO: Add 'wheremask' as a parameter to ContainsNA.
*/
- if (PyArray_ContainsNA(bmask)) {
+ containsna = PyArray_ContainsNA(bmask, NULL, NULL);
+ if (containsna == -1) {
+ return NULL;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"The boolean mask indexing array "
"may not contain any NA values");
@@ -957,7 +959,7 @@ array_ass_boolean_subscript(PyArrayObject *self,
char *v_data, *v_maskna_data = NULL;
int self_has_maskna = PyArray_HASMASKNA(self);
int v_has_maskna = PyArray_HASMASKNA(v);
- int needs_api = 0;
+ int needs_api = 0, containsna;
npy_intp bmask_size;
char constant_valid_mask = 1;
@@ -985,7 +987,11 @@ array_ass_boolean_subscript(PyArrayObject *self,
}
/* See the Boolean Indexing section of the missing data NEP */
- if (PyArray_ContainsNA(bmask)) {
+ containsna = PyArray_ContainsNA(bmask, NULL, NULL);
+ if (containsna == -1) {
+ return -1;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"The boolean mask assignment indexing array "
"may not contain any NA values");
@@ -994,7 +1000,11 @@ array_ass_boolean_subscript(PyArrayObject *self,
/* Can't assign an NA to an array which doesn't support it */
if (v_has_maskna && !self_has_maskna) {
- if (PyArray_ContainsNA(v)) {
+ containsna = PyArray_ContainsNA(v, NULL, NULL);
+ if (containsna == -1) {
+ return -1;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
"does not support NAs");
diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c
index a57d530ab..f6267beac 100644
--- a/numpy/core/src/multiarray/na_mask.c
+++ b/numpy/core/src/multiarray/na_mask.c
@@ -39,42 +39,136 @@ PyArray_HasNASupport(PyArrayObject *arr)
* Returns false if the array has no NA support. Returns
* true if the array has NA support AND there is an
* NA anywhere in the array.
+ *
+ * If 'wheremask' is non-NULL, only positions with True
+ * in 'wheremask' are checked for NA.
+ *
+ * The parameter 'whichna' is not yet supported, but is
+ * provided for future multi-NA support. It should be set
+ * to NULL.
+ *
+ * Returns -1 on failure, otherwise 0 for False and 1 for True.
*/
-NPY_NO_EXPORT npy_bool
-PyArray_ContainsNA(PyArrayObject *arr)
+NPY_NO_EXPORT int
+PyArray_ContainsNA(PyArrayObject *arr, PyArrayObject *wheremask,
+ npy_bool *whichna)
{
- /* Need NA support to contain NA */
- if (PyArray_HASMASKNA(arr)) {
- int idim, ndim;
- char *data;
- npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
- npy_intp i, coord[NPY_MAXDIMS];
-
- if (PyArray_HASFIELDS(arr)) {
- /* TODO: need to add field-NA support */
- return 1;
- }
+ /* Validate that the parameter for future expansion is NULL */
+ if (whichna != NULL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "multi-NA is not yet supported in PyArray_ContainsNA");
+ return -1;
+ }
- /* Use raw iteration with no heap memory allocation */
- if (PyArray_PrepareOneRawArrayIter(
+ if (wheremask == NULL) {
+ /* Need NA support to contain NA */
+ if (PyArray_HASMASKNA(arr)) {
+ int idim, ndim;
+ char *data;
+ npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
+ npy_intp i, coord[NPY_MAXDIMS];
+
+ if (PyArray_HASFIELDS(arr)) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "field-NA is not yet supported");
+ return -1;
+ }
+
+ /* Use raw iteration with no heap memory allocation */
+ if (PyArray_PrepareOneRawArrayIter(
PyArray_NDIM(arr), PyArray_DIMS(arr),
PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr),
&ndim, shape,
&data, strides) < 0) {
- PyErr_Clear();
- return 1;
- }
+ return -1;
+ }
- /* Do the iteration */
- NPY_RAW_ITER_START(idim, ndim, coord, shape) {
- char *d = data;
- /* Process the innermost dimension */
- for (i = 0; i < shape[0]; ++i, d += strides[0]) {
- if (!NpyMaskValue_IsExposed((npy_mask)(*d))) {
- return 1;
+ /* Do the iteration */
+ NPY_RAW_ITER_START(idim, ndim, coord, shape) {
+ char *d = data;
+ /* Process the innermost dimension */
+ for (i = 0; i < shape[0]; ++i, d += strides[0]) {
+ if (!NpyMaskValue_IsExposed((npy_mask)(*d))) {
+ return 1;
+ }
}
+ } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides);
+ }
+ }
+ else {
+ npy_intp wheremask_strides_bcast[NPY_MAXDIMS];
+ int containsna;
+
+ containsna = PyArray_ContainsNA(wheremask, NULL, NULL);
+ if (containsna != 0) {
+ if (containsna == -1) {
+ return -1;
+ }
+ else {
+ PyErr_SetString(PyExc_ValueError,
+ "the where mask may not contain any NA values");
+ return -1;
}
- } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides);
+ }
+
+ /*
+ * Broadcast the where-mask onto arr. Note that this
+ * is before checking if 'arr' has an NA mask, to
+ * catch any broadcasting errors.
+ */
+ if (broadcast_strides(PyArray_NDIM(arr), PyArray_DIMS(arr),
+ PyArray_NDIM(wheremask), PyArray_DIMS(wheremask),
+ PyArray_STRIDES(wheremask), "where mask",
+ wheremask_strides_bcast) < 0) {
+ return -1;
+ }
+
+ if (PyArray_DTYPE(wheremask)->type_num != NPY_BOOL) {
+ PyErr_SetString(PyExc_ValueError,
+ "the where mask must have a 'bool' dtype");
+ return -1;
+ }
+
+ if (PyArray_HASMASKNA(arr)) {
+ int idim, ndim;
+ char *data, *wheremask_data;
+ npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
+ npy_intp wheremask_strides[NPY_MAXDIMS];
+ npy_intp i, coord[NPY_MAXDIMS];
+
+ if (PyArray_HASFIELDS(arr)) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "field-NA is not yet supported");
+ return -1;
+ }
+
+ /* Use raw iteration with no heap memory allocation */
+ if (PyArray_PrepareTwoRawArrayIter(
+ PyArray_NDIM(arr), PyArray_DIMS(arr),
+ PyArray_MASKNA_DATA(arr), PyArray_MASKNA_STRIDES(arr),
+ PyArray_DATA(wheremask), wheremask_strides_bcast,
+ &ndim, shape,
+ &data, strides,
+ &wheremask_data, wheremask_strides) < 0) {
+ return -1;
+ }
+
+ /* Do the iteration */
+ NPY_RAW_ITER_START(idim, ndim, coord, shape) {
+ char *d = data, *where_d = wheremask_data;
+ /* Process the innermost dimension */
+ for (i = 0; i < shape[0]; ++i) {
+ if (*where_d && !NpyMaskValue_IsExposed((npy_mask)(*d))) {
+ return 1;
+ }
+
+ d += strides[0];
+ where_d += wheremask_strides[0];
+ }
+ } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape,
+ data, strides,
+ wheremask_data, wheremask_strides);
+ }
}
return 0;
diff --git a/numpy/core/src/multiarray/reduction.c b/numpy/core/src/multiarray/reduction.c
index 3034a8e2d..aafe370ab 100644
--- a/numpy/core/src/multiarray/reduction.c
+++ b/numpy/core/src/multiarray/reduction.c
@@ -509,6 +509,9 @@ PyArray_InitializeReduceResult(
*
* operand : The array to be reduced.
* out : NULL, or the array into which to place the result.
+ * wheremask : NOT YET SUPPORTED, but this parameter is placed here
+ * so that support can be added in the future without breaking
+ * API compatibility. Pass in NULL.
* operand_dtype : The dtype the inner loop expects for the operand.
* result_dtype : The dtype the inner loop expects for the result.
* casting : The casting rule to apply to the operands.
@@ -519,6 +522,9 @@ PyArray_InitializeReduceResult(
* arbitrary order. The calculation may be reordered because
* of cache behavior or multithreading requirements.
* skipna : If true, NAs are skipped instead of propagating.
+ * skipwhichna : NOT YET SUPPORTED, but this parameter is placed here
+ * so that support can be added for multi-NA without
+ * breaking API compatibility. Pass in NULL.
* keepdims : If true, leaves the reduction dimensions in the result
* with size one.
* assign_identity : If NULL, PyArray_InitializeReduceResult is used, otherwise
@@ -532,11 +538,12 @@ PyArray_InitializeReduceResult(
*/
NPY_NO_EXPORT PyArrayObject *
PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
+ PyArrayObject *wheremask,
PyArray_Descr *operand_dtype,
PyArray_Descr *result_dtype,
NPY_CASTING casting,
npy_bool *axis_flags, int reorderable,
- int skipna, int keepdims,
+ int skipna, npy_bool *skipwhichna, int keepdims,
PyArray_AssignReduceIdentityFunc *assign_identity,
PyArray_ReduceLoopFunc *loop,
PyArray_ReduceLoopFunc *masked_loop,
@@ -552,6 +559,20 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
PyArray_Descr *op_dtypes[2];
npy_uint32 flags, op_flags[2];
+ /* Validate that the parameters for future expansion are NULL */
+ if (wheremask != NULL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Reduce operations in NumPy do not yet support "
+ "a where mask");
+ return NULL;
+ }
+ if (skipwhichna != NULL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "multi-NA support is not yet implemented in "
+ "reduce operations");
+ return NULL;
+ }
+
use_maskna = PyArray_HASMASKNA(operand);
/*
@@ -559,7 +580,11 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
* contains no NA values so we can ignore the mask entirely.
*/
if (use_maskna && !skipna && out != NULL && !PyArray_HASMASKNA(out)) {
- if (PyArray_ContainsNA(operand)) {
+ int containsna = PyArray_ContainsNA(operand, wheremask, NULL);
+ if (containsna == -1) {
+ goto fail;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA to an array which "
"does not support NAs");
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index a9a3bc8b8..c1ab4055d 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -987,7 +987,11 @@ static int get_ufunc_arguments(PyUFuncObject *ufunc,
/* Check all the inputs for NA */
for(i = 0; i < nin; ++i) {
if (PyArray_HASMASKNA(out_op[i])) {
- if (PyArray_ContainsNA(out_op[i])) {
+ int containsna = PyArray_ContainsNA(out_op[i], NULL, NULL);
+ if (containsna == -1) {
+ return -1;
+ }
+ else if (containsna) {
PyErr_SetString(PyExc_ValueError,
"Cannot assign NA value to an array which "
"does not support NAs");
@@ -2848,10 +2852,10 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
return NULL;
}
- result = PyArray_ReduceWrapper(arr, out, dtype, dtype,
+ result = PyArray_ReduceWrapper(arr, out, NULL, dtype, dtype,
NPY_UNSAFE_CASTING,
axis_flags, reorderable,
- skipna, keepdims,
+ skipna, NULL, keepdims,
assign_identity,
reduce_loop,
masked_reduce_loop,
diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py
index 134a48e3a..69a6cfbdd 100644
--- a/numpy/core/tests/test_maskna.py
+++ b/numpy/core/tests/test_maskna.py
@@ -312,6 +312,35 @@ def test_array_maskna_isna_2D():
# TODO: fancy indexing is next...
+def test_array_maskna_to_nomask():
+ # Assignment from an array with NAs to a non-masked array,
+ # excluding the NAs with a mask
+ a = np.array([[2,np.NA,5],[1,6,np.NA]], maskna=True)
+ mask = np.array([[1,0,0],[1,1,0]], dtype='?')
+ badmask = np.array([[1,0,0],[0,1,1]], dtype='?')
+ expected = np.array([[2,1,2],[1,6,5]])
+
+ # With masked indexing
+ b = np.arange(6).reshape(2,3)
+ b[mask] = a[mask]
+ assert_array_equal(b, expected)
+
+ # With copyto
+ b = np.arange(6).reshape(2,3)
+ np.copyto(b, a, where=mask)
+ assert_array_equal(b, expected)
+
+ # With masked indexing
+ b = np.arange(6).reshape(2,3)
+ def asn():
+ b[badmask] = a[badmask]
+ assert_raises(ValueError, asn)
+
+ # With copyto
+ b = np.arange(6).reshape(2,3)
+ assert_raises(ValueError, np.copyto, b, a, where=badmask)
+
+
def test_array_maskna_view_function():
a = np.arange(10)