diff options
-rw-r--r-- | doc/neps/missing-data.rst | 36 | ||||
-rw-r--r-- | numpy/core/numeric.py | 9 | ||||
-rw-r--r-- | numpy/core/src/multiarray/common.c | 4 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 5 | ||||
-rw-r--r-- | numpy/core/tests/test_maskna.py | 24 |
5 files changed, 59 insertions, 19 deletions
diff --git a/doc/neps/missing-data.rst b/doc/neps/missing-data.rst index 49528da41..3cebf25ec 100644 --- a/doc/neps/missing-data.rst +++ b/doc/neps/missing-data.rst @@ -347,20 +347,42 @@ instead of masked and unmasked values. The functions are 'np.isna' and 'np.isavail', which test for NA or available values respectively. -Creating Masked Arrays -====================== +Creating NA-Masked Arrays +========================= + +The usual way to create an array with an NA mask is to pass the keyword +parameter maskna=True to one of the constructors. Most functions that +create a new array take this parameter, and produce an NA-masked +array with all its elements exposed when the parameter is set to True. -There are two flags which indicate and control the nature of the mask -used in masked arrays. +There are also two flags which indicate and control the nature of the mask +used in masked arrays. These flags can be used to add a mask, or ensure +the mask isn't a view into another array's mask. -First is 'arr.flags.hasmaskna', which is True for all masked arrays and +First is 'arr.flags.maskna', which is True for all masked arrays and may be set to True to add a mask to an array which does not have one. Second is 'arr.flags.ownmaskna', which is True if the array owns the memory to the mask, and False if the array has no mask, or has a view -into the mask of another array. If this is set to False in a masked +into the mask of another array. If this is set to True in a masked array, the array will create a copy of the mask so that further modifications -to the mask will not affect the array being viewed. +to the mask will not affect the original mask from which the view was taken. + +NA-Masks When Constructing From Lists +===================================== + +The initial design of NA-mask construction was to make all construction +fully explicit. This turns out to be unwieldy when working interactively +with NA-masked arrays, and having an object array be created instead of +an NA-masked array can be very surprising. + +Because of this, the design has been changed to enable an NA-mask whenever +creating an array from lists which have an NA object in them. There could +be some debate of whether one should create NA-masks or NA-bitpatterns +by default, but due to the time constraints it was only feasible to tackle +NA-masks, and extending the NA-mask support more fully throughout NumPy seems +much more reasonable than starting another system and ending up with two +incomplete systems. Mask Implementation Details =========================== diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index f1e6b2da9..9b7c20d76 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -1336,9 +1336,12 @@ def array_repr(arr, max_line_width=None, precision=None, suppress_small=None): skipdtype = (arr.dtype.type in _typelessdata) and arr.size > 0 if arr.flags.maskna: - lst += ", maskna=True" - # If everything is NA, can't skip the type - if skipdtype and all(isna(arr)): + whichna = isna(arr) + # If nothing is NA, explicitly signal the NA-mask + if not any(whichna): + lst += ", maskna=True" + # If everything is NA, can't skip the dtype + if skipdtype and all(whichna): skipdtype = False if skipdtype: diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 98d86f38a..45c7558de 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -100,6 +100,10 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, int *out_contains_na, /* Check if it's an ndarray */ if (PyArray_Check(obj)) { + /* Check for any NAs in the array */ + if (PyArray_ContainsNA((PyArrayObject *)obj)) { + *out_contains_na = 1; + } dtype = PyArray_DESCR((PyArrayObject *)obj); Py_INCREF(dtype); goto promote_types; diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index bcd33afd8..894e3fd22 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1723,11 +1723,10 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, /* If we got dimensions and dtype instead of an array */ if (arr == NULL) { /* - * If the input data is an NA object, and the ALLOWNA flag is + * If the input data contains any NAs, and the ALLOWNA flag is * enabled, produce an array with an NA mask. */ - if (contains_na && (flags & NPY_ARRAY_ALLOWNA) != 0 && - NpyNA_Check(op)) { + if (contains_na && (flags & NPY_ARRAY_ALLOWNA) != 0) { flags |= NPY_ARRAY_MASKNA; } diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py index 4dd2b13e5..ff1db7603 100644 --- a/numpy/core/tests/test_maskna.py +++ b/numpy/core/tests/test_maskna.py @@ -29,11 +29,15 @@ def test_array_maskna_construction(): assert_equal(a.dtype, np.dtype('f8')) assert_(a.flags.maskna) assert_equal(type(a[2]), np.NAType) - # Without the 'maskna=True', produces an object array + # Without the 'maskna=True', still produces an NA mask if NA is there a = np.array([1.0, 2.0, np.NA, 7.0]) - assert_equal(a.dtype, np.dtype('O')) - assert_(not a.flags.maskna) + assert_equal(a.dtype, np.dtype('f8')) + assert_(a.flags.maskna) assert_equal(type(a[2]), np.NAType) + # Without any NAs, does not produce an NA mask + a = np.array([1.0, 2.0, 4.0, 7.0]) + assert_equal(a.dtype, np.dtype('f8')) + assert_(not a.flags.maskna) # From np.NA as a straight scalar a = np.array(np.NA, maskna=True) @@ -76,11 +80,19 @@ def test_array_maskna_construction(): def test_array_maskna_repr(): # Test some simple reprs with NA in them a = np.array(np.NA, maskna=True) - assert_equal(repr(a), 'array(NA, maskna=True, dtype=float64)') + assert_equal(repr(a), 'array(NA, dtype=float64)') + a = np.array(3, maskna=True) + assert_equal(repr(a), 'array(3, maskna=True)') a = np.array([np.NA, 3], maskna=True) - assert_equal(repr(a), 'array([NA, 3], maskna=True)') + assert_equal(repr(a), 'array([NA, 3])') + a = np.array([np.NA, np.NA]) + assert_equal(repr(a), 'array([NA, NA], dtype=float64)') a = np.array([3.5, np.NA], maskna=True) - assert_equal(repr(a), 'array([ 3.5, NA], maskna=True)') + assert_equal(repr(a), 'array([ 3.5, NA])') + a = np.array([3.75, 6.25], maskna=True) + assert_equal(repr(a), 'array([ 3.75, 6.25], maskna=True)') + a = np.array([3.75, 6.25], maskna=True, dtype='f4') + assert_equal(repr(a), 'array([ 3.75, 6.25], maskna=True, dtype=float32)') def test_isna(): # Objects which are not np.NA or ndarray all return False |