diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-08-23 21:31:09 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2011-08-27 07:27:00 -0600 |
commit | 6c0ad59d384606ccf2a0afa20fb9d8a15ddd7255 (patch) | |
tree | 6c07da88ce6791efb25cee5396e567065e69b20b | |
parent | f9c1d415535a9ffde2676b7cec82d37548ee0afd (diff) | |
download | numpy-6c0ad59d384606ccf2a0afa20fb9d8a15ddd7255.tar.gz |
ENH: missingdata: Add maskna= and ownmaskna= parameters to np.asarray and friends
Also fix some array() NA mask construction issues, and make sure that
collapsing the base object stops at the owner of the NA mask being
viewed, not only at the owner of the data.
-rw-r--r-- | numpy/add_newdocs.py | 15 | ||||
-rw-r--r-- | numpy/core/numeric.py | 48 | ||||
-rw-r--r-- | numpy/core/src/multiarray/arrayobject.c | 34 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert.c | 14 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 63 | ||||
-rw-r--r-- | numpy/core/tests/test_maskna.py | 46 |
6 files changed, 182 insertions, 38 deletions
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 7bd24b45e..d553e3ea2 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -631,7 +631,7 @@ add_newdoc('numpy.core', 'broadcast', ('reset', add_newdoc('numpy.core.multiarray', 'array', """ - array(object, dtype=None, copy=True, order=None, subok=False, ndmin=0) + array(object, dtype=None, copy=True, order=None, subok=False, ndmin=0, maskna=None, ownmaskna=False) Create an array. @@ -667,6 +667,19 @@ add_newdoc('numpy.core.multiarray', 'array', Specifies the minimum number of dimensions that the resulting array should have. Ones will be pre-pended to the shape as needed to meet this requirement. + maskna : bool or None, optional + If this is set to True, it forces the array to have an NA mask. + If the input is an array without a mask, this means a view with + an NA mask is created. If the input is an array with a mask, the + mask is preserved as-is. + + If this is set to False, it forces the array to not have an NA + mask. If the input is an array with a mask, and has no NA values, + it will create a copy of the input without an NA mask. + ownmaskna : bool, optional + If this is set to True, forces the array to have a mask which + it owns. It may still return a view of the data from the input, + but the result will always own its own mask. Returns ------- diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index e01f24f0d..a66a9764d 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -254,7 +254,7 @@ putmask = multiarray.putmask einsum = multiarray.einsum isna = multiarray.isna -def asarray(a, dtype=None, order=None): +def asarray(a, dtype=None, order=None, maskna=None, ownmaskna=False): """ Convert the input to an array. @@ -269,6 +269,13 @@ def asarray(a, dtype=None, order=None): order : {'C', 'F'}, optional Whether to use row-major ('C') or column-major ('F' for FORTRAN) memory representation. Defaults to 'C'. 
+ maskna : bool or None, optional + If this is set to True, it forces the array to have an NA mask. + If this is set to False, it forces the array to not have an NA + mask. + ownmaskna : bool, optional + If this is set to True, forces the array to have a mask which + it owns. Returns ------- @@ -322,9 +329,10 @@ def asarray(a, dtype=None, order=None): True """ - return array(a, dtype, copy=False, order=order) + return array(a, dtype, copy=False, order=order, + maskna=maskna, ownmaskna=ownmaskna) -def asanyarray(a, dtype=None, order=None): +def asanyarray(a, dtype=None, order=None, maskna=None, ownmaskna=False): """ Convert the input to an ndarray, but pass ndarray subclasses through. @@ -339,6 +347,13 @@ def asanyarray(a, dtype=None, order=None): order : {'C', 'F'}, optional Whether to use row-major ('C') or column-major ('F') memory representation. Defaults to 'C'. + maskna : bool or None, optional + If this is set to True, it forces the array to have an NA mask. + If this is set to False, it forces the array to not have an NA + mask. + ownmaskna : bool, optional + If this is set to True, forces the array to have a mask which + it owns. Returns ------- @@ -374,9 +389,10 @@ def asanyarray(a, dtype=None, order=None): True """ - return array(a, dtype, copy=False, order=order, subok=True) + return array(a, dtype, copy=False, order=order, subok=True, + maskna=maskna, ownmaskna=ownmaskna) -def ascontiguousarray(a, dtype=None): +def ascontiguousarray(a, dtype=None, maskna=None, ownmaskna=False): """ Return a contiguous array in memory (C order). @@ -386,6 +402,13 @@ def ascontiguousarray(a, dtype=None): Input array. dtype : str or dtype object, optional Data-type of returned array. + maskna : bool or None, optional + If this is set to True, it forces the array to have an NA mask. + If this is set to False, it forces the array to not have an NA + mask. + ownmaskna : bool, optional + If this is set to True, forces the array to have a mask which + it owns. 
Returns ------- @@ -410,9 +433,10 @@ def ascontiguousarray(a, dtype=None): True """ - return array(a, dtype, copy=False, order='C', ndmin=1) + return array(a, dtype, copy=False, order='C', ndmin=1, + maskna=maskna, ownmaskna=ownmaskna) -def asfortranarray(a, dtype=None): +def asfortranarray(a, dtype=None, maskna=None, ownmaskna=False): """ Return an array laid out in Fortran order in memory. @@ -422,6 +446,13 @@ def asfortranarray(a, dtype=None): Input array. dtype : str or dtype object, optional By default, the data-type is inferred from the input data. + maskna : bool or None, optional + If this is set to True, it forces the array to have an NA mask. + If this is set to False, it forces the array to not have an NA + mask. + ownmaskna : bool, optional + If this is set to True, forces the array to have a mask which + it owns. Returns ------- @@ -446,7 +477,8 @@ def asfortranarray(a, dtype=None): True """ - return array(a, dtype, copy=False, order='F', ndmin=1) + return array(a, dtype, copy=False, order='F', ndmin=1, + maskna=maskna, ownmaskna=ownmaskna) def require(a, dtype=None, requirements=None): """ diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index f24ea376e..0063f8daa 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -93,18 +93,41 @@ PyArray_SetBaseObject(PyArrayObject *arr, PyObject *obj) "dependency more than once"); return -1; } + /* * Don't allow chains of views, always set the base - * to the owner of the data + * to the owner of the data. That is, either the first object + * which isn't an array, the first object with an NA mask + * which owns that NA mask, or the first object which owns + * its own data. 
*/ - while (PyArray_Check(obj) && - (PyObject *)arr != obj && - PyArray_BASE((PyArrayObject *)obj) != NULL) { - PyObject *tmp = PyArray_BASE((PyArrayObject *)obj); + while (PyArray_Check(obj) && (PyObject *)arr != obj) { + PyArrayObject *obj_arr = (PyArrayObject *)arr; + PyObject *tmp; + + /* If this array owns its own data, stop collapsing */ + if (PyArray_CHKFLAGS(obj_arr, NPY_ARRAY_OWNDATA)) { + break; + } + /* + * If 'arr' doesn't own its NA mask, then if + * 'obj' is NA masked and owns the mask, stop collapsing + */ + if (!PyArray_CHKFLAGS(arr, NPY_ARRAY_OWNMASKNA) && + PyArray_CHKFLAGS(obj_arr, NPY_ARRAY_OWNMASKNA)) { + break; + } + /* If there's no base, stop collapsing */ + tmp = PyArray_BASE(obj_arr); + if (tmp == NULL) { + break; + } + Py_INCREF(tmp); Py_DECREF(obj); obj = tmp; } + /* Disallow circular references */ if ((PyObject *)arr == obj) { Py_DECREF(obj); @@ -112,6 +135,7 @@ PyArray_SetBaseObject(PyArrayObject *arr, PyObject *obj) "Cannot create a circular NumPy array 'base' dependency"); return -1; } + ((PyArrayObject_fieldaccess *)arr)->base = obj; return 0; diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c index bbadd3847..f79ffc6a1 100644 --- a/numpy/core/src/multiarray/convert.c +++ b/numpy/core/src/multiarray/convert.c @@ -571,12 +571,6 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype) if (ret == NULL) { return NULL; } - Py_INCREF(self); - if (PyArray_SetBaseObject(ret, (PyObject *)self) < 0) { - Py_DECREF(ret); - Py_DECREF(type); - return NULL; - } /* Take a view of the mask if it exists */ if (PyArray_HASMASKNA(self)) { @@ -600,6 +594,14 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype) fa->flags |= NPY_ARRAY_MASKNA; } + /* Set the base object */ + Py_INCREF(self); + if (PyArray_SetBaseObject(ret, (PyObject *)self) < 0) { + Py_DECREF(ret); + Py_DECREF(type); + return NULL; + } + if (type != NULL) { if (PyObject_SetAttrString((PyObject *)ret, 
"dtype", (PyObject *)type) < 0) { diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index ea1c94008..ae8fa9eae 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1999,16 +1999,26 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) if (PyArray_NDIM(arr) <= 1 && (flags & NPY_ARRAY_F_CONTIGUOUS)) { flags |= NPY_ARRAY_C_CONTIGUOUS; } + /* If a guaranteed copy was requested */ copy = (flags & NPY_ARRAY_ENSURECOPY) || - ((flags & NPY_ARRAY_C_CONTIGUOUS) && - (!(arrflags & NPY_ARRAY_C_CONTIGUOUS))) - || ((flags & NPY_ARRAY_ALIGNED) && - (!(arrflags & NPY_ARRAY_ALIGNED))) - || (PyArray_NDIM(arr) > 1 && - ((flags & NPY_ARRAY_F_CONTIGUOUS) && - (!(arrflags & NPY_ARRAY_F_CONTIGUOUS)))) - || ((flags & NPY_ARRAY_WRITEABLE) && - (!(arrflags & NPY_ARRAY_WRITEABLE))) || + /* If C contiguous was requested, and arr is not */ + ((flags & NPY_ARRAY_C_CONTIGUOUS) && + (!(arrflags & NPY_ARRAY_C_CONTIGUOUS))) || + /* If an aligned array was requested, and arr is not */ + ((flags & NPY_ARRAY_ALIGNED) && + (!(arrflags & NPY_ARRAY_ALIGNED))) || + /* If a Fortran contiguous array was requested, and arr is not */ + (PyArray_NDIM(arr) > 1 && + ((flags & NPY_ARRAY_F_CONTIGUOUS) && + (!(arrflags & NPY_ARRAY_F_CONTIGUOUS)))) || + /* If a writeable array was requested, and arr is not */ + ((flags & NPY_ARRAY_WRITEABLE) && + (!(arrflags & NPY_ARRAY_WRITEABLE))) || + /* If an array with no NA mask was requested, and arr has one */ + ((flags & (NPY_ARRAY_ALLOWNA | + NPY_ARRAY_MASKNA | + NPY_ARRAY_OWNMASKNA)) == 0 && + (arrflags & NPY_ARRAY_MASKNA)) || !PyArray_EquivTypes(oldtype, newtype); if (copy) { @@ -2043,7 +2053,7 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) * Allocate an NA mask if necessary from the input, * is NAs are being allowed. 
*/ - if (PyArray_HASMASKNA(arr) && (flags & NPY_ARRAY_ALLOWNA)) { + if ((arrflags & NPY_ARRAY_MASKNA) && (flags & NPY_ARRAY_ALLOWNA)) { if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) { Py_DECREF(ret); return NULL; @@ -2080,25 +2090,44 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) } } /* - * If no copy then just increase the reference - * count and return the input + * If no copy then take an appropriate view if necessary, or + * just return a reference to ret itself. */ else { + int needview = ((flags & NPY_ARRAY_ENSUREARRAY) && + !PyArray_CheckExact(arr)) || + ((flags & NPY_ARRAY_MASKNA) && + !(arrflags & NPY_ARRAY_MASKNA)) || + ((flags & NPY_ARRAY_OWNMASKNA) && + !(arrflags & NPY_ARRAY_OWNMASKNA)); + Py_DECREF(newtype); - if ((flags & NPY_ARRAY_ENSUREARRAY) && - !PyArray_CheckExact(arr)) { + if (needview) { PyArray_Descr *dtype = PyArray_DESCR(arr); - Py_INCREF(dtype); + PyTypeObject *subtype = NULL; + + if (flags & NPY_ARRAY_ENSUREARRAY) { + subtype = &PyArray_Type; + } - ret = (PyArrayObject *)PyArray_View(arr, NULL, &PyArray_Type); + Py_INCREF(dtype); + ret = (PyArrayObject *)PyArray_View(arr, NULL, subtype); if (ret == NULL) { return NULL; } + + if (flags & (NPY_ARRAY_MASKNA | NPY_ARRAY_OWNMASKNA)) { + int ownmaskna = (flags & NPY_ARRAY_OWNMASKNA) != 0; + if (PyArray_AllocateMaskNA(ret, ownmaskna, 0, 1) < 0) { + Py_DECREF(ret); + return NULL; + } + } } else { + Py_INCREF(arr); ret = arr; } - Py_INCREF(arr); } return (PyObject *)ret; diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py index 5c4cb5264..81cae59cb 100644 --- a/numpy/core/tests/test_maskna.py +++ b/numpy/core/tests/test_maskna.py @@ -83,6 +83,43 @@ def test_array_maskna_construction(): assert_(a.flags.maskna) assert_equal(np.isna(a), True) +def test_array_maskna_asarray(): + a = np.arange(6).reshape(2,3) + + # Should not add an NA mask by default + res = np.asarray(a) + assert_(res is a) + assert_(not res.flags.maskna) + + # Should add an NA 
mask if requested + res = np.asarray(a, maskna=True) + assert_(res.flags.maskna) + assert_(res.flags.ownmaskna) + res = np.asarray(a, ownmaskna=True) + assert_(res.flags.maskna) + assert_(res.flags.ownmaskna) + + a.flags.maskna = True + + # Should view or create a copy of the NA mask + res = np.asarray(a) + assert_(res is a) + res = np.asarray(a, maskna=True) + assert_(res is a) + res = np.asarray(a, ownmaskna=True) + assert_(res is a) + + b = a.view() + assert_(not b.flags.ownmaskna) + + res = np.asarray(b) + assert_(res is b) + res = np.asarray(b, maskna=True) + assert_(res is b) + res = np.asarray(b, ownmaskna=True) + assert_(not (res is b)) + assert_(res.flags.ownmaskna) + def test_array_maskna_copy(): a = np.array([1,2,3]) b = np.array([2,3,4], maskna=True) @@ -444,10 +481,17 @@ def test_array_maskna_array_function_1D(): # Should produce a view with an owned mask with 'ownmaskna=True' c = np.array(b_view, copy=False, ownmaskna=True) - assert_(c.base is a) + assert_(c.base is b_view) assert_(c.flags.ownmaskna) assert_(not (c is b_view)) + # Should produce a view whose base is 'c', because 'c' owns + # the data for its mask + d = c.view() + assert_(d.base is c) + assert_(d.flags.maskna) + assert_(not d.flags.ownmaskna) + def test_array_maskna_setasflat(): # Copy from a C to a F array with some NAs a_orig = np.empty((2,3), order='C') |