From ba4d1161fe4943cb720f35c0abfd0581628255d6 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Tue, 16 Aug 2011 19:11:22 -0700 Subject: BUG: missingdata: Fix mask usage in PyArray_TakeFrom, add tests for it --- doc/neps/missing-data.rst | 55 ++++++++++++++---------------- doc/release/2.0.0-notes.rst | 17 ++++++++- numpy/core/arrayprint.py | 4 +-- numpy/core/src/multiarray/item_selection.c | 37 +++++++------------- numpy/core/tests/test_maskna.py | 15 ++++++++ 5 files changed, 71 insertions(+), 57 deletions(-) diff --git a/doc/neps/missing-data.rst b/doc/neps/missing-data.rst index e83bd2189..197a3107d 100644 --- a/doc/neps/missing-data.rst +++ b/doc/neps/missing-data.rst @@ -237,21 +237,15 @@ mask [Exposed, Exposed, Hidden, Exposed], and values [1.0, 2.0, , 7.0] for the masked and NA dtype versions respectively. -It may be worth overloading the np.NA __call__ method to accept a dtype, -returning a zero-dimensional array with a missing value of that dtype. -Without doing this, NA printouts would look like:: +The np.NA singleton may accept a dtype= keyword parameter, indicating +that it should be treated as an NA of a particular data type. This is also +a mechanism for preserving the dtype in a NumPy scalar-like fashion. 
+Here's what this could look like:: >>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], maskna=True)) - array(NA, dtype='float64', maskna=True) - >>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], dtype='NA[f8]')) - array(NA, dtype='NA[>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], maskna=True)) - NA('float64') + NA(dtype='>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], dtype='NA[f8]')) - NA('NA[>> a = np.array([1,2]) - >>> b = a.view() - >>> b.flags.hasmaskna = True + >>> b = a.view(maskna=True) >>> b - array([1,2], maskna=True) + array([1, 2], maskna=True) >>> b[0] = np.NA >>> b - array([NA,2], maskna=True) + array([NA, 2], maskna=True) >>> a - array([1,2]) + array([1, 2]) >>> # The underlying number 1 value in 'a[0]' was untouched Copying values between the mask-based implementation and the @@ -322,8 +309,16 @@ these semantics without the extra manipulation. A manual loop through a masked array like:: - for i in xrange(len(a)): - a[i] = np.log(a[i]) + >>> a = np.arange(5., maskna=True) + >>> a[3] = np.NA + >>> a + array([ 0., 1., 2., NA, 4.], maskna=True) + >>> for i in xrange(len(a)): + ... a[i] = np.log(a[i]) + ... + __main__:2: RuntimeWarning: divide by zero encountered in log + >>> a + array([ -inf, 0. , 0.69314718, NA, 1.38629436], maskna=True) works even with masked values, because 'a[i]' returns a zero-dimensional array with a missing value instead of the singleton np.NA for the missing diff --git a/doc/release/2.0.0-notes.rst b/doc/release/2.0.0-notes.rst index ddedf85de..0ba7594fe 100644 --- a/doc/release/2.0.0-notes.rst +++ b/doc/release/2.0.0-notes.rst @@ -29,7 +29,8 @@ What works with NA: * Array methods: + ndarray.clip, ndarray.min, ndarray.max, ndarray.sum, ndarray.prod, ndarray.conjugate, ndarray.diagonal - + numpy.concatenate + + numpy.concatenate, numpy.column_stack, numpy.hstack, + numpy.vstack, numpy.dstack What doesn't work with NA: * Fancy indexing, such as with lists and partial boolean masks. 
@@ -42,6 +43,7 @@ What doesn't work with NA: rules NA | True == True and NA & False == False yet. * Array methods: + ndarray.argmax, ndarray.argmin, + + numpy.repeat Custom formatter for printing arrays @@ -62,6 +64,12 @@ view into the original array instead of making a copy. This makes these functions more consistent with NumPy's general approach of taking views where possible, and performs much faster as well. +The function np.concatenate tries to match the layout of its input +arrays. Previously, the layout did not follow any particular reason, +and depended in an undesirable way on the particular axis chosen for +concatenation. A bug was also fixed which silently allowed out of bounds +axis arguments. + Deprecations ============ @@ -69,3 +77,10 @@ Deprecations Specifying a custom string formatter with a `_format` array attribute is deprecated. The new `formatter` keyword in ``numpy.set_printoptions`` or ``numpy.array2string`` can be used instead. + +In the C API, direct access to the fields of PyArrayObject* has been +deprecated. Direct access has been recommended against for many releases, but +now you can test your code against the deprecated C API by #defining +NPY_NO_DEPRECATED_API before including any NumPy headers. Expect +something similar for PyArray_Descr* and other core objects in the +future as preparation for NumPy 2.0. diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 506cce8cc..e4df5428c 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -556,8 +556,8 @@ class FloatFormat(object): max_val = 0. min_val = 0. 
else: - max_val = maximum.reduce(non_zero) - min_val = minimum.reduce(non_zero) + max_val = maximum.reduce(non_zero, skipna=True) + min_val = minimum.reduce(non_zero, skipna=True) if max_val >= 1.e8: self.exp_format = True if not self.suppress_small and (min_val < 0.0001 diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 827b8e4d4..ff217be62 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -15,6 +15,7 @@ #include "numpy/npy_3kcompat.h" #include "common.h" +#include "arrayobject.h" #include "ctors.h" #include "lowlevel_strided_loops.h" #include "na_singleton.h" @@ -43,7 +44,7 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis, return NULL; } indices = (PyArrayObject *)PyArray_ContiguousFromAny(indices0, - PyArray_INTP, + NPY_INTP, 1, 0); if (indices == NULL) { goto fail; @@ -161,17 +162,6 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis, src_maskna = PyArray_MASKNA_DATA(self); dst_maskna = PyArray_MASKNA_DATA(obj); - if (PyDataType_REFCHK(PyArray_DESCR(self))) { - /* - * TODO: Should use PyArray_GetDTypeTransferFunction - * instead of raw memmove to remedy this. 
- */ - PyErr_SetString(PyExc_RuntimeError, - "ndarray.take doesn't support object arrays with " - "masks yet"); - NPY_AUXDATA_FREE(transferdata); - goto fail; - } switch(clipmode) { case NPY_RAISE: @@ -183,14 +173,13 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis, } if ((tmp < 0) || (tmp >= max_item)) { PyErr_SetString(PyExc_IndexError, - "index out of range "\ - "for array"); + "index out of range for array"); NPY_AUXDATA_FREE(transferdata); goto fail; } maskedstransfer(dest, itemsize, src + tmp*chunk, itemsize, - (npy_mask *)src_maskna, 1, + (npy_mask *)(src_maskna + tmp*nelem), 1, nelem, itemsize, transferdata); dest += chunk; memmove(dst_maskna, src_maskna + tmp*nelem, nelem); @@ -216,7 +205,7 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis, } maskedstransfer(dest, itemsize, src + tmp*chunk, itemsize, - (npy_mask *)src_maskna, 1, + (npy_mask *)(src_maskna + tmp*nelem), 1, nelem, itemsize, transferdata); dest += chunk; memmove(dst_maskna, src_maskna + tmp*nelem, nelem); @@ -238,7 +227,7 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis, } maskedstransfer(dest, itemsize, src + tmp*chunk, itemsize, - (npy_mask *)src_maskna, 1, + (npy_mask *)(src_maskna + tmp*nelem), 1, nelem, itemsize, transferdata); dest += chunk; memmove(dst_maskna, src_maskna + tmp*nelem, nelem); @@ -373,7 +362,7 @@ PyArray_PutTo(PyArrayObject *self, PyObject* values0, PyObject *indices0, dest = PyArray_DATA(self); chunk = PyArray_DESCR(self)->elsize; indices = (PyArrayObject *)PyArray_ContiguousFromAny(indices0, - PyArray_INTP, 0, 0); + NPY_INTP, 0, 0); if (indices == NULL) { goto fail; } @@ -640,7 +629,7 @@ PyArray_Repeat(PyArrayObject *aop, PyObject *op, int axis) PyArrayObject *ret = NULL; char *new_data, *old_data; - repeats = (PyArrayObject *)PyArray_ContiguousFromAny(op, PyArray_INTP, 0, 1); + repeats = (PyArrayObject *)PyArray_ContiguousFromAny(op, NPY_INTP, 0, 1); if (repeats == NULL) { return NULL; } @@ -1209,7 
+1198,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) if ((n == 0) || (PyArray_SIZE(op) == 1)) { ret = (PyArrayObject *)PyArray_New(Py_TYPE(op), PyArray_NDIM(op), PyArray_DIMS(op), - PyArray_INTP, + NPY_INTP, NULL, NULL, 0, 0, (PyObject *)op); if (ret == NULL) { @@ -1248,7 +1237,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) return NULL; } ret = (PyArrayObject *)PyArray_New(Py_TYPE(op), PyArray_NDIM(op), - PyArray_DIMS(op), PyArray_INTP, + PyArray_DIMS(op), NPY_INTP, NULL, NULL, 0, 0, (PyObject *)op); if (ret == NULL) { goto fail; @@ -1371,7 +1360,7 @@ PyArray_LexSort(PyObject *sort_keys, int axis) /* single element case */ ret = (PyArrayObject *)PyArray_New(&PyArray_Type, PyArray_NDIM(mps[0]), PyArray_DIMS(mps[0]), - PyArray_INTP, + NPY_INTP, NULL, NULL, 0, 0, NULL); if (ret == NULL) { @@ -1391,7 +1380,7 @@ PyArray_LexSort(PyObject *sort_keys, int axis) /* Now do the sorting */ ret = (PyArrayObject *)PyArray_New(&PyArray_Type, PyArray_NDIM(mps[0]), - PyArray_DIMS(mps[0]), PyArray_INTP, + PyArray_DIMS(mps[0]), NPY_INTP, NULL, NULL, 0, 0, NULL); if (ret == NULL) { goto fail; @@ -1622,7 +1611,7 @@ PyArray_SearchSorted(PyArrayObject *op1, PyObject *op2, NPY_SEARCHSIDE side) } /* ret is a contiguous array of intp type to hold returned indices */ ret = (PyArrayObject *)PyArray_New(Py_TYPE(ap2), PyArray_NDIM(ap2), - PyArray_DIMS(ap2), PyArray_INTP, + PyArray_DIMS(ap2), NPY_INTP, NULL, NULL, 0, 0, (PyObject *)ap2); if (ret == NULL) { goto fail; diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py index 0f63cc8bd..4dd2b13e5 100644 --- a/numpy/core/tests/test_maskna.py +++ b/numpy/core/tests/test_maskna.py @@ -632,6 +632,12 @@ def test_maskna_take_1D(): assert_equal([c[0], c[2]], [0,4]) assert_equal(np.isna(c), [0,1,0]) + # Take with an NA just at the start + a = np.arange(5, maskna=True) + a[0] = np.NA + res = a.take([1,2,3,4]) + assert_equal(res, [1,2,3,4]) + def test_maskna_ufunc_1D(): a_orig = 
np.arange(3) a = a_orig.view(maskna=True) @@ -951,6 +957,15 @@ def test_array_maskna_column_stack(): assert_equal(np.isna(res), [[0,0], [0,0], [0,1]]) assert_equal(res[~np.isna(res)], [1,2,2,3,3]) +def test_array_maskna_compress(): + # ndarray.compress + a = np.arange(5., maskna=True) + a[0] = np.NA + + mask = np.array([0,1,1,1,1], dtype='?') + res = a.compress(mask) + assert_equal(res, [1,2,3,4]) + if __name__ == "__main__": run_module_suite() -- cgit v1.2.1