summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/neps/missing-data.rst55
-rw-r--r--doc/release/2.0.0-notes.rst17
-rw-r--r--numpy/core/arrayprint.py4
-rw-r--r--numpy/core/src/multiarray/item_selection.c37
-rw-r--r--numpy/core/tests/test_maskna.py15
5 files changed, 71 insertions, 57 deletions
diff --git a/doc/neps/missing-data.rst b/doc/neps/missing-data.rst
index e83bd2189..197a3107d 100644
--- a/doc/neps/missing-data.rst
+++ b/doc/neps/missing-data.rst
@@ -237,21 +237,15 @@ mask [Exposed, Exposed, Hidden, Exposed], and
values [1.0, 2.0, <NA bitpattern>, 7.0] for the masked and
NA dtype versions respectively.
-It may be worth overloading the np.NA __call__ method to accept a dtype,
-returning a zero-dimensional array with a missing value of that dtype.
-Without doing this, NA printouts would look like::
+The np.NA singleton may accept a dtype= keyword parameter, indicating
+that it should be treated as an NA of a particular data type. This is also
+a mechanism for preserving the dtype in a NumPy scalar-like fashion.
+Here's what this could look like::
>>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], maskna=True))
- array(NA, dtype='float64', maskna=True)
- >>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], dtype='NA[f8]'))
- array(NA, dtype='NA[<f8]')
-
-but with this, they could be printed as::
-
- >>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], maskna=True))
- NA('float64')
+ NA(dtype='<f8')
>>> np.sum(np.array([1.0, 2.0, np.NA, 7.0], dtype='NA[f8]'))
- NA('NA[<f8]')
+ NA(dtype='NA[<f8]')
Assigning a value to an array always causes that element to not be NA,
transparently unmasking it if necessary. Assigning numpy.NA to the array
@@ -259,32 +253,25 @@ masks that element or assigns the NA bitpattern for the particular dtype.
In the mask-based implementation, the storage behind a missing value may never
be accessed in any way, other than to unmask it by assigning its value.
-While numpy.NA works to mask values, it does not itself have a dtype.
-This means that returning the numpy.NA singleton from an operation
-like 'arr[0]' would be throwing away the dtype, which is still
-valuable to retain, so 'arr[0]' will return a zero-dimensional
-array either with its value masked, or containing the NA bitpattern
-for the array's dtype. To test if the value is missing, the function
-"np.isna(arr[0])" will be provided. One of the key reasons for the
-NumPy scalars is to allow their values into dictionaries. Having a
-missing value as the key in a dictionary is a bad idea, so the NumPy
-scalars will not support missing values in any form.
+To test if a value is missing, the function "np.isna(arr[0])" will
+be provided. One of the key reasons for the NumPy scalars is to allow
+their values into dictionaries.
All operations which write to masked arrays will not affect the value
unless they also unmask that value. This allows the storage behind
masked elements to still be relied on if they are still accessible
-from another view which doesn't have them masked. For example::
+from another view which doesn't have them masked. For example, the
+following was run on the missingdata work-in-progress branch::
>>> a = np.array([1,2])
- >>> b = a.view()
- >>> b.flags.hasmaskna = True
+ >>> b = a.view(maskna=True)
>>> b
- array([1,2], maskna=True)
+ array([1, 2], maskna=True)
>>> b[0] = np.NA
>>> b
- array([NA,2], maskna=True)
+ array([NA, 2], maskna=True)
>>> a
- array([1,2])
+ array([1, 2])
>>> # The underlying number 1 value in 'a[0]' was untouched
Copying values between the mask-based implementation and the
@@ -322,8 +309,16 @@ these semantics without the extra manipulation.
A manual loop through a masked array like::
- for i in xrange(len(a)):
- a[i] = np.log(a[i])
+ >>> a = np.arange(5., maskna=True)
+ >>> a[3] = np.NA
+ >>> a
+ array([ 0., 1., 2., NA, 4.], maskna=True)
+ >>> for i in xrange(len(a)):
+ ... a[i] = np.log(a[i])
+ ...
+ __main__:2: RuntimeWarning: divide by zero encountered in log
+ >>> a
+ array([ -inf, 0. , 0.69314718, NA, 1.38629436], maskna=True)
works even with masked values, because 'a[i]' returns a zero-dimensional
array with a missing value instead of the singleton np.NA for the missing
diff --git a/doc/release/2.0.0-notes.rst b/doc/release/2.0.0-notes.rst
index ddedf85de..0ba7594fe 100644
--- a/doc/release/2.0.0-notes.rst
+++ b/doc/release/2.0.0-notes.rst
@@ -29,7 +29,8 @@ What works with NA:
* Array methods:
+ ndarray.clip, ndarray.min, ndarray.max, ndarray.sum, ndarray.prod,
ndarray.conjugate, ndarray.diagonal
- + numpy.concatenate
+ + numpy.concatenate, numpy.column_stack, numpy.hstack,
+ numpy.vstack, numpy.dstack
What doesn't work with NA:
* Fancy indexing, such as with lists and partial boolean masks.
@@ -42,6 +43,7 @@ What doesn't work with NA:
rules NA | True == True and NA & False == False yet.
* Array methods:
+ ndarray.argmax, ndarray.argmin,
+ + numpy.repeat
Custom formatter for printing arrays
@@ -62,6 +64,12 @@ view into the original array instead of making a copy. This makes these
functions more consistent with NumPy's general approach of taking views
where possible, and performs much faster as well.
+The function np.concatenate tries to match the layout of its input
+arrays. Previously, the layout did not follow any particular rule,
+and depended in an undesirable way on the particular axis chosen for
+concatenation. A bug was also fixed which silently allowed out of bounds
+axis arguments.
+
Deprecations
============
@@ -69,3 +77,10 @@ Deprecations
Specifying a custom string formatter with a `_format` array attribute is
deprecated. The new `formatter` keyword in ``numpy.set_printoptions`` or
``numpy.array2string`` can be used instead.
+
+In the C API, direct access to the fields of PyArrayObject* has been
+deprecated. Direct access has been recommended against for many releases, but
+now you can test your code against the deprecated C API by #defining
+NPY_NO_DEPRECATED_API before including any NumPy headers. Expect
+something similar for PyArray_Descr* and other core objects in the
+future as preparation for NumPy 2.0.
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index 506cce8cc..e4df5428c 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -556,8 +556,8 @@ class FloatFormat(object):
max_val = 0.
min_val = 0.
else:
- max_val = maximum.reduce(non_zero)
- min_val = minimum.reduce(non_zero)
+ max_val = maximum.reduce(non_zero, skipna=True)
+ min_val = minimum.reduce(non_zero, skipna=True)
if max_val >= 1.e8:
self.exp_format = True
if not self.suppress_small and (min_val < 0.0001
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 827b8e4d4..ff217be62 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -15,6 +15,7 @@
#include "numpy/npy_3kcompat.h"
#include "common.h"
+#include "arrayobject.h"
#include "ctors.h"
#include "lowlevel_strided_loops.h"
#include "na_singleton.h"
@@ -43,7 +44,7 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis,
return NULL;
}
indices = (PyArrayObject *)PyArray_ContiguousFromAny(indices0,
- PyArray_INTP,
+ NPY_INTP,
1, 0);
if (indices == NULL) {
goto fail;
@@ -161,17 +162,6 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis,
src_maskna = PyArray_MASKNA_DATA(self);
dst_maskna = PyArray_MASKNA_DATA(obj);
- if (PyDataType_REFCHK(PyArray_DESCR(self))) {
- /*
- * TODO: Should use PyArray_GetDTypeTransferFunction
- * instead of raw memmove to remedy this.
- */
- PyErr_SetString(PyExc_RuntimeError,
- "ndarray.take doesn't support object arrays with "
- "masks yet");
- NPY_AUXDATA_FREE(transferdata);
- goto fail;
- }
switch(clipmode) {
case NPY_RAISE:
@@ -183,14 +173,13 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis,
}
if ((tmp < 0) || (tmp >= max_item)) {
PyErr_SetString(PyExc_IndexError,
- "index out of range "\
- "for array");
+ "index out of range for array");
NPY_AUXDATA_FREE(transferdata);
goto fail;
}
maskedstransfer(dest, itemsize,
src + tmp*chunk, itemsize,
- (npy_mask *)src_maskna, 1,
+ (npy_mask *)(src_maskna + tmp*nelem), 1,
nelem, itemsize, transferdata);
dest += chunk;
memmove(dst_maskna, src_maskna + tmp*nelem, nelem);
@@ -216,7 +205,7 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis,
}
maskedstransfer(dest, itemsize,
src + tmp*chunk, itemsize,
- (npy_mask *)src_maskna, 1,
+ (npy_mask *)(src_maskna + tmp*nelem), 1,
nelem, itemsize, transferdata);
dest += chunk;
memmove(dst_maskna, src_maskna + tmp*nelem, nelem);
@@ -238,7 +227,7 @@ PyArray_TakeFrom(PyArrayObject *self0, PyObject *indices0, int axis,
}
maskedstransfer(dest, itemsize,
src + tmp*chunk, itemsize,
- (npy_mask *)src_maskna, 1,
+ (npy_mask *)(src_maskna + tmp*nelem), 1,
nelem, itemsize, transferdata);
dest += chunk;
memmove(dst_maskna, src_maskna + tmp*nelem, nelem);
@@ -373,7 +362,7 @@ PyArray_PutTo(PyArrayObject *self, PyObject* values0, PyObject *indices0,
dest = PyArray_DATA(self);
chunk = PyArray_DESCR(self)->elsize;
indices = (PyArrayObject *)PyArray_ContiguousFromAny(indices0,
- PyArray_INTP, 0, 0);
+ NPY_INTP, 0, 0);
if (indices == NULL) {
goto fail;
}
@@ -640,7 +629,7 @@ PyArray_Repeat(PyArrayObject *aop, PyObject *op, int axis)
PyArrayObject *ret = NULL;
char *new_data, *old_data;
- repeats = (PyArrayObject *)PyArray_ContiguousFromAny(op, PyArray_INTP, 0, 1);
+ repeats = (PyArrayObject *)PyArray_ContiguousFromAny(op, NPY_INTP, 0, 1);
if (repeats == NULL) {
return NULL;
}
@@ -1209,7 +1198,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which)
if ((n == 0) || (PyArray_SIZE(op) == 1)) {
ret = (PyArrayObject *)PyArray_New(Py_TYPE(op), PyArray_NDIM(op),
PyArray_DIMS(op),
- PyArray_INTP,
+ NPY_INTP,
NULL, NULL, 0, 0,
(PyObject *)op);
if (ret == NULL) {
@@ -1248,7 +1237,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which)
return NULL;
}
ret = (PyArrayObject *)PyArray_New(Py_TYPE(op), PyArray_NDIM(op),
- PyArray_DIMS(op), PyArray_INTP,
+ PyArray_DIMS(op), NPY_INTP,
NULL, NULL, 0, 0, (PyObject *)op);
if (ret == NULL) {
goto fail;
@@ -1371,7 +1360,7 @@ PyArray_LexSort(PyObject *sort_keys, int axis)
/* single element case */
ret = (PyArrayObject *)PyArray_New(&PyArray_Type, PyArray_NDIM(mps[0]),
PyArray_DIMS(mps[0]),
- PyArray_INTP,
+ NPY_INTP,
NULL, NULL, 0, 0, NULL);
if (ret == NULL) {
@@ -1391,7 +1380,7 @@ PyArray_LexSort(PyObject *sort_keys, int axis)
/* Now do the sorting */
ret = (PyArrayObject *)PyArray_New(&PyArray_Type, PyArray_NDIM(mps[0]),
- PyArray_DIMS(mps[0]), PyArray_INTP,
+ PyArray_DIMS(mps[0]), NPY_INTP,
NULL, NULL, 0, 0, NULL);
if (ret == NULL) {
goto fail;
@@ -1622,7 +1611,7 @@ PyArray_SearchSorted(PyArrayObject *op1, PyObject *op2, NPY_SEARCHSIDE side)
}
/* ret is a contiguous array of intp type to hold returned indices */
ret = (PyArrayObject *)PyArray_New(Py_TYPE(ap2), PyArray_NDIM(ap2),
- PyArray_DIMS(ap2), PyArray_INTP,
+ PyArray_DIMS(ap2), NPY_INTP,
NULL, NULL, 0, 0, (PyObject *)ap2);
if (ret == NULL) {
goto fail;
diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py
index 0f63cc8bd..4dd2b13e5 100644
--- a/numpy/core/tests/test_maskna.py
+++ b/numpy/core/tests/test_maskna.py
@@ -632,6 +632,12 @@ def test_maskna_take_1D():
assert_equal([c[0], c[2]], [0,4])
assert_equal(np.isna(c), [0,1,0])
+ # Take with an NA just at the start
+ a = np.arange(5, maskna=True)
+ a[0] = np.NA
+ res = a.take([1,2,3,4])
+ assert_equal(res, [1,2,3,4])
+
def test_maskna_ufunc_1D():
a_orig = np.arange(3)
a = a_orig.view(maskna=True)
@@ -951,6 +957,15 @@ def test_array_maskna_column_stack():
assert_equal(np.isna(res), [[0,0], [0,0], [0,1]])
assert_equal(res[~np.isna(res)], [1,2,2,3,3])
+def test_array_maskna_compress():
+ # ndarray.compress
+ a = np.arange(5., maskna=True)
+ a[0] = np.NA
+
+ mask = np.array([0,1,1,1,1], dtype='?')
+ res = a.compress(mask)
+ assert_equal(res, [1,2,3,4])
+
if __name__ == "__main__":
run_module_suite()