summary | refs | log | tree | commit | diff
path: root/numpy
diff options
context:
space:
mode:
author: Mark Wiebe <mwwiebe@gmail.com> 2011-08-17 23:07:58 -0700
committer: Charles Harris <charlesr.harris@gmail.com> 2011-08-27 07:26:56 -0600
commit: a112fc4a6b28fbb85e1b0c6d423095d13cf7b226 (patch)
tree: 07ce0d495f708debcf76be16f7cfb66ea0a1ddb5 /numpy
parent: 0fa4f22fec4b19e2a8c1d93e5a1f955167c9addd (diff)
download: numpy-a112fc4a6b28fbb85e1b0c6d423095d13cf7b226.tar.gz
ENH: missingdata: Implement skipna= support for np.std and np.var
Diffstat (limited to 'numpy')
-rw-r--r--  numpy/add_newdocs.py  2
-rw-r--r--  numpy/core/fromnumeric.py  113
-rw-r--r--  numpy/core/src/multiarray/methods.c  33
-rw-r--r--  numpy/core/src/multiarray/shape.c  57
-rw-r--r--  numpy/core/src/multiarray/shape.h  6
5 files changed, 183 insertions, 28 deletions
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py
index b402d2150..711b7de52 100644
--- a/numpy/add_newdocs.py
+++ b/numpy/add_newdocs.py
@@ -4194,7 +4194,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('sort',
add_newdoc('numpy.core.multiarray', 'ndarray', ('squeeze',
"""
- a.squeeze()
+ a.squeeze(axis=None)
Remove single-dimensional entries from the shape of `a`.
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index 4682386d7..03cb427cd 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -868,7 +868,7 @@ def resize(a, new_shape):
return reshape(a, new_shape)
-def squeeze(a):
+def squeeze(a, axis=None):
"""
Remove single-dimensional entries from the shape of an array.
@@ -876,12 +876,17 @@ def squeeze(a):
----------
a : array_like
Input data.
+ axis : None or int or tuple of ints, optional
+ Selects a subset of the single-dimensional entries in the
+ shape. If an axis is selected with shape entry greater than
+ one, that axis is left untouched.
Returns
-------
squeezed : ndarray
- The input array, but with with all dimensions of length 1
- removed. Whenever possible, a view on `a` is returned.
+ The input array, but with all or a subset of the
+ dimensions of length 1 removed. This is always `a` itself
+ or a view into `a`.
Examples
--------
@@ -896,7 +901,7 @@ def squeeze(a):
squeeze = a.squeeze
except AttributeError:
return _wrapit(a, 'squeeze')
- return squeeze()
+ return squeeze(axis=axis)
def diagonal(a, offset=0, axis1=0, axis2=1):
@@ -2432,14 +2437,15 @@ def mean(a, axis=None, dtype=None, out=None, skipna=False, keepdims=False):
out=out, skipna=skipna, keepdims=keepdims)
rcount = mu.count_reduce_items(arr, axis=axis,
skipna=skipna, keepdims=keepdims)
- if type(ret) is mu.ndarray:
+ if isinstance(ret, mu.ndarray):
um.true_divide(ret, rcount, out=ret, casting='unsafe')
else:
ret = ret / float(rcount)
return ret
-def std(a, axis=None, dtype=None, out=None, ddof=0):
+def std(a, axis=None, dtype=None, out=None, ddof=0,
+ skipna=False, keepdims=False):
"""
Compute the standard deviation along the specified axis.
@@ -2466,6 +2472,13 @@ def std(a, axis=None, dtype=None, out=None, ddof=0):
Means Delta Degrees of Freedom. The divisor used in calculations
is ``N - ddof``, where ``N`` represents the number of elements.
By default `ddof` is zero.
+ skipna : bool, optional
+ If this is set to True, skips any NA values during calculation
+ instead of propagating them.
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the original `a`.
Returns
-------
@@ -2526,14 +2539,25 @@ def std(a, axis=None, dtype=None, out=None, ddof=0):
0.44999999925552653
"""
- try:
- std = a.std
- except AttributeError:
- return _wrapit(a, 'std', axis, dtype, out, ddof)
- return std(axis, dtype, out, ddof)
+ if not (type(a) is mu.ndarray):
+ try:
+ std = a.std
+ return std(axis=axis, dtype=dtype, out=out, ddof=ddof)
+ except AttributeError:
+ pass
+
+ ret = var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
+ skipna=skipna, keepdims=keepdims)
+
+ if isinstance(ret, mu.ndarray):
+ um.sqrt(ret, out=ret)
+ else:
+ ret = um.sqrt(ret)
+ return ret
-def var(a, axis=None, dtype=None, out=None, ddof=0):
+def var(a, axis=None, dtype=None, out=None, ddof=0,
+ skipna=False, keepdims=False):
"""
Compute the variance along the specified axis.
@@ -2561,6 +2585,13 @@ def var(a, axis=None, dtype=None, out=None, ddof=0):
"Delta Degrees of Freedom": the divisor used in the calculation is
``N - ddof``, where ``N`` represents the number of elements. By
default `ddof` is zero.
+ skipna : bool, optional
+ If this is set to True, skips any NA values during calculation
+ instead of propagating them.
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the original `a`.
Returns
-------
@@ -2600,9 +2631,9 @@ def var(a, axis=None, dtype=None, out=None, ddof=0):
>>> a = np.array([[1,2],[3,4]])
>>> np.var(a)
1.25
- >>> np.var(a,0)
+ >>> np.var(a, axis=0)
array([ 1., 1.])
- >>> np.var(a,1)
+ >>> np.var(a, axis=1)
array([ 0.25, 0.25])
In single precision, var() can be inaccurate:
@@ -2613,7 +2644,7 @@ def var(a, axis=None, dtype=None, out=None, ddof=0):
>>> np.var(a)
0.20405951142311096
- Computing the standard deviation in float64 is more accurate:
+ Computing the variance in float64 is more accurate:
>>> np.var(a, dtype=np.float64)
0.20249999932997387
@@ -2621,8 +2652,50 @@ def var(a, axis=None, dtype=None, out=None, ddof=0):
0.20250000000000001
"""
- try:
- var = a.var
- except AttributeError:
- return _wrapit(a, 'var', axis, dtype, out, ddof)
- return var(axis, dtype, out, ddof)
+ if not (type(a) is mu.ndarray):
+ try:
+ var = a.var
+ return var(axis=axis, dtype=dtype, out=out, ddof=ddof)
+ except AttributeError:
+ pass
+
+ arr = asarray(a)
+
+ # First compute the mean, saving 'rcount' for reuse later
+ if dtype is None and arr.dtype.kind in ['b','u','i']:
+ arrmean = um.add.reduce(arr, axis=axis, dtype='f8',
+ skipna=skipna, keepdims=True)
+ else:
+ arrmean = um.add.reduce(arr, axis=axis, dtype=dtype,
+ skipna=skipna, keepdims=True)
+ rcount = mu.count_reduce_items(arr, axis=axis,
+ skipna=skipna, keepdims=True)
+ if isinstance(arrmean, mu.ndarray):
+ um.true_divide(arrmean, rcount, out=arrmean, casting='unsafe')
+ else:
+ arrmean = arrmean / float(rcount)
+
+ # arr - arrmean
+ x = arr - arrmean
+
+ # (arr - arrmean) ** 2
+ if arr.dtype.kind == 'c':
+ um.multiply(x, um.conjugate(x), out=x)
+ x = x.real
+ else:
+ um.multiply(x, x, out=x)
+
+ # add.reduce((arr - arrmean) ** 2, axis)
+ ret = um.add.reduce(x, axis=axis, dtype=dtype, out=out,
+ skipna=skipna, keepdims=keepdims)
+
+ # add.reduce((arr - arrmean) ** 2, axis) / (n - ddof)
+ if not keepdims and isinstance(rcount, mu.ndarray):
+ rcount = rcount.squeeze(axis=axis)
+ rcount -= ddof
+ if isinstance(ret, mu.ndarray):
+ um.true_divide(ret, rcount, out=ret, casting='unsafe')
+ else:
+ ret = ret / float(rcount)
+
+ return ret
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 649ff734d..6a5a5620b 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -16,10 +16,12 @@
#include "common.h"
#include "ctors.h"
#include "calculation.h"
-
-#include "methods.h"
#include "convert_datatype.h"
#include "item_selection.h"
+#include "conversion_utils.h"
+#include "shape.h"
+
+#include "methods.h"
/* NpyArg_ParseKeywords
@@ -138,12 +140,28 @@ array_reshape(PyArrayObject *self, PyObject *args, PyObject *kwds)
}
static PyObject *
-array_squeeze(PyArrayObject *self, PyObject *args)
+array_squeeze(PyArrayObject *self, PyObject *args, PyObject *kwds)
{
- if (!PyArg_ParseTuple(args, "")) {
+ PyObject *axis_in = NULL;
+ npy_bool axis_flags[NPY_MAXDIMS];
+
+ static char *kwlist[] = {"axis", NULL};
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist,
+ &axis_in)) {
return NULL;
}
- return PyArray_Squeeze(self);
+
+ if (axis_in == NULL) {
+ return PyArray_Squeeze(self);
+ }
+ else {
+ if (PyArray_ConvertMultiAxis(axis_in, PyArray_NDIM(self),
+ axis_flags) != NPY_SUCCEED) {
+ return NULL;
+ }
+
+ return PyArray_SqueezeSelected(self, axis_flags);
+ }
}
static PyObject *
@@ -160,8 +178,9 @@ array_view(PyArrayObject *self, PyObject *args, PyObject *kwds)
&out_dtype,
&out_type,
&maskna,
- &ownmaskna))
+ &ownmaskna)) {
return NULL;
+ }
/* If user specified a positional argument, guess whether it
represents a type or a dtype for backward compatibility. */
@@ -2382,7 +2401,7 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
METH_VARARGS | METH_KEYWORDS, NULL},
{"squeeze",
(PyCFunction)array_squeeze,
- METH_VARARGS, NULL},
+ METH_VARARGS | METH_KEYWORDS, NULL},
{"std",
(PyCFunction)array_stddev,
METH_VARARGS | METH_KEYWORDS, NULL},
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index 496d32955..97b0b204a 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -708,6 +708,59 @@ PyArray_Squeeze(PyArrayObject *self)
return (PyObject *)ret;
}
+/*
+ * Just like PyArray_Squeeze, but allows the caller to select
+ * a subset of the size-one dimensions to squeeze out.
+ */
+NPY_NO_EXPORT PyObject *
+PyArray_SqueezeSelected(PyArrayObject *self, npy_bool *axis_flags)
+{
+ PyArrayObject *ret;
+ npy_bool unit_dims[NPY_MAXDIMS];
+ int idim, ndim, any_ones;
+ npy_intp *shape;
+
+ ndim = PyArray_NDIM(self);
+ shape = PyArray_SHAPE(self);
+
+ /* Pick out the requested axes that have size one; other requested axes are ignored */
+ any_ones = 0;
+ for (idim = 0; idim < ndim; ++idim) {
+ if (axis_flags[idim] != 0 && shape[idim] == 1) {
+ unit_dims[idim] = 1;
+ any_ones = 1;
+ }
+ else {
+ unit_dims[idim] = 0;
+ }
+ }
+
+ /* If there were no axes to squeeze out, return the same array */
+ if (!any_ones) {
+ Py_INCREF(self);
+ return (PyObject *)self;
+ }
+
+ ret = (PyArrayObject *)PyArray_View(self, NULL, &PyArray_Type);
+ if (ret == NULL) {
+ return NULL;
+ }
+
+ PyArray_RemoveAxesInPlace(ret, unit_dims);
+
+ /*
+ * If self is not a base class ndarray, call its
+ * __array_wrap__ method
+ */
+ if (Py_TYPE(self) != &PyArray_Type) {
+ PyArrayObject *tmp = PyArray_SubclassWrap(self, ret);
+ Py_DECREF(ret);
+ ret = tmp;
+ }
+
+ return (PyObject *)ret;
+}
+
/*NUMPY_API
* SwapAxes
*/
@@ -1196,6 +1249,10 @@ build_shape_string(npy_intp n, npy_intp *vals)
* has a shape entry bigger than one, this effectively selects
* index zero for that axis.
*
+ * WARNING: If an axis flagged for removal has a shape equal to zero,
+ * the array will point to invalid memory. The caller must
+ * validate this!
+ *
* For example, this can be used to remove the reduction axes
* from a reduction result once its computation is complete.
*/
diff --git a/numpy/core/src/multiarray/shape.h b/numpy/core/src/multiarray/shape.h
index a293254a7..0451a463e 100644
--- a/numpy/core/src/multiarray/shape.h
+++ b/numpy/core/src/multiarray/shape.h
@@ -21,5 +21,11 @@ NPY_NO_EXPORT void
PyArray_CreateMultiSortedStridePerm(int narrays, PyArrayObject **arrays,
int ndim, int *out_strideperm);
+/*
+ * Just like PyArray_Squeeze, but allows the caller to select
+ * a subset of the size-one dimensions to squeeze out.
+ */
+NPY_NO_EXPORT PyObject *
+PyArray_SqueezeSelected(PyArrayObject *self, npy_bool *axis_flags);
#endif