diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-08-17 23:07:58 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2011-08-27 07:26:56 -0600 |
commit | a112fc4a6b28fbb85e1b0c6d423095d13cf7b226 (patch) | |
tree | 07ce0d495f708debcf76be16f7cfb66ea0a1ddb5 /numpy | |
parent | 0fa4f22fec4b19e2a8c1d93e5a1f955167c9addd (diff) | |
download | numpy-a112fc4a6b28fbb85e1b0c6d423095d13cf7b226.tar.gz |
ENH: missingdata: Implement skipna= support for np.std and np.var
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/add_newdocs.py | 2 | ||||
-rw-r--r-- | numpy/core/fromnumeric.py | 113 | ||||
-rw-r--r-- | numpy/core/src/multiarray/methods.c | 33 | ||||
-rw-r--r-- | numpy/core/src/multiarray/shape.c | 57 | ||||
-rw-r--r-- | numpy/core/src/multiarray/shape.h | 6 |
5 files changed, 183 insertions, 28 deletions
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index b402d2150..711b7de52 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -4194,7 +4194,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('sort', add_newdoc('numpy.core.multiarray', 'ndarray', ('squeeze', """ - a.squeeze() + a.squeeze(axis=None) Remove single-dimensional entries from the shape of `a`. diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 4682386d7..03cb427cd 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -868,7 +868,7 @@ def resize(a, new_shape): return reshape(a, new_shape) -def squeeze(a): +def squeeze(a, axis=None): """ Remove single-dimensional entries from the shape of an array. @@ -876,12 +876,17 @@ def squeeze(a): ---------- a : array_like Input data. + axis : None or int or tuple of ints, optional + Selects a subset of the single-dimensional entries in the + shape. If an axis is selected with shape entry greater than + one, that axis is left untouched. Returns ------- squeezed : ndarray - The input array, but with with all dimensions of length 1 - removed. Whenever possible, a view on `a` is returned. + The input array, but with with all or a subset of the + dimensions of length 1 removed. This is always `a` itself + or a view into `a`. Examples -------- @@ -896,7 +901,7 @@ def squeeze(a): squeeze = a.squeeze except AttributeError: return _wrapit(a, 'squeeze') - return squeeze() + return squeeze(axis=axis) def diagonal(a, offset=0, axis1=0, axis2=1): @@ -2432,14 +2437,15 @@ def mean(a, axis=None, dtype=None, out=None, skipna=False, keepdims=False): out=out, skipna=skipna, keepdims=keepdims) rcount = mu.count_reduce_items(arr, axis=axis, skipna=skipna, keepdims=keepdims) - if type(ret) is mu.ndarray: + if isinstance(ret, mu.ndarray): um.true_divide(ret, rcount, out=ret, casting='unsafe') else: ret = ret / float(rcount) return ret -def std(a, axis=None, dtype=None, out=None, ddof=0): +def std(a, axis=None, dtype=None, out=None, ddof=0, + skipna=False, keepdims=False): """ Compute the standard deviation along the specified axis. @@ -2466,6 +2472,13 @@ def std(a, axis=None, dtype=None, out=None, ddof=0): Means Delta Degrees of Freedom. The divisor used in calculations is ``N - ddof``, where ``N`` represents the number of elements. By default `ddof` is zero. + skipna : bool, optional + If this is set to True, skips any NA values during calculation + instead of propagating them. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. Returns ------- @@ -2526,14 +2539,25 @@ def std(a, axis=None, dtype=None, out=None, ddof=0): 0.44999999925552653 """ - try: - std = a.std - except AttributeError: - return _wrapit(a, 'std', axis, dtype, out, ddof) - return std(axis, dtype, out, ddof) + if not (type(a) is mu.ndarray): + try: + std = a.std + return std(axis=axis, dtype=dtype, out=out, ddof=ddof) + except AttributeError: + pass + + ret = var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + skipna=skipna, keepdims=keepdims) + + if isinstance(ret, mu.ndarray): + um.sqrt(ret, out=ret) + else: + ret = um.sqrt(ret) + return ret -def var(a, axis=None, dtype=None, out=None, ddof=0): +def var(a, axis=None, dtype=None, out=None, ddof=0, + skipna=False, keepdims=False): """ Compute the variance along the specified axis. @@ -2561,6 +2585,13 @@ def var(a, axis=None, dtype=None, out=None, ddof=0): "Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``, where ``N`` represents the number of elements. By default `ddof` is zero. + skipna : bool, optional + If this is set to True, skips any NA values during calculation + instead of propagating them. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. Returns ------- @@ -2600,9 +2631,9 @@ def var(a, axis=None, dtype=None, out=None, ddof=0): >>> a = np.array([[1,2],[3,4]]) >>> np.var(a) 1.25 - >>> np.var(a,0) + >>> np.var(a, axis=0) array([ 1., 1.]) - >>> np.var(a,1) + >>> np.var(a, axis=1) array([ 0.25, 0.25]) In single precision, var() can be inaccurate: @@ -2613,7 +2644,7 @@ def var(a, axis=None, dtype=None, out=None, ddof=0): >>> np.var(a) 0.20405951142311096 - Computing the standard deviation in float64 is more accurate: + Computing the variance in float64 is more accurate: >>> np.var(a, dtype=np.float64) 0.20249999932997387 @@ -2621,8 +2652,50 @@ def var(a, axis=None, dtype=None, out=None, ddof=0): 0.20250000000000001 """ - try: - var = a.var - except AttributeError: - return _wrapit(a, 'var', axis, dtype, out, ddof) - return var(axis, dtype, out, ddof) + if not (type(a) is mu.ndarray): + try: + var = a.var + return var(axis=axis, dtype=dtype, out=out, ddof=ddof) + except AttributeError: + pass + + arr = asarray(a) + + # First compute the mean, saving 'rcount' for reuse later + if dtype is None and arr.dtype.kind in ['b','u','i']: + arrmean = um.add.reduce(arr, axis=axis, dtype='f8', + skipna=skipna, keepdims=True) + else: + arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, + skipna=skipna, keepdims=True) + rcount = mu.count_reduce_items(arr, axis=axis, + skipna=skipna, keepdims=True) + if isinstance(arrmean, mu.ndarray): + um.true_divide(arrmean, rcount, out=arrmean, casting='unsafe') + else: + arrmean = arrmean / float(rcount) + + # arr - arrmean + x = arr - arrmean + + # (arr - arrmean) ** 2 + if arr.dtype.kind == 'c': + um.multiply(x, um.conjugate(x), out=x) + x = x.real + else: + um.multiply(x, x, out=x) + + # add.reduce((arr - arrmean) ** 2, axis) + ret = um.add.reduce(x, axis=axis, dtype=dtype, out=out, + skipna=skipna, keepdims=keepdims) + + # add.reduce((arr - arrmean) ** 2, axis) / (n - ddof) + if not keepdims and isinstance(rcount, mu.ndarray): + rcount = rcount.squeeze(axis=axis) + rcount -= ddof + if isinstance(ret, mu.ndarray): + um.true_divide(ret, rcount, out=ret, casting='unsafe') + else: + ret = ret / float(rcount) + + return ret diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 649ff734d..6a5a5620b 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -16,10 +16,12 @@ #include "common.h" #include "ctors.h" #include "calculation.h" - -#include "methods.h" #include "convert_datatype.h" #include "item_selection.h" +#include "conversion_utils.h" +#include "shape.h" + +#include "methods.h" /* NpyArg_ParseKeywords @@ -138,12 +140,28 @@ array_reshape(PyArrayObject *self, PyObject *args, PyObject *kwds) } static PyObject * -array_squeeze(PyArrayObject *self, PyObject *args) +array_squeeze(PyArrayObject *self, PyObject *args, PyObject *kwds) { - if (!PyArg_ParseTuple(args, "")) { + PyObject *axis_in = NULL; + npy_bool axis_flags[NPY_MAXDIMS]; + + static char *kwlist[] = {"axis", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist, + &axis_in)) { return NULL; } - return PyArray_Squeeze(self); + + if (axis_in == NULL) { + return PyArray_Squeeze(self); + } + else { + if (PyArray_ConvertMultiAxis(axis_in, PyArray_NDIM(self), + axis_flags) != NPY_SUCCEED) { + return NULL; + } + + return PyArray_SqueezeSelected(self, axis_flags); + } } static PyObject * @@ -160,8 +178,9 @@ array_view(PyArrayObject *self, PyObject *args, PyObject *kwds) &out_dtype, &out_type, &maskna, - &ownmaskna)) + &ownmaskna)) { return NULL; + } /* If user specified a positional argument, guess whether it represents a type or a dtype for backward compatibility. */ @@ -2382,7 +2401,7 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"squeeze", (PyCFunction)array_squeeze, - METH_VARARGS, NULL}, + METH_VARARGS | METH_KEYWORDS, NULL}, {"std", (PyCFunction)array_stddev, METH_VARARGS | METH_KEYWORDS, NULL}, diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index 496d32955..97b0b204a 100644 --- a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -708,6 +708,59 @@ PyArray_Squeeze(PyArrayObject *self) return (PyObject *)ret; } +/* + * Just like PyArray_Squeeze, but allows the caller to select + * a subset of the size-one dimensions to squeeze out. + */ +NPY_NO_EXPORT PyObject * +PyArray_SqueezeSelected(PyArrayObject *self, npy_bool *axis_flags) +{ + PyArrayObject *ret; + npy_bool unit_dims[NPY_MAXDIMS]; + int idim, ndim, any_ones; + npy_intp *shape; + + ndim = PyArray_NDIM(self); + shape = PyArray_SHAPE(self); + + /* Verify that the axes requested are all of size one */ + any_ones = 0; + for (idim = 0; idim < ndim; ++idim) { + if (axis_flags[idim] != 0 && shape[idim] == 1) { + unit_dims[idim] = 1; + any_ones = 1; + } + else { + unit_dims[idim] = 0; + } + } + + /* If there were no axes to squeeze out, return the same array */ + if (!any_ones) { + Py_INCREF(self); + return (PyObject *)self; + } + + ret = (PyArrayObject *)PyArray_View(self, NULL, &PyArray_Type); + if (ret == NULL) { + return NULL; + } + + PyArray_RemoveAxesInPlace(ret, unit_dims); + + /* + * If self isn't not a base class ndarray, call its + * __array_wrap__ method + */ + if (Py_TYPE(self) != &PyArray_Type) { + PyArrayObject *tmp = PyArray_SubclassWrap(self, ret); + Py_DECREF(ret); + ret = tmp; + } + + return (PyObject *)ret; +} + /*NUMPY_API * SwapAxes */ @@ -1196,6 +1249,10 @@ build_shape_string(npy_intp n, npy_intp *vals) * has a shape entry bigger than one, this effectively selects * index zero for that axis. * + * WARNING: If an axis flagged for removal has a shape equal to zero, + * the array will point to invalid memory. The caller must + * validate this! + * * For example, this can be used to remove the reduction axes * from a reduction result once its computation is complete. */ diff --git a/numpy/core/src/multiarray/shape.h b/numpy/core/src/multiarray/shape.h index a293254a7..0451a463e 100644 --- a/numpy/core/src/multiarray/shape.h +++ b/numpy/core/src/multiarray/shape.h @@ -21,5 +21,11 @@ NPY_NO_EXPORT void PyArray_CreateMultiSortedStridePerm(int narrays, PyArrayObject **arrays, int ndim, int *out_strideperm); +/* + * Just like PyArray_Squeeze, but allows the caller to select + * a subset of the size-one dimensions to squeeze out. + */ +NPY_NO_EXPORT PyObject * +PyArray_SqueezeSelected(PyArrayObject *self, npy_bool *axis_flags); #endif |