summaryrefslogtreecommitdiff
path: root/numpy/core/fromnumeric.py
diff options
context:
space:
mode:
authorMark Wiebe <mwwiebe@gmail.com>2011-08-17 23:07:58 -0700
committerCharles Harris <charlesr.harris@gmail.com>2011-08-27 07:26:56 -0600
commita112fc4a6b28fbb85e1b0c6d423095d13cf7b226 (patch)
tree07ce0d495f708debcf76be16f7cfb66ea0a1ddb5 /numpy/core/fromnumeric.py
parent0fa4f22fec4b19e2a8c1d93e5a1f955167c9addd (diff)
downloadnumpy-a112fc4a6b28fbb85e1b0c6d423095d13cf7b226.tar.gz
ENH: missingdata: Implement skipna= support for np.std and np.var
Diffstat (limited to 'numpy/core/fromnumeric.py')
-rw-r--r--numpy/core/fromnumeric.py113
1 files changed, 93 insertions, 20 deletions
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index 4682386d7..03cb427cd 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -868,7 +868,7 @@ def resize(a, new_shape):
return reshape(a, new_shape)
-def squeeze(a):
+def squeeze(a, axis=None):
"""
Remove single-dimensional entries from the shape of an array.
@@ -876,12 +876,17 @@ def squeeze(a):
----------
a : array_like
Input data.
+ axis : None or int or tuple of ints, optional
+ Selects a subset of the single-dimensional entries in the
+ shape. If an axis is selected with shape entry greater than
+ one, that axis is left untouched.
Returns
-------
squeezed : ndarray
- The input array, but with with all dimensions of length 1
- removed. Whenever possible, a view on `a` is returned.
+ The input array, but with with all or a subset of the
+ dimensions of length 1 removed. This is always `a` itself
+ or a view into `a`.
Examples
--------
@@ -896,7 +901,7 @@ def squeeze(a):
squeeze = a.squeeze
except AttributeError:
return _wrapit(a, 'squeeze')
- return squeeze()
+ return squeeze(axis=axis)
def diagonal(a, offset=0, axis1=0, axis2=1):
@@ -2432,14 +2437,15 @@ def mean(a, axis=None, dtype=None, out=None, skipna=False, keepdims=False):
out=out, skipna=skipna, keepdims=keepdims)
rcount = mu.count_reduce_items(arr, axis=axis,
skipna=skipna, keepdims=keepdims)
- if type(ret) is mu.ndarray:
+ if isinstance(ret, mu.ndarray):
um.true_divide(ret, rcount, out=ret, casting='unsafe')
else:
ret = ret / float(rcount)
return ret
-def std(a, axis=None, dtype=None, out=None, ddof=0):
+def std(a, axis=None, dtype=None, out=None, ddof=0,
+ skipna=False, keepdims=False):
"""
Compute the standard deviation along the specified axis.
@@ -2466,6 +2472,13 @@ def std(a, axis=None, dtype=None, out=None, ddof=0):
Means Delta Degrees of Freedom. The divisor used in calculations
is ``N - ddof``, where ``N`` represents the number of elements.
By default `ddof` is zero.
+ skipna : bool, optional
+ If this is set to True, skips any NA values during calculation
+ instead of propagating them.
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the original `arr`.
Returns
-------
@@ -2526,14 +2539,25 @@ def std(a, axis=None, dtype=None, out=None, ddof=0):
0.44999999925552653
"""
- try:
- std = a.std
- except AttributeError:
- return _wrapit(a, 'std', axis, dtype, out, ddof)
- return std(axis, dtype, out, ddof)
+ if not (type(a) is mu.ndarray):
+ try:
+ std = a.std
+ return std(axis=axis, dtype=dtype, out=out, ddof=ddof)
+ except AttributeError:
+ pass
+
+ ret = var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
+ skipna=skipna, keepdims=keepdims)
+
+ if isinstance(ret, mu.ndarray):
+ um.sqrt(ret, out=ret)
+ else:
+ ret = um.sqrt(ret)
+ return ret
-def var(a, axis=None, dtype=None, out=None, ddof=0):
+def var(a, axis=None, dtype=None, out=None, ddof=0,
+ skipna=False, keepdims=False):
"""
Compute the variance along the specified axis.
@@ -2561,6 +2585,13 @@ def var(a, axis=None, dtype=None, out=None, ddof=0):
"Delta Degrees of Freedom": the divisor used in the calculation is
``N - ddof``, where ``N`` represents the number of elements. By
default `ddof` is zero.
+ skipna : bool, optional
+ If this is set to True, skips any NA values during calculation
+ instead of propagating them.
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the original `arr`.
Returns
-------
@@ -2600,9 +2631,9 @@ def var(a, axis=None, dtype=None, out=None, ddof=0):
>>> a = np.array([[1,2],[3,4]])
>>> np.var(a)
1.25
- >>> np.var(a,0)
+ >>> np.var(a, axis=0)
array([ 1., 1.])
- >>> np.var(a,1)
+ >>> np.var(a, axis=1)
array([ 0.25, 0.25])
In single precision, var() can be inaccurate:
@@ -2613,7 +2644,7 @@ def var(a, axis=None, dtype=None, out=None, ddof=0):
>>> np.var(a)
0.20405951142311096
- Computing the standard deviation in float64 is more accurate:
+ Computing the variance in float64 is more accurate:
>>> np.var(a, dtype=np.float64)
0.20249999932997387
@@ -2621,8 +2652,50 @@ def var(a, axis=None, dtype=None, out=None, ddof=0):
0.20250000000000001
"""
- try:
- var = a.var
- except AttributeError:
- return _wrapit(a, 'var', axis, dtype, out, ddof)
- return var(axis, dtype, out, ddof)
+ if not (type(a) is mu.ndarray):
+ try:
+ var = a.var
+ return var(axis=axis, dtype=dtype, out=out, ddof=ddof)
+ except AttributeError:
+ pass
+
+ arr = asarray(a)
+
+ # First compute the mean, saving 'rcount' for reuse later
+ if dtype is None and arr.dtype.kind in ['b','u','i']:
+ arrmean = um.add.reduce(arr, axis=axis, dtype='f8',
+ skipna=skipna, keepdims=True)
+ else:
+ arrmean = um.add.reduce(arr, axis=axis, dtype=dtype,
+ skipna=skipna, keepdims=True)
+ rcount = mu.count_reduce_items(arr, axis=axis,
+ skipna=skipna, keepdims=True)
+ if isinstance(arrmean, mu.ndarray):
+ um.true_divide(arrmean, rcount, out=arrmean, casting='unsafe')
+ else:
+ arrmean = arrmean / float(rcount)
+
+ # arr - arrmean
+ x = arr - arrmean
+
+ # (arr - arrmean) ** 2
+ if arr.dtype.kind == 'c':
+ um.multiply(x, um.conjugate(x), out=x)
+ x = x.real
+ else:
+ um.multiply(x, x, out=x)
+
+ # add.reduce((arr - arrmean) ** 2, axis)
+ ret = um.add.reduce(x, axis=axis, dtype=dtype, out=out,
+ skipna=skipna, keepdims=keepdims)
+
+ # add.reduce((arr - arrmean) ** 2, axis) / (n - ddof)
+ if not keepdims and isinstance(rcount, mu.ndarray):
+ rcount = rcount.squeeze(axis=axis)
+ rcount -= ddof
+ if isinstance(ret, mu.ndarray):
+ um.true_divide(ret, rcount, out=ret, casting='unsafe')
+ else:
+ ret = ret / float(rcount)
+
+ return ret