summary refs log tree commit diff
path: root/numpy/lib/nanfunctions.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/nanfunctions.py')
-rw-r--r-- numpy/lib/nanfunctions.py 801
1 files changed, 468 insertions, 333 deletions
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index f0e635791..12f4a8376 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -1,215 +1,147 @@
-"""Functions that ignore nan.
+"""
+Functions that ignore NaN.
+
+Functions
+---------
+
+- `nanmin` -- minimum non-NaN value
+- `nanmax` -- maximum non-NaN value
+- `nanargmin` -- index of minimum non-NaN value
+- `nanargmax` -- index of maximum non-NaN value
+- `nansum` -- sum of non-NaN values
+- `nanmean` -- mean of non-NaN values
+- `nanvar` -- variance of non-NaN values
+- `nanstd` -- standard deviation of non-NaN values
+
+Classes
+-------
+- `NanWarning` -- Warning raised by nanfunctions
"""
from __future__ import division, absolute_import, print_function
+import warnings
import numpy as np
__all__ = [
'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
- 'nanvar', 'nanstd'
+ 'nanvar', 'nanstd', 'NanWarning'
]
-
-def _nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
- # Using array() instead of asanyarray() because the former always
- # makes a copy, which is important due to the copyto() action later
- arr = np.array(a, subok=True)
- mask = np.isnan(arr)
-
- # Cast bool, unsigned int, and int to float64
- if np.dtype is None and issubclass(arr.dtype.type, (np.integer, np.bool_)):
- ret = np.add.reduce(arr, axis=axis, dtype='f8',
- out=out, keepdims=keepdims)
- else:
- np.copyto(arr, 0.0, where=mask)
- ret = np.add.reduce(arr, axis=axis, dtype=dtype,
- out=out, keepdims=keepdims)
- rcount = (~mask).sum(axis=axis)
- if isinstance(ret, np.ndarray):
- ret = np.true_divide(ret, rcount, out=ret, casting='unsafe',
- subok=False)
- else:
- ret = ret / rcount
- return ret
-
-
-def _nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
- # Using array() instead of asanyarray() because the former always
- # makes a copy, which is important due to the copyto() action later
- arr = np.array(a, subok=True)
- mask = np.isnan(arr)
-
- # First compute the mean, saving 'rcount' for reuse later
- if dtype is None and issubclass(arr.dtype.type, (np.integer, np.bool_)):
- arrmean = np.add.reduce(arr, axis=axis, dtype='f8', keepdims=True)
- else:
- np.copyto(arr, 0.0, where=mask)
- arrmean = np.add.reduce(arr, axis=axis, dtype=dtype, keepdims=True)
- rcount = (~mask).sum(axis=axis, keepdims=True)
- if isinstance(arrmean, np.ndarray):
- arrmean = np.true_divide(arrmean, rcount,
- out=arrmean, casting='unsafe', subok=False)
- else:
- arrmean = arrmean / rcount
-
- # arr - arrmean
- x = arr - arrmean
- np.copyto(x, 0.0, where=mask)
-
- # (arr - arrmean) ** 2
- if issubclass(arr.dtype.type, np.complex_):
- x = np.multiply(x, np.conjugate(x), out=x).real
- else:
- x = np.multiply(x, x, out=x)
-
- # add.reduce((arr - arrmean) ** 2, axis)
- ret = np.add.reduce(x, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
-
- # add.reduce((arr - arrmean) ** 2, axis) / (n - ddof)
- if not keepdims and isinstance(rcount, np.ndarray):
- rcount = rcount.squeeze(axis=axis)
- rcount -= ddof
- if isinstance(ret, np.ndarray):
- ret = np.true_divide(ret, rcount, out=ret, casting='unsafe', subok=False)
- else:
- ret = ret / rcount
-
- return ret
+class NanWarning(RuntimeWarning): pass
-def _nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
- ret = _nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
- keepdims=keepdims)
-
- if isinstance(ret, np.ndarray):
- ret = np.sqrt(ret, out=ret)
- else:
- ret = np.sqrt(ret)
-
- return ret
-
-
-def _nanop(op, fill, a, axis=None):
+def _replace_nan(a, val):
"""
- General operation on arrays with not-a-number values.
+ If `a` is of inexact type, make a copy of `a`, replace NaNs with
+ the `val` value, and return the copy together with a boolean mask
+ marking the locations where NaNs were present. If `a` is not of
+ inexact type, do nothing and return `a` together with a mask of None.
Parameters
----------
- op : callable
- Operation to perform.
- fill : float
- NaN values are set to fill before doing the operation.
a : array-like
Input array.
- axis : {int, None}, optional
- Axis along which the operation is computed.
- By default the input is flattened.
+ val : float
+ NaN values are set to val before doing the operation.
Returns
-------
- y : {ndarray, scalar}
- Processed data.
+ y : ndarray
+ If `a` is of inexact type, return a copy of `a` with the NaNs
+ replaced by the fill value, otherwise return `a`.
+ mask: {bool, None}
+ If `a` is of inexact type, return a boolean mask marking locations of
+ NaNs, otherwise return None.
"""
- y = np.array(a, subok=True)
-
- # We only need to take care of NaN's in floating point arrays
- dt = y.dtype
- if np.issubdtype(dt, np.integer) or np.issubdtype(dt, np.bool_):
- return op(y, axis=axis)
+ is_new = not isinstance(a, np.ndarray)
+ if is_new:
+ a = np.array(a)
+ if not issubclass(a.dtype.type, np.inexact):
+ return a, None
+ if not is_new:
+ # need copy
+ a = np.array(a, subok=True)
mask = np.isnan(a)
- # y[mask] = fill
- # We can't use fancy indexing here as it'll mess w/ MaskedArrays
- # Instead, let's fill the array directly...
- np.copyto(y, fill, where=mask)
- res = op(y, axis=axis)
- mask_all_along_axis = mask.all(axis=axis)
-
- # Along some axes, only nan's were encountered. As such, any values
- # calculated along that axis should be set to nan.
- if mask_all_along_axis.any():
- if np.isscalar(res):
- res = np.nan
- else:
- res[mask_all_along_axis] = np.nan
+ np.copyto(a, val, where=mask)
+ return a, mask
- return res
-
-def nansum(a, axis=None):
+def _copyto(a, val, mask):
"""
- Return the sum of array elements over a given axis treating
- Not a Numbers (NaNs) as zero.
+ Replace values in `a` with `val` where `mask` is True. This differs from
+ copyto in that it will deal with the case where `a` is a numpy scalar.
Parameters
----------
- a : array_like
- Array containing numbers whose sum is desired. If `a` is not an
- array, a conversion is attempted.
- axis : int, optional
- Axis along which the sum is computed. The default is to compute
- the sum of the flattened array.
+ a : ndarray or numpy scalar
+ Array or numpy scalar some of whose values are to be replaced
+ by val.
+ val : numpy scalar
+ Value used as a replacement.
+ mask : ndarray, scalar
+ Boolean array. Where True the corresponding element of `a` is
+ replaced by `val`. Broadcasts.
Returns
-------
- y : ndarray
- An array with the same shape as a, with the specified axis removed.
- If a is a 0-d array, or if axis is None, a scalar is returned with
- the same dtype as `a`.
-
- See Also
- --------
- numpy.sum : Sum across array including Not a Numbers.
- isnan : Shows which elements are Not a Number (NaN).
- isfinite: Shows which elements are not: Not a Number, positive and
- negative infinity
+ res : ndarray, scalar
+ Array with elements replaced or scalar `val`.
- Notes
- -----
- Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic
- (IEEE 754). This means that Not a Number is not equivalent to infinity.
- If positive or negative infinity are present the result is positive or
- negative infinity. But if both positive and negative infinity are present,
- the result is Not A Number (NaN).
-
- Arithmetic is modular when using integer types (all elements of `a` must
- be finite i.e. no elements that are NaNs, positive infinity and negative
- infinity because NaNs are floating point types), and no error is raised
- on overflow.
+ """
+ if isinstance(a, np.ndarray):
+ np.copyto(a, val, where=mask, casting='unsafe')
+ else:
+ a = a.dtype.type(val)
+ return a
- Examples
- --------
- >>> np.nansum(1)
- 1
- >>> np.nansum([1])
- 1
- >>> np.nansum([1, np.nan])
- 1.0
- >>> a = np.array([[1, 1], [1, np.nan]])
- >>> np.nansum(a)
- 3.0
- >>> np.nansum(a, axis=0)
- array([ 2., 1.])
+def _divide_by_count(a, b, out=None):
+ """
+ Compute a/b ignoring invalid results. If `a` is an array the division
+ is done in place. If `a` is a scalar, then its type is preserved in the
+ output. If out is None, then `a` is used instead so that the
+ division is in place.
- When positive infinity and negative infinity are present
+ Parameters
+ ----------
+ a : {ndarray, numpy scalar}
+ Numerator. Expected to be of inexact type but not checked.
+ b : {ndarray, numpy scalar}
+ Denominator.
+ out : ndarray, optional
+ Alternate output array in which to place the result. The default
+ is ``None``; if provided, it must have the same shape as the
+ expected output, but the type will be cast if necessary.
- >>> np.nansum([1, np.nan, np.inf])
- inf
- >>> np.nansum([1, np.nan, np.NINF])
- -inf
- >>> np.nansum([1, np.nan, np.inf, np.NINF])
- nan
+ Returns
+ -------
+ ret : {ndarray, numpy scalar}
+ The return value is a/b. If `a` was an ndarray the division is done
+ in place. If `a` is a numpy scalar, the division preserves its type.
"""
- return _nanop(np.sum, 0, a, axis)
+ with np.errstate(invalid='ignore'):
+ if isinstance(a, np.ndarray):
+ if out is None:
+ return np.divide(a, b, out=a, casting='unsafe')
+ else:
+ return np.divide(a, b, out=out, casting='unsafe')
+ else:
+ if out is None:
+ return a.dtype.type(a / b)
+ else:
+ # This is questionable, but currently a numpy scalar can
+ # be output to a zero dimensional array.
+ return np.divide(a, b, out=out, casting='unsafe')
-def nanmin(a, axis=None):
+def nanmin(a, axis=None, out=None, keepdims=False):
"""
- Return the minimum of an array or minimum along an axis, ignoring any NaNs.
+ Return the minimum of an array or minimum along an axis, ignoring any
+ NaNs.
Parameters
----------
@@ -219,6 +151,19 @@ def nanmin(a, axis=None):
axis : int, optional
Axis along which the minimum is computed. The default is to compute
the minimum of the flattened array.
+ out : ndarray, optional
+ Alternate output array in which to place the result. The default
+ is ``None``; if provided, it must have the same shape as the
+ expected output, but the type will be cast if necessary.
+ See `doc.ufuncs` for details.
+
+ .. versionadded:: 1.8.0
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the original `a`.
+
+ .. versionadded:: 1.8.0
Returns
-------
@@ -271,50 +216,10 @@ def nanmin(a, axis=None):
-inf
"""
- a = np.asanyarray(a)
- if axis is not None:
- return np.fmin.reduce(a, axis)
- else:
- return np.fmin.reduce(a.flat)
-
-
-def nanargmin(a, axis=None):
- """
- Return indices of the minimum values over an axis, ignoring NaNs.
-
- Parameters
- ----------
- a : array_like
- Input data.
- axis : int, optional
- Axis along which to operate. By default flattened input is used.
-
- Returns
- -------
- index_array : ndarray
- An array of indices or a single index value.
-
- See Also
- --------
- argmin, nanargmax
-
- Examples
- --------
- >>> a = np.array([[np.nan, 4], [2, 3]])
- >>> np.argmin(a)
- 0
- >>> np.nanargmin(a)
- 2
- >>> np.nanargmin(a, axis=0)
- array([1, 1])
- >>> np.nanargmin(a, axis=1)
- array([1, 0])
-
- """
- return _nanop(np.argmin, np.inf, a, axis)
+ return np.fmin.reduce(a, axis=axis, out=out, keepdims=keepdims)
-def nanmax(a, axis=None):
+def nanmax(a, axis=None, out=None, keepdims=False):
"""
Return the maximum of an array or maximum along an axis, ignoring any NaNs.
@@ -326,6 +231,19 @@ def nanmax(a, axis=None):
axis : int, optional
Axis along which the maximum is computed. The default is to compute
the maximum of the flattened array.
+ out : ndarray, optional
+ Alternate output array in which to place the result. The default
+ is ``None``; if provided, it must have the same shape as the
+ expected output, but the type will be cast if necessary.
+ See `doc.ufuncs` for details.
+
+ .. versionadded:: 1.8.0
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the original `a`.
+
+ .. versionadded:: 1.8.0
Returns
-------
@@ -378,16 +296,61 @@ def nanmax(a, axis=None):
inf
"""
- a = np.asanyarray(a)
- if axis is not None:
- return np.fmax.reduce(a, axis)
- else:
- return np.fmax.reduce(a.flat)
+ return np.fmax.reduce(a, axis=axis, out=out, keepdims=keepdims)
+
+
+def nanargmin(a, axis=None):
+    """
+    Find the position of the smallest non-NaN value along an axis.
+
+    NaNs are replaced by positive infinity before the search. When a
+    slice consists solely of NaNs, the smallest value representable by
+    ``np.intp`` is stored as its index and a `NanWarning` is issued.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data.
+    axis : int, optional
+        Axis along which to operate. By default flattened input is used.
+
+    Returns
+    -------
+    index_array : ndarray
+        An array of indices or a single index value.
+
+    See Also
+    --------
+    argmin, nanargmax
+
+    Examples
+    --------
+    >>> a = np.array([[np.nan, 4], [2, 3]])
+    >>> np.argmin(a)
+    0
+    >>> np.nanargmin(a)
+    2
+    >>> np.nanargmin(a, axis=0)
+    array([1, 1])
+    >>> np.nanargmin(a, axis=1)
+    array([1, 0])
+
+    """
+    filled, nan_mask = _replace_nan(a, np.inf)
+    result = np.argmin(filled, axis)
+    if nan_mask is None:
+        # Input is not of inexact type, so it cannot hold NaNs and the
+        # plain argmin is already the answer.
+        return result
+    # Slices made up entirely of NaNs have no meaningful minimum.
+    # May later want to do something special for them.
+    all_nan = nan_mask.all(axis=axis)
+    if not all_nan.any():
+        return result
+    warnings.warn("All NaN axis detected.", NanWarning)
+    return _copyto(result, np.iinfo(np.intp).min, all_nan)
def nanargmax(a, axis=None):
"""
- Return indices of the maximum values over an axis, ignoring NaNs.
+ Return indices of the maximum values over an axis, ignoring NaNs. For
+ all NaN slices the minimum value of the np.intp type is returned and
+ a `NanWarning` is raised.
Parameters
----------
@@ -418,7 +381,116 @@ def nanargmax(a, axis=None):
array([1, 1])
"""
- return _nanop(np.argmax, -np.inf, a, axis)
+ a, mask = _replace_nan(a, -np.inf)
+ if mask is None:
+ return np.argmax(a, axis)
+ # May later want to do something special for all nan slices.
+ mask = mask.all(axis=axis)
+ ind = np.argmax(a, axis)
+ if mask.any():
+ warnings.warn("All NaN axis detected.", NanWarning)
+ ind = _copyto(ind, np.iinfo(np.intp).min, mask)
+ return ind
+
+
+def nansum(a, axis=None, dtype=None, out=None, keepdims=False):
+    """
+    Return the sum of array elements over a given axis treating
+    Not a Numbers (NaNs) as zero.
+
+    FutureWarning: In Numpy versions <= 1.8 NaN is returned for slices that
+    are all NaN or empty. In later versions zero will be returned. The
+    warning is issued only when the result actually contains such a slice.
+
+
+    Parameters
+    ----------
+    a : array_like
+        Array containing numbers whose sum is desired. If `a` is not an
+        array, a conversion is attempted.
+    axis : int, optional
+        Axis along which the sum is computed. The default is to compute
+        the sum of the flattened array.
+    dtype : data-type, optional
+        Type to use in computing the sum. For integer inputs, the default
+        is the same as `int64`. For inexact inputs, it must be inexact.
+
+        .. versionadded:: 1.8.0
+    out : ndarray, optional
+        Alternate output array in which to place the result. The default
+        is ``None``. If provided, it must have the same shape as the
+        expected output, but the type will be cast if necessary.
+        See `doc.ufuncs` for details. The casting of NaN to integer can
+        yield unexpected results.
+
+        .. versionadded:: 1.8.0
+    keepdims : bool, optional
+        If True, the axes which are reduced are left in the result as
+        dimensions with size one. With this option, the result will
+        broadcast correctly against the original `a`.
+
+        .. versionadded:: 1.8.0
+
+    Returns
+    -------
+    y : ndarray or numpy scalar
+
+    See Also
+    --------
+    numpy.sum : Sum across array propagating NaNs.
+    isnan : Show which elements are NaN.
+    isfinite: Show which elements are not NaN or +/-inf.
+
+    Notes
+    -----
+    Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic
+    (IEEE 754). This means that Not a Number is not equivalent to infinity.
+    If positive or negative infinity are present the result is positive or
+    negative infinity. But if both positive and negative infinity are present,
+    the result is Not A Number (NaN).
+
+    Arithmetic is modular when using integer types (all elements of `a` must
+    be finite i.e. no elements that are NaNs, positive infinity and negative
+    infinity because NaNs are floating point types), and no error is raised
+    on overflow.
+
+
+    Examples
+    --------
+    >>> np.nansum(1)
+    1
+    >>> np.nansum([1])
+    1
+    >>> np.nansum([1, np.nan])
+    1.0
+    >>> a = np.array([[1, 1], [1, np.nan]])
+    >>> np.nansum(a)
+    3.0
+    >>> np.nansum(a, axis=0)
+    array([ 2.,  1.])
+
+    When positive infinity and negative infinity are present
+
+    >>> np.nansum([1, np.nan, np.inf])
+    inf
+    >>> np.nansum([1, np.nan, np.NINF])
+    -inf
+    >>> np.nansum([1, np.nan, np.inf, np.NINF])
+    nan
+
+    """
+    a, mask = _replace_nan(a, 0)
+    if mask is None:
+        # Input is not of inexact type, so it holds no NaNs; the plain
+        # sum is final and no behavior change is pending for it.
+        return a.sum(axis, dtype, out, keepdims)
+
+    # In version 1.9 everything below reduces to
+    # ``return a.sum(axis, dtype, out, keepdims)``.
+    mask = mask.all(axis, keepdims=keepdims)
+    tot = np.add.reduce(a, axis, dtype, out, keepdims)
+    if mask.any():
+        # Only slices that are entirely NaN (or empty) will change in
+        # 1.9, so warn only when one of them is actually present.
+        warnings.warn("In Numpy 1.9 the sum along empty slices will be zero.",
+                      FutureWarning)
+        tot = _copyto(tot, np.nan, mask)
+    return tot
def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
@@ -429,6 +501,10 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
the flattened array by default, otherwise over the specified axis.
`float64` intermediate and return values are used for integer inputs.
+ For all NaN slices NaN is returned and a `NanWarning` is raised.
+
+ .. versionadded:: 1.8.0
+
Parameters
----------
a : array_like
@@ -439,7 +515,7 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
the mean of the flattened array.
dtype : data-type, optional
Type to use in computing the mean. For integer inputs, the default
- is `float64`; for floating point inputs, it is the same as the
+ is `float64`; for inexact inputs, it is the same as the
input dtype.
out : ndarray, optional
Alternate output array in which to place the result. The default
@@ -455,7 +531,8 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
-------
m : ndarray, see dtype parameter above
If `out=None`, returns a new array containing the mean values,
- otherwise a reference to the output array is returned.
+ otherwise a reference to the output array is returned. NaN is
+ returned for slices that contain only NaNs.
See Also
--------
@@ -465,8 +542,8 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
Notes
-----
- The arithmetic mean is the sum of the non-nan elements along the axis
- divided by the number of non-nan elements.
+ The arithmetic mean is the sum of the non-NaN elements along the axis
+ divided by the number of non-NaN elements.
Note that for floating-point input, the mean is computed using the
same precision the input has. Depending on the input data, this can
@@ -485,14 +562,161 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
array([ 1., 3.5])
"""
- if not (type(a) is np.ndarray):
- try:
- mean = a.nanmean
- return mean(axis=axis, dtype=dtype, out=out)
- except AttributeError:
- pass
+ arr, mask = _replace_nan(a, 0)
+ if mask is None:
+ return np.mean(arr, axis, dtype=dtype, out=out, keepdims=keepdims)
+
+ if dtype is not None:
+ dtype = np.dtype(dtype)
+ if dtype is not None and not issubclass(dtype.type, np.inexact):
+ raise TypeError("If a is inexact, then dtype must be inexact")
+ if out is not None and not issubclass(out.dtype.type, np.inexact):
+ raise TypeError("If a is inexact, then out must be inexact")
+
+ # The warning context speeds things up.
+ with warnings.catch_warnings():
+ warnings.simplefilter('ignore')
+ cnt = np.add.reduce(~mask, axis, dtype=np.intp, keepdims=keepdims)
+ tot = np.add.reduce(arr, axis, dtype=dtype, out=out, keepdims=keepdims)
+ avg = _divide_by_count(tot, cnt, out=out)
+
+ isbad = (cnt == 0)
+ if isbad.any():
+ warnings.warn("Mean of empty slice", NanWarning)
+ # NaN is the only possible bad value, so no further
+ # action is needed to handle bad results.
+ return avg
- return _nanmean(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
+
+def nanvar(a, axis=None, dtype=None, out=None, ddof=0,
+           keepdims=False):
+    """
+    Compute the variance along the specified axis, while ignoring NaNs.
+
+    Returns the variance of the array elements, a measure of the spread of a
+    distribution. The variance is computed for the flattened array by
+    default, otherwise over the specified axis.
+
+    For all NaN slices NaN is returned and a `NanWarning` is raised.
+
+    .. versionadded:: 1.8.0
+
+    Parameters
+    ----------
+    a : array_like
+        Array containing numbers whose variance is desired. If `a` is not an
+        array, a conversion is attempted.
+    axis : int, optional
+        Axis along which the variance is computed. The default is to compute
+        the variance of the flattened array.
+    dtype : data-type, optional
+        Type to use in computing the variance. For arrays of integer type
+        the default is `float64`; for arrays of float types it is the same as
+        the array type.
+    out : ndarray, optional
+        Alternate output array in which to place the result. It must have
+        the same shape as the expected output, but the type is cast if
+        necessary.
+    ddof : int, optional
+        "Delta Degrees of Freedom": the divisor used in the calculation is
+        ``N - ddof``, where ``N`` represents the number of non-NaN
+        elements. By default `ddof` is zero.
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the original `a`.
+
+    Returns
+    -------
+    variance : ndarray, see dtype parameter above
+        If `out` is None, return a new array containing the variance,
+        otherwise return a reference to the output array. If ddof is >= the
+        number of non-NaN elements in a slice or the slice contains only
+        NaNs, then the result for that slice is NaN.
+
+    Raises
+    ------
+    TypeError
+        If `a` is of inexact type and `dtype` or the dtype of `out` is
+        not inexact.
+
+    See Also
+    --------
+    std : Standard deviation
+    mean : Average
+    var : Variance while not ignoring NaNs
+    nanstd, nanmean
+    numpy.doc.ufuncs : Section "Output arguments"
+
+    Notes
+    -----
+    The variance is the average of the squared deviations from the mean,
+    i.e.,  ``var = mean(abs(x - x.mean())**2)``.
+
+    The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
+    If, however, `ddof` is specified, the divisor ``N - ddof`` is used
+    instead.  In standard statistical practice, ``ddof=1`` provides an
+    unbiased estimator of the variance of a hypothetical infinite population.
+    ``ddof=0`` provides a maximum likelihood estimate of the variance for
+    normally distributed variables.
+
+    Note that for complex numbers, the absolute value is taken before
+    squaring, so that the result is always real and nonnegative.
+
+    For floating-point input, the variance is computed using the same
+    precision the input has.  Depending on the input data, this can cause
+    the results to be inaccurate, especially for `float32` (see example
+    below).  Specifying a higher-accuracy accumulator using the ``dtype``
+    keyword can alleviate this issue.
+
+    Examples
+    --------
+    >>> a = np.array([[1, np.nan], [3, 4]])
+    >>> np.nanvar(a)
+    1.5555555555555554
+    >>> np.nanvar(a, axis=0)
+    array([ 1.,  0.])
+    >>> np.nanvar(a, axis=1)
+    array([ 0.,  0.25])
+
+    """
+    arr, mask = _replace_nan(a, 0)
+    if mask is None:
+        # Input is not of inexact type, so it holds no NaNs: defer to the
+        # ordinary variance, forwarding `ddof` so it is not silently lost.
+        return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof,
+                      keepdims=keepdims)
+
+    if dtype is not None:
+        dtype = np.dtype(dtype)
+    if dtype is not None and not issubclass(dtype.type, np.inexact):
+        raise TypeError("If a is inexact, then dtype must be inexact")
+    if out is not None and not issubclass(out.dtype.type, np.inexact):
+        raise TypeError("If a is inexact, then out must be inexact")
+
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+
+        # Compute mean over the non-NaN entries. The reduced axes are kept
+        # so that the mean broadcasts against `arr` below.
+        cnt = np.add.reduce(~mask, axis, dtype=np.intp, keepdims=True)
+        tot = np.add.reduce(arr, axis, dtype=dtype, keepdims=True)
+        avg = np.divide(tot, cnt, out=tot)
+
+        # Compute squared deviation from mean. The former NaN positions
+        # are zeroed again so they contribute nothing to the sum.
+        x = arr - avg
+        np.copyto(x, 0, where=mask)
+        if issubclass(arr.dtype.type, np.complexfloating):
+            sqr = np.multiply(x, x.conj(), out=x).real
+        else:
+            sqr = np.multiply(x, x, out=x)
+
+        # adjust cnt.
+        if not keepdims:
+            cnt = cnt.squeeze(axis)
+        cnt -= ddof
+
+        # Compute variance.
+        var = np.add.reduce(sqr, axis, dtype=dtype, out=out, keepdims=keepdims)
+        var = _divide_by_count(var, cnt)
+
+    isbad = (cnt <= 0)
+    if isbad.any():
+        warnings.warn("Degrees of freedom <= 0 for slice.", NanWarning)
+        # NaN, inf, or negative numbers are all possible bad
+        # values, so explicitly replace them with NaN.
+        var = _copyto(var, np.nan, isbad)
+    return var
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
@@ -504,6 +728,10 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
of the non-NaN array elements. The standard deviation is computed for the
flattened array by default, otherwise over the specified axis.
+ For all NaN slices NaN is returned and a `NanWarning` is raised.
+
+ .. versionadded:: 1.8.0
+
Parameters
----------
a : array_like
@@ -521,8 +749,8 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
values) will be cast if necessary.
ddof : int, optional
Means Delta Degrees of Freedom. The divisor used in calculations
- is ``N - ddof``, where ``N`` represents the number of elements.
- By default `ddof` is zero.
+ is ``N - ddof``, where ``N`` represents the number of non-NaN
+ elements. By default `ddof` is zero.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
@@ -531,8 +759,10 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
Returns
-------
standard_deviation : ndarray, see dtype parameter above.
- If `out` is None, return a new array containing the standard deviation,
- otherwise return a reference to the output array.
+ If `out` is None, return a new array containing the standard
+ deviation, otherwise return a reference to the output array. If
+ ddof is >= the number of non-NaN elements in a slice or the slice
+ contains only NaNs, then the result for that slice is NaN.
See Also
--------
@@ -575,104 +805,9 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
array([ 0., 0.5])
"""
-
- if not (type(a) is np.ndarray):
- try:
- nanstd = a.nanstd
- return nanstd(axis=axis, dtype=dtype, out=out, ddof=ddof)
- except AttributeError:
- pass
-
- return _nanstd(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
- keepdims=keepdims)
-
-
-def nanvar(a, axis=None, dtype=None, out=None, ddof=0,
- keepdims=False):
- """
- Compute the variance along the specified axis, while ignoring NaNs.
-
- Returns the variance of the array elements, a measure of the spread of a
- distribution. The variance is computed for the flattened array by
- default, otherwise over the specified axis.
-
- Parameters
- ----------
- a : array_like
- Array containing numbers whose variance is desired. If `a` is not an
- array, a conversion is attempted.
- axis : int, optional
- Axis along which the variance is computed. The default is to compute
- the variance of the flattened array.
- dtype : data-type, optional
- Type to use in computing the variance. For arrays of integer type
- the default is `float32`; for arrays of float types it is the same as
- the array type.
- out : ndarray, optional
- Alternate output array in which to place the result. It must have
- the same shape as the expected output, but the type is cast if
- necessary.
- ddof : int, optional
- "Delta Degrees of Freedom": the divisor used in the calculation is
- ``N - ddof``, where ``N`` represents the number of elements. By
- default `ddof` is zero.
- keepdims : bool, optional
- If this is set to True, the axes which are reduced are left
- in the result as dimensions with size one. With this option,
- the result will broadcast correctly against the original `arr`.
-
- Returns
- -------
- variance : ndarray, see dtype parameter above
- If ``out=None``, returns a new array containing the variance;
- otherwise, a reference to the output array is returned.
-
- See Also
- --------
- std : Standard deviation
- mean : Average
- var : Variance while not ignoring NaNs
- nanstd, nanmean
- numpy.doc.ufuncs : Section "Output arguments"
-
- Notes
- -----
- The variance is the average of the squared deviations from the mean,
- i.e., ``var = mean(abs(x - x.mean())**2)``.
-
- The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
- If, however, `ddof` is specified, the divisor ``N - ddof`` is used
- instead. In standard statistical practice, ``ddof=1`` provides an
- unbiased estimator of the variance of a hypothetical infinite population.
- ``ddof=0`` provides a maximum likelihood estimate of the variance for
- normally distributed variables.
-
- Note that for complex numbers, the absolute value is taken before
- squaring, so that the result is always real and nonnegative.
-
- For floating-point input, the variance is computed using the same
- precision the input has. Depending on the input data, this can cause
- the results to be inaccurate, especially for `float32` (see example
- below). Specifying a higher-accuracy accumulator using the ``dtype``
- keyword can alleviate this issue.
-
- Examples
- --------
- >>> a = np.array([[1, np.nan], [3, 4]])
- >>> np.var(a)
- 1.5555555555555554
- >>> np.nanvar(a, axis=0)
- array([ 1., 0.])
- >>> np.nanvar(a, axis=1)
- array([ 0., 0.25])
-
- """
- if not (type(a) is np.ndarray):
- try:
- nanvar = a.nanvar
- return nanvar(axis=axis, dtype=dtype, out=out, ddof=ddof)
- except AttributeError:
- pass
-
- return _nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
- keepdims=keepdims)
+ var = nanvar(a, axis, dtype, out, ddof, keepdims)
+ if isinstance(var, np.ndarray):
+ std = np.sqrt(var, out=var)
+ else:
+ std = var.dtype.type(np.sqrt(var))
+ return std