diff options
Diffstat (limited to 'numpy/lib/nanfunctions.py')
-rw-r--r-- | numpy/lib/nanfunctions.py | 237 |
1 files changed, 178 insertions, 59 deletions
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 8d6b0f139..6cffab6ac 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -22,9 +22,15 @@ Functions """ from __future__ import division, absolute_import, print_function +import functools import warnings import numpy as np from numpy.lib import function_base +from numpy.core import overrides + + +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy') __all__ = [ @@ -34,6 +40,33 @@ __all__ = [ ] +def _nan_mask(a, out=None): + """ + Parameters + ---------- + a : array-like + Input array with at least 1 dimension. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output and will prevent the allocation of a new array. + + Returns + ------- + y : bool ndarray or True + A bool array where ``np.nan`` positions are marked with ``False`` + and other positions are marked with ``True``. If the type of ``a`` + is such that it can't possibly contain ``np.nan``, returns ``True``. + """ + # we assume that a is an array for this private function + + if a.dtype.kind not in 'fc': + return True + + y = np.isnan(a, out=out) + y = np.invert(y, out=y) + return y + def _replace_nan(a, val): """ If `a` is of inexact type, make a copy of `a`, replace NaNs with @@ -132,7 +165,8 @@ def _remove_nan_1d(arr1d, overwrite_input=False): c = np.isnan(arr1d) s = np.nonzero(c)[0] if s.size == arr1d.size: - warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=4) + warnings.warn("All-NaN slice encountered", RuntimeWarning, + stacklevel=5) return arr1d[:0], True elif s.size == 0: return arr1d, overwrite_input @@ -188,6 +222,11 @@ def _divide_by_count(a, b, out=None): return np.divide(a, b, out=out, casting='unsafe') +def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None): + return (a, out) + + +@array_function_dispatch(_nanmin_dispatcher) def nanmin(a, axis=None, out=None, keepdims=np._NoValue): """ Return minimum of an array or minimum along an axis, ignoring any NaNs. @@ -260,9 +299,9 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): >>> np.nanmin(a) 1.0 >>> np.nanmin(a, axis=0) - array([ 1., 2.]) + array([1., 2.]) >>> np.nanmin(a, axis=1) - array([ 1., 3.]) + array([1., 3.]) When positive infinity and negative infinity are present: @@ -280,7 +319,8 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): # which do not implement isnan (gh-9009), or fmin correctly (gh-8975) res = np.fmin.reduce(a, axis=axis, out=out, **kwargs) if np.isnan(res).any(): - warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2) + warnings.warn("All-NaN slice encountered", RuntimeWarning, + stacklevel=3) else: # Slow, but safe for subclasses of ndarray a, mask = _replace_nan(a, +np.inf) @@ -292,10 +332,16 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): mask = np.all(mask, axis=axis, **kwargs) if np.any(mask): res = _copyto(res, np.nan, mask) - warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2) + warnings.warn("All-NaN axis encountered", RuntimeWarning, + stacklevel=3) return res +def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None): + return (a, out) + + +@array_function_dispatch(_nanmax_dispatcher) def nanmax(a, axis=None, out=None, keepdims=np._NoValue): """ Return the maximum of an array or maximum along an axis, ignoring any @@ -368,9 +414,9 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): >>> np.nanmax(a) 3.0 >>> np.nanmax(a, axis=0) - array([ 3., 2.]) + array([3., 2.]) >>> np.nanmax(a, axis=1) - array([ 2., 3.]) + array([2., 3.]) When positive infinity and negative infinity are present: @@ -388,7 +434,8 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): # which do not implement isnan (gh-9009), or fmax correctly (gh-8975) res = np.fmax.reduce(a, axis=axis, out=out, **kwargs) if np.isnan(res).any(): - warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2) + warnings.warn("All-NaN slice encountered", RuntimeWarning, + stacklevel=3) else: # Slow, but safe for subclasses of ndarray a, mask = _replace_nan(a, -np.inf) @@ -400,10 +447,16 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): mask = np.all(mask, axis=axis, **kwargs) if np.any(mask): res = _copyto(res, np.nan, mask) - warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2) + warnings.warn("All-NaN axis encountered", RuntimeWarning, + stacklevel=3) return res +def _nanargmin_dispatcher(a, axis=None): + return (a,) + + +@array_function_dispatch(_nanargmin_dispatcher) def nanargmin(a, axis=None): """ Return the indices of the minimum values in the specified axis ignoring @@ -448,6 +501,11 @@ def nanargmin(a, axis=None): return res +def _nanargmax_dispatcher(a, axis=None): + return (a,) + + +@array_function_dispatch(_nanargmax_dispatcher) def nanargmax(a, axis=None): """ Return the indices of the maximum values in the specified axis ignoring @@ -493,6 +551,11 @@ def nanargmax(a, axis=None): return res +def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None): + return (a, out) + + +@array_function_dispatch(_nansum_dispatcher) def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ Return the sum of array elements over a given axis treating Not a @@ -570,12 +633,15 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): >>> np.nansum(a) 3.0 >>> np.nansum(a, axis=0) - array([ 2., 1.]) + array([2., 1.]) >>> np.nansum([1, np.nan, np.inf]) inf >>> np.nansum([1, np.nan, np.NINF]) -inf - >>> np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present + >>> from numpy.testing import suppress_warnings + >>> with suppress_warnings() as sup: + ... sup.filter(RuntimeWarning) + ... np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present nan """ @@ -583,6 +649,11 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) +def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None): + return (a, out) + + +@array_function_dispatch(_nanprod_dispatcher) def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ Return the product of array elements over a given axis treating Not a @@ -641,13 +712,18 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): >>> np.nanprod(a) 6.0 >>> np.nanprod(a, axis=0) - array([ 3., 2.]) + array([3., 2.]) """ a, mask = _replace_nan(a, 1) return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) +def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None): + return (a, out) + + +@array_function_dispatch(_nancumsum_dispatcher) def nancumsum(a, axis=None, dtype=None, out=None): """ Return the cumulative sum of array elements over a given axis treating Not a @@ -697,22 +773,27 @@ def nancumsum(a, axis=None, dtype=None, out=None): >>> np.nancumsum([1]) array([1]) >>> np.nancumsum([1, np.nan]) - array([ 1., 1.]) + array([1., 1.]) >>> a = np.array([[1, 2], [3, np.nan]]) >>> np.nancumsum(a) - array([ 1., 3., 6., 6.]) + array([1., 3., 6., 6.]) >>> np.nancumsum(a, axis=0) - array([[ 1., 2.], - [ 4., 2.]]) + array([[1., 2.], + [4., 2.]]) >>> np.nancumsum(a, axis=1) - array([[ 1., 3.], - [ 3., 3.]]) + array([[1., 3.], + [3., 3.]]) """ a, mask = _replace_nan(a, 0) return np.cumsum(a, axis=axis, dtype=dtype, out=out) +def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None): + return (a, out) + + +@array_function_dispatch(_nancumprod_dispatcher) def nancumprod(a, axis=None, dtype=None, out=None): """ Return the cumulative product of array elements over a given axis treating Not a @@ -759,22 +840,27 @@ def nancumprod(a, axis=None, dtype=None, out=None): >>> np.nancumprod([1]) array([1]) >>> np.nancumprod([1, np.nan]) - array([ 1., 1.]) + array([1., 1.]) >>> a = np.array([[1, 2], [3, np.nan]]) >>> np.nancumprod(a) - array([ 1., 2., 6., 6.]) + array([1., 2., 6., 6.]) >>> np.nancumprod(a, axis=0) - array([[ 1., 2.], - [ 3., 2.]]) + array([[1., 2.], + [3., 2.]]) >>> np.nancumprod(a, axis=1) - array([[ 1., 2.], - [ 3., 3.]]) + array([[1., 2.], + [3., 3.]]) """ a, mask = _replace_nan(a, 1) return np.cumprod(a, axis=axis, dtype=dtype, out=out) +def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None): + return (a, out) + + +@array_function_dispatch(_nanmean_dispatcher) def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ Compute the arithmetic mean along the specified axis, ignoring NaNs. @@ -844,9 +930,9 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): >>> np.nanmean(a) 2.6666666666666665 >>> np.nanmean(a, axis=0) - array([ 2., 4.]) + array([2., 4.]) >>> np.nanmean(a, axis=1) - array([ 1., 3.5]) + array([1., 3.5]) # may vary """ arr, mask = _replace_nan(a, 0) @@ -866,7 +952,7 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): isbad = (cnt == 0) if isbad.any(): - warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2) + warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=3) # NaN is the only possible bad value, so no further # action is needed to handle bad results. return avg @@ -878,7 +964,7 @@ def _nanmedian1d(arr1d, overwrite_input=False): See nanmedian for parameter usage """ arr1d, overwrite_input = _remove_nan_1d(arr1d, - overwrite_input=overwrite_input) + overwrite_input=overwrite_input) if arr1d.size == 0: return np.nan @@ -921,13 +1007,20 @@ def _nanmedian_small(a, axis=None, out=None, overwrite_input=False): a = np.ma.masked_array(a, np.isnan(a)) m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input) for i in range(np.count_nonzero(m.mask.ravel())): - warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=3) + warnings.warn("All-NaN slice encountered", RuntimeWarning, + stacklevel=4) if out is not None: out[...] = m.filled(np.nan) return out return m.filled(np.nan) +def _nanmedian_dispatcher( + a, axis=None, out=None, overwrite_input=None, keepdims=None): + return (a, out) + + +@array_function_dispatch(_nanmedian_dispatcher) def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue): """ Compute the median along the specified axis, while ignoring NaNs. @@ -992,19 +1085,19 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu >>> a = np.array([[10.0, 7, 4], [3, 2, 1]]) >>> a[0, 1] = np.nan >>> a - array([[ 10., nan, 4.], - [ 3., 2., 1.]]) + array([[10., nan, 4.], + [ 3., 2., 1.]]) >>> np.median(a) nan >>> np.nanmedian(a) 3.0 >>> np.nanmedian(a, axis=0) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> np.median(a, axis=1) - array([ 7., 2.]) + array([nan, 2.]) >>> b = a.copy() >>> np.nanmedian(b, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a==b) >>> b = a.copy() >>> np.nanmedian(b, axis=None, overwrite_input=True) @@ -1026,6 +1119,12 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu return r +def _nanpercentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, + interpolation=None, keepdims=None): + return (a, q, out) + + +@array_function_dispatch(_nanpercentile_dispatcher) def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=np._NoValue): """ @@ -1114,27 +1213,27 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) >>> a[0][1] = np.nan >>> a - array([[ 10., nan, 4.], - [ 3., 2., 1.]]) + array([[10., nan, 4.], + [ 3., 2., 1.]]) >>> np.percentile(a, 50) nan >>> np.nanpercentile(a, 50) - 3.5 + 3.0 >>> np.nanpercentile(a, 50, axis=0) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> np.nanpercentile(a, 50, axis=1, keepdims=True) - array([[ 7.], - [ 2.]]) + array([[7.], + [2.]]) >>> m = np.nanpercentile(a, 50, axis=0) >>> out = np.zeros_like(m) >>> np.nanpercentile(a, 50, axis=0, out=out) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> m - array([ 6.5, 2. , 2.5]) + array([6.5, 2. , 2.5]) >>> b = a.copy() >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a==b) """ @@ -1146,12 +1245,19 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, a, q, axis, out, overwrite_input, interpolation, keepdims) +def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, + interpolation=None, keepdims=None): + return (a, q, out) + + +@array_function_dispatch(_nanquantile_dispatcher) def nanquantile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=np._NoValue): """ Compute the qth quantile of the data along the specified axis, while ignoring nan values. Returns the qth quantile(s) of the array elements. + .. versionadded:: 1.15.0 Parameters @@ -1222,26 +1328,26 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False, >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) >>> a[0][1] = np.nan >>> a - array([[ 10., nan, 4.], - [ 3., 2., 1.]]) + array([[10., nan, 4.], + [ 3., 2., 1.]]) >>> np.quantile(a, 0.5) nan >>> np.nanquantile(a, 0.5) - 3.5 + 3.0 >>> np.nanquantile(a, 0.5, axis=0) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> np.nanquantile(a, 0.5, axis=1, keepdims=True) - array([[ 7.], - [ 2.]]) + array([[7.], + [2.]]) >>> m = np.nanquantile(a, 0.5, axis=0) >>> out = np.zeros_like(m) >>> np.nanquantile(a, 0.5, axis=0, out=out) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> m - array([ 6.5, 2. , 2.5]) + array([6.5, 2. , 2.5]) >>> b = a.copy() >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a==b) """ a = np.asanyarray(a) @@ -1308,6 +1414,12 @@ def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'): arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation) +def _nanvar_dispatcher( + a, axis=None, dtype=None, out=None, ddof=None, keepdims=None): + return (a, out) + + +@array_function_dispatch(_nanvar_dispatcher) def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): """ Compute the variance along the specified axis, while ignoring NaNs. @@ -1331,7 +1443,7 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): the variance of the flattened array. dtype : data-type, optional Type to use in computing the variance. For arrays of integer type - the default is `float32`; for arrays of float types it is the same as + the default is `float64`; for arrays of float types it is the same as the array type. out : ndarray, optional Alternate output array in which to place the result. It must have @@ -1390,12 +1502,12 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): Examples -------- >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.var(a) + >>> np.nanvar(a) 1.5555555555555554 >>> np.nanvar(a, axis=0) - array([ 1., 0.]) + array([1., 0.]) >>> np.nanvar(a, axis=1) - array([ 0., 0.25]) + array([0., 0.25]) # may vary """ arr, mask = _replace_nan(a, 0) @@ -1442,13 +1554,20 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): isbad = (dof <= 0) if np.any(isbad): - warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, stacklevel=2) + warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, + stacklevel=3) # NaN, inf, or negative numbers are all possible bad # values, so explicitly replace them with NaN. var = _copyto(var, np.nan, isbad) return var +def _nanstd_dispatcher( + a, axis=None, dtype=None, out=None, ddof=None, keepdims=None): + return (a, out) + + +@array_function_dispatch(_nanstd_dispatcher) def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): """ Compute the standard deviation along the specified axis, while @@ -1538,9 +1657,9 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): >>> np.nanstd(a) 1.247219128924647 >>> np.nanstd(a, axis=0) - array([ 1., 0.]) + array([1., 0.]) >>> np.nanstd(a, axis=1) - array([ 0., 0.5]) + array([0., 0.5]) # may vary """ var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, |