diff options
Diffstat (limited to 'numpy/lib/nanfunctions.py')
-rw-r--r-- | numpy/lib/nanfunctions.py | 316 |
1 files changed, 172 insertions, 144 deletions
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 44cc2b163..5766084ab 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -13,23 +13,18 @@ Functions - `nanvar` -- variance of non-NaN values - `nanstd` -- standard deviation of non-NaN values -Classes -------- -- `NanWarning` -- Warning raised by nanfunctions - """ from __future__ import division, absolute_import, print_function import warnings import numpy as np + __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', - 'nanvar', 'nanstd', 'NanWarning' + 'nanvar', 'nanstd' ] -class NanWarning(RuntimeWarning): pass - def _replace_nan(a, val): """ @@ -103,7 +98,8 @@ def _divide_by_count(a, b, out=None): Compute a/b ignoring invalid results. If `a` is an array the division is done in place. If `a` is a scalar, then its type is preserved in the output. If out is None, then then a is used instead so that the - division is in place. + division is in place. Note that this is only called with `a` an inexact + type. Parameters ---------- @@ -140,37 +136,38 @@ def _divide_by_count(a, b, out=None): def nanmin(a, axis=None, out=None, keepdims=False): """ - Return the minimum of an array or minimum along an axis, ignoring any - NaNs. + Return minimum of an array or minimum along an axis, ignoring any NaNs. + When all-NaN slices are encountered a ``RuntimeWarning`` is raised and + Nan is returned for that slice. Parameters ---------- a : array_like - Array containing numbers whose minimum is desired. If `a` is not - an array, a conversion is attempted. + Array containing numbers whose minimum is desired. If `a` is not an + array, a conversion is attempted. axis : int, optional Axis along which the minimum is computed. The default is to compute the minimum of the flattened array. out : ndarray, optional Alternate output array in which to place the result. The default is ``None``; if provided, it must have the same shape as the - expected output, but the type will be cast if necessary. - See `doc.ufuncs` for details. + expected output, but the type will be cast if necessary. See + `doc.ufuncs` for details. .. versionadded:: 1.8.0 keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `a`. + If this is set to True, the axes which are reduced are left in the + result as dimensions with size one. With this option, the result + will broadcast correctly against the original `a`. .. versionadded:: 1.8.0 Returns ------- nanmin : ndarray - An array with the same shape as `a`, with the specified axis removed. - If `a` is a 0-d array, or if axis is None, an ndarray scalar is - returned. The same dtype as `a` is returned. + An array with the same shape as `a`, with the specified axis + removed. If `a` is a 0-d array, or if axis is None, an ndarray + scalar is returned. The same dtype as `a` is returned. See Also -------- @@ -193,8 +190,8 @@ def nanmin(a, axis=None, out=None, keepdims=False): ----- Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic (IEEE 754). This means that Not a Number is not equivalent to infinity. - Positive infinity is treated as a very large number and negative infinity - is treated as a very small (i.e. negative) number. + Positive infinity is treated as a very large number and negative + infinity is treated as a very small (i.e. negative) number. If the input has a integer type the function is equivalent to np.min. @@ -216,32 +213,51 @@ def nanmin(a, axis=None, out=None, keepdims=False): -inf """ - return np.fmin.reduce(a, axis=axis, out=out, keepdims=keepdims) + if not isinstance(a, np.ndarray) or type(a) is np.ndarray: + # Fast, but not safe for subclasses of ndarray + res = np.fmin.reduce(a, axis=axis, out=out, keepdims=keepdims) + if np.isnan(res).any(): + warnings.warn("All-NaN axis encountered", RuntimeWarning) + else: + # Slow, but safe for subclasses of ndarray + a, mask = _replace_nan(a, +np.inf) + res = np.amin(a, axis=axis, out=out, keepdims=keepdims) + if mask is None: + return res + + # Check for all-NaN axis + mask = np.all(mask, axis=axis, keepdims=keepdims) + if np.any(mask): + res = _copyto(res, mask, np.nan) + warnings.warn("All-NaN axis encountered", RuntimeWarning) + return res def nanmax(a, axis=None, out=None, keepdims=False): """ - Return the maximum of an array or maximum along an axis, ignoring any NaNs. + Return the maximum of an array or maximum along an axis, ignoring any + NaNs. When all-NaN slices are encountered a ``RuntimeWarning`` is + raised and NaN is returned for that slice. Parameters ---------- a : array_like - Array containing numbers whose maximum is desired. If `a` is not - an array, a conversion is attempted. + Array containing numbers whose maximum is desired. If `a` is not an + array, a conversion is attempted. axis : int, optional Axis along which the maximum is computed. The default is to compute the maximum of the flattened array. out : ndarray, optional Alternate output array in which to place the result. The default is ``None``; if provided, it must have the same shape as the - expected output, but the type will be cast if necessary. - See `doc.ufuncs` for details. + expected output, but the type will be cast if necessary. See + `doc.ufuncs` for details. .. versionadded:: 1.8.0 keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `a`. + If this is set to True, the axes which are reduced are left in the + result as dimensions with size one. With this option, the result + will broadcast correctly against the original `a`. .. versionadded:: 1.8.0 @@ -273,8 +289,8 @@ def nanmax(a, axis=None, out=None, keepdims=False): ----- Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic (IEEE 754). This means that Not a Number is not equivalent to infinity. - Positive infinity is treated as a very large number and negative infinity - is treated as a very small (i.e. negative) number. + Positive infinity is treated as a very large number and negative + infinity is treated as a very small (i.e. negative) number. If the input has a integer type the function is equivalent to np.max. @@ -296,15 +312,31 @@ def nanmax(a, axis=None, out=None, keepdims=False): inf """ - return np.fmax.reduce(a, axis=axis, out=out, keepdims=keepdims) + if not isinstance(a, np.ndarray) or type(a) is np.ndarray: + # Fast, but not safe for subclasses of ndarray + res = np.fmax.reduce(a, axis=axis, out=out, keepdims=keepdims) + if np.isnan(res).any(): + warnings.warn("All-NaN slice encountered", RuntimeWarning) + else: + # Slow, but safe for subclasses of ndarray + a, mask = _replace_nan(a, -np.inf) + res = np.amax(a, axis=axis, out=out, keepdims=keepdims) + if mask is None: + return res + + # Check for all-NaN axis + mask = np.all(mask, axis=axis, keepdims=keepdims) + if np.any(mask): + res = _copyto(res, mask, np.nan) + warnings.warn("All-NaN axis encountered", RuntimeWarning) + return res def nanargmin(a, axis=None): """ Return the indices of the minimum values in the specified axis ignoring - NaNs. For all-NaN slices, the negative number ``np.iinfo('intp').min`` - is returned. It is platform dependent. Warning: the results cannot be - trusted if a slice contains only NaNs and Infs. + NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the results + cannot be trusted if a slice contains only NaNs and Infs. Parameters ---------- @@ -336,23 +368,19 @@ def nanargmin(a, axis=None): """ a, mask = _replace_nan(a, np.inf) - if mask is None: - return np.argmin(a, axis) - # May later want to do something special for all nan slices. - mask = mask.all(axis=axis) - ind = np.argmin(a, axis) - if mask.any(): - warnings.warn("All NaN axis detected.", NanWarning) - ind =_copyto(ind, np.iinfo(np.intp).min, mask) - return ind + res = np.argmin(a, axis=axis) + if mask is not None: + mask = np.all(mask, axis=axis) + if np.any(mask): + raise ValueError("All-NaN slice encountered") + return res def nanargmax(a, axis=None): """ Return the indices of the maximum values in the specified axis ignoring - NaNs. For all-NaN slices, the negative number ``np.iinfo('intp').min`` - is returned. It is platform dependent. Warning: the results cannot be - trusted if a slice contains only NaNs and -Infs. + NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the + results cannot be trusted if a slice contains only NaNs and -Infs. Parameters @@ -385,24 +413,21 @@ def nanargmax(a, axis=None): """ a, mask = _replace_nan(a, -np.inf) - if mask is None: - return np.argmax(a, axis) - # May later want to do something special for all nan slices. - mask = mask.all(axis=axis) - ind = np.argmax(a, axis) - if mask.any(): - warnings.warn("All NaN axis detected.", NanWarning) - ind = _copyto(ind, np.iinfo(np.intp).min, mask) - return ind + res = np.argmax(a, axis=axis) + if mask is not None: + mask = np.all(mask, axis=axis) + if np.any(mask): + raise ValueError("All-NaN slice encountered") + return res def nansum(a, axis=None, dtype=None, out=None, keepdims=0): """ - Return the sum of array elements over a given axis treating - Not a Numbers (NaNs) as zero. + Return the sum of array elements over a given axis treating Not a + Numbers (NaNs) as zero. - In Numpy versions <= 1.8 Nan is returned for slices that - are all-NaN or empty. In later versions zero is returned. + In Numpy versions <= 1.8 Nan is returned for slices that are all-NaN or + empty. In later versions zero is returned. Parameters ---------- @@ -410,19 +435,23 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=0): Array containing numbers whose sum is desired. If `a` is not an array, a conversion is attempted. axis : int, optional - Axis along which the sum is computed. The default is to compute - the sum of the flattened array. + Axis along which the sum is computed. The default is to compute the + sum of the flattened array. dtype : data-type, optional - Type to use in computing the sum. For integer inputs, the default - is the same as `int64`. For inexact inputs, it must be inexact. + The type of the returned array and of the accumulator in which the + elements are summed. By default, the dtype of `a` is used. An + exception is when `a` has an integer type with less precision than + the platform (u)intp. In that case, the default will be either + (u)int32 or (u)int64 depending on whether the platform is 32 or 64 + bits. For inexact inputs, dtype must be inexact. .. versionadded:: 1.8.0 out : ndarray, optional Alternate output array in which to place the result. The default is ``None``. If provided, it must have the same shape as the - expected output, but the type will be cast if necessary. - See `doc.ufuncs` for details. The casting of NaN to integer can - yield unexpected results. + expected output, but the type will be cast if necessary. See + `doc.ufuncs` for details. The casting of NaN to integer can yield + unexpected results. .. versionadded:: 1.8.0 keepdims : bool, optional @@ -444,17 +473,13 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=0): Notes ----- - Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic - (IEEE 754). This means that Not a Number is not equivalent to infinity. - If positive or negative infinity are present the result is positive or - negative infinity. But if both positive and negative infinity are present, - the result is Not A Number (NaN). - - Arithmetic is modular when using integer types (all elements of `a` must - be finite i.e. no elements that are NaNs, positive infinity and negative - infinity because NaNs are floating point types), and no error is raised - on overflow. + If both positive and negative infinity are present, the sum will be Not + A Number (NaN). + Numpy integer arithmetic is modular. If the size of a sum exceeds the + size of an integer accumulator, its value will wrap around and the + result will be incorrect. Specifying ``dtype=double`` can alleviate + that problem. Examples -------- @@ -478,7 +503,7 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=0): """ a, mask = _replace_nan(a, 0) - return a.sum(axis, dtype, out, keepdims) + return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): @@ -489,7 +514,7 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): the flattened array by default, otherwise over the specified axis. `float64` intermediate and return values are used for integer inputs. - For all-NaN slices, NaN is returned and a `NanWarning` is raised. + For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised. .. versionadded:: 1.8.0 @@ -503,17 +528,17 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): the mean of the flattened array. dtype : data-type, optional Type to use in computing the mean. For integer inputs, the default - is `float64`; for inexact inputs, it is the same as the - input dtype. + is `float64`; for inexact inputs, it is the same as the input + dtype. out : ndarray, optional Alternate output array in which to place the result. The default is ``None``; if provided, it must have the same shape as the - expected output, but the type will be cast if necessary. - See `doc.ufuncs` for details. + expected output, but the type will be cast if necessary. See + `doc.ufuncs` for details. keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. + If this is set to True, the axes which are reduced are left in the + result as dimensions with size one. With this option, the result + will broadcast correctly against the original `arr`. Returns ------- @@ -533,11 +558,11 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): The arithmetic mean is the sum of the non-NaN elements along the axis divided by the number of non-NaN elements. - Note that for floating-point input, the mean is computed using the - same precision the input has. Depending on the input data, this can - cause the results to be inaccurate, especially for `float32`. - Specifying a higher-precision accumulator using the `dtype` keyword - can alleviate this issue. + Note that for floating-point input, the mean is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for `float32`. Specifying a + higher-precision accumulator using the `dtype` keyword can alleviate + this issue. Examples -------- @@ -552,7 +577,7 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): """ arr, mask = _replace_nan(a, 0) if mask is None: - return np.mean(arr, axis, dtype=dtype, out=out, keepdims=keepdims) + return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) if dtype is not None: dtype = np.dtype(dtype) @@ -564,28 +589,28 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): # The warning context speeds things up. with warnings.catch_warnings(): warnings.simplefilter('ignore') - cnt = np.add.reduce(~mask, axis, dtype=np.intp, keepdims=keepdims) - tot = np.add.reduce(arr, axis, dtype=dtype, out=out, keepdims=keepdims) + cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims) + tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) avg = _divide_by_count(tot, cnt, out=out) isbad = (cnt == 0) if isbad.any(): - warnings.warn("Mean of empty slice", NanWarning) + warnings.warn("Mean of empty slice", RuntimeWarning) # NaN is the only possible bad value, so no further # action is needed to handle bad results. return avg -def nanvar(a, axis=None, dtype=None, out=None, ddof=0, - keepdims=False): +def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ Compute the variance along the specified axis, while ignoring NaNs. - Returns the variance of the array elements, a measure of the spread of a - distribution. The variance is computed for the flattened array by + Returns the variance of the array elements, a measure of the spread of + a distribution. The variance is computed for the flattened array by default, otherwise over the specified axis. - For all-NaN slices, NaN is returned and a `NanWarning` is raised. + For all-NaN slices or slices with zero degrees of freedom, NaN is + returned and a `RuntimeWarning` is raised. .. versionadded:: 1.8.0 @@ -638,9 +663,9 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is specified, the divisor ``N - ddof`` is used instead. In standard statistical practice, ``ddof=1`` provides an - unbiased estimator of the variance of a hypothetical infinite population. - ``ddof=0`` provides a maximum likelihood estimate of the variance for - normally distributed variables. + unbiased estimator of the variance of a hypothetical infinite + population. ``ddof=0`` provides a maximum likelihood estimate of the + variance for normally distributed variables. Note that for complex numbers, the absolute value is taken before squaring, so that the result is always real and nonnegative. @@ -664,7 +689,8 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, """ arr, mask = _replace_nan(a, 0) if mask is None: - return np.var(arr, axis, dtype=dtype, out=out, keepdims=keepdims) + return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) if dtype is not None: dtype = np.dtype(dtype) @@ -677,30 +703,29 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, warnings.simplefilter('ignore') # Compute mean - cnt = np.add.reduce(~mask, axis, dtype=np.intp, keepdims=True) - tot = np.add.reduce(arr, axis, dtype=dtype, keepdims=True) - avg = np.divide(tot, cnt, out=tot) + cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=True) + avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=True) + avg = _divide_by_count(avg, cnt) # Compute squared deviation from mean. - x = arr - avg - np.copyto(x, 0, where=mask) + arr -= avg + arr = _copyto(arr, 0, mask) if issubclass(arr.dtype.type, np.complexfloating): - sqr = np.multiply(x, x.conj(), out=x).real + sqr = np.multiply(arr, arr.conj(), out=arr).real else: - sqr = np.multiply(x, x, out=x) - - # adjust cnt. - if not keepdims: - cnt = cnt.squeeze(axis) - cnt -= ddof + sqr = np.multiply(arr, arr, out=arr) # Compute variance. - var = np.add.reduce(sqr, axis, dtype=dtype, out=out, keepdims=keepdims) - var = _divide_by_count(var, cnt) + var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + if var.ndim < cnt.ndim: + # Subclasses of ndarray may ignore keepdims, so check here. + cnt = cnt.squeeze(axis) + dof = cnt - ddof + var = _divide_by_count(var, dof) - isbad = (cnt <= 0) - if isbad.any(): - warnings.warn("Degrees of freedom <= 0 for slice.", NanWarning) + isbad = (dof <= 0) + if np.any(isbad): + warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning) # NaN, inf, or negative numbers are all possible bad # values, so explicitly replace them with NaN. var = _copyto(var, np.nan, isbad) @@ -712,11 +737,13 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): Compute the standard deviation along the specified axis, while ignoring NaNs. - Returns the standard deviation, a measure of the spread of a distribution, - of the non-NaN array elements. The standard deviation is computed for the - flattened array by default, otherwise over the specified axis. + Returns the standard deviation, a measure of the spread of a + distribution, of the non-NaN array elements. The standard deviation is + computed for the flattened array by default, otherwise over the + specified axis. - For all-NaN slices, NaN is returned and a `NanWarning` is raised. + For all-NaN slices or slices with zero degrees of freedom, NaN is + returned and a `RuntimeWarning` is raised. .. versionadded:: 1.8.0 @@ -729,12 +756,12 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): to compute the standard deviation of the flattened array. dtype : dtype, optional Type to use in computing the standard deviation. For arrays of - integer type the default is float64, for arrays of float types it is - the same as the array type. + integer type the default is float64, for arrays of float types it + is the same as the array type. out : ndarray, optional Alternative output array in which to place the result. It must have - the same shape as the expected output but the type (of the calculated - values) will be cast if necessary. + the same shape as the expected output but the type (of the + calculated values) will be cast if necessary. ddof : int, optional Means Delta Degrees of Freedom. The divisor used in calculations is ``N - ddof``, where ``N`` represents the number of non-NaN @@ -761,26 +788,26 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): Notes ----- The standard deviation is the square root of the average of the squared - deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``. + deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``. The average squared deviation is normally calculated as - ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is specified, - the divisor ``N - ddof`` is used instead. In standard statistical - practice, ``ddof=1`` provides an unbiased estimator of the variance - of the infinite population. ``ddof=0`` provides a maximum likelihood - estimate of the variance for normally distributed variables. The - standard deviation computed in this function is the square root of + ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is + specified, the divisor ``N - ddof`` is used instead. In standard + statistical practice, ``ddof=1`` provides an unbiased estimator of the + variance of the infinite population. ``ddof=0`` provides a maximum + likelihood estimate of the variance for normally distributed variables. + The standard deviation computed in this function is the square root of the estimated variance, so even with ``ddof=1``, it will not be an unbiased estimate of the standard deviation per se. - Note that, for complex numbers, `std` takes the absolute - value before squaring, so that the result is always real and nonnegative. + Note that, for complex numbers, `std` takes the absolute value before + squaring, so that the result is always real and nonnegative. For floating-point input, the *std* is computed using the same precision the input has. Depending on the input data, this can cause - the results to be inaccurate, especially for float32 (see example below). - Specifying a higher-accuracy accumulator using the `dtype` keyword can - alleviate this issue. + the results to be inaccurate, especially for float32 (see example + below). Specifying a higher-accuracy accumulator using the `dtype` + keyword can alleviate this issue. Examples -------- @@ -793,7 +820,8 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): array([ 0., 0.5]) """ - var = nanvar(a, axis, dtype, out, ddof, keepdims) + var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) if isinstance(var, np.ndarray): std = np.sqrt(var, out=var) else: |