From aaac613ea54aed930d54aebe05c0179fc14dc031 Mon Sep 17 00:00:00 2001 From: Benjamin Root Date: Thu, 2 May 2013 00:50:39 -0400 Subject: ENH: Adding np.nanmean(), np.nanstd(), np.nanvar() --- numpy/core/fromnumeric.py | 263 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 262 insertions(+), 1 deletion(-) (limited to 'numpy/core/fromnumeric.py') diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 0dacd4bca..8686e0531 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -22,7 +22,8 @@ __all__ = ['take', 'reshape', 'choose', 'repeat', 'put', 'resize', 'diagonal', 'trace', 'ravel', 'nonzero', 'shape', 'compress', 'clip', 'sum', 'product', 'prod', 'sometrue', 'alltrue', 'any', 'all', 'cumsum', 'cumproduct', 'cumprod', 'ptp', 'ndim', - 'rank', 'size', 'around', 'round_', 'mean', 'std', 'var', 'squeeze', + 'rank', 'size', 'around', 'round_', 'mean', 'nanmean', + 'std', 'nanstd', 'var', 'nanvar', 'squeeze', 'amax', 'amin', ] @@ -2665,6 +2666,8 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=False): See Also -------- average : Weighted average + nanmean : Arithmetic mean while ignoring NaNs + var, nanvar Notes ----- @@ -2711,6 +2714,80 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=False): return _methods._mean(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) +def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): + """ + Compute the arithmetic mean along the specified axis, ignoring NaNs. + + Returns the average of the array elements. The average is taken over + the flattened array by default, otherwise over the specified axis. + `float64` intermediate and return values are used for integer inputs. + + Parameters + ---------- + a : array_like + Array containing numbers whose mean is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the means are computed. The default is to compute + the mean of the flattened array. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default + is `float64`; for floating point inputs, it is the same as the + input dtype. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. + See `doc.ufuncs` for details. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + Returns + ------- + m : ndarray, see dtype parameter above + If `out=None`, returns a new array containing the mean values, + otherwise a reference to the output array is returned. + + See Also + -------- + average : Weighted average + mean : Arithmetic mean taken while not ignoring NaNs + var, nanvar + + Notes + ----- + The arithmetic mean is the sum of the non-nan elements along the axis + divided by the number of non-nan elements. + + Note that for floating-point input, the mean is computed using the + same precision the input has. Depending on the input data, this can + cause the results to be inaccurate, especially for `float32`. + Specifying a higher-precision accumulator using the `dtype` keyword + can alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanmean(a) + 2.6666666666666665 + >>> np.nanmean(a, axis=0) + array([ 2., 4.]) + >>> np.nanmean(a, axis=1) + array([ 1., 3.5]) + + """ + if not (type(a) is mu.ndarray): + try: + mean = a.nanmean + return mean(axis=axis, dtype=dtype, out=out) + except AttributeError: + pass + + return _methods._nanmean(a, axis=axis, dtype=dtype, + out=out, keepdims=keepdims) + def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ @@ -2753,6 +2830,7 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): See Also -------- var, mean + nanmean, nanstd numpy.doc.ufuncs : Section "Output arguments" Notes @@ -2813,6 +2891,97 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): return _methods._std(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) +def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + """ + Compute the standard deviation along the specified axis, while + ignoring NaNs. + + Returns the standard deviation, a measure of the spread of a distribution, + of the non-NaN array elements. The standard deviation is computed for the + flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Calculate the standard deviation of the non-NaN values. + axis : int, optional + Axis along which the standard deviation is computed. The default is + to compute the standard deviation of the flattened array. + dtype : dtype, optional + Type to use in computing the standard deviation. For arrays of + integer type the default is float64, for arrays of float types it is + the same as the array type. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the calculated + values) will be cast if necessary. + ddof : int, optional + Means Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + By default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + Returns + ------- + standard_deviation : ndarray, see dtype parameter above. + If `out` is None, return a new array containing the standard deviation, + otherwise return a reference to the output array. + + See Also + -------- + var, mean, std + nanvar, nanmean + numpy.doc.ufuncs : Section "Output arguments" + + Notes + ----- + The standard deviation is the square root of the average of the squared + deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``. + + The average squared deviation is normally calculated as + ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is specified, + the divisor ``N - ddof`` is used instead. In standard statistical + practice, ``ddof=1`` provides an unbiased estimator of the variance + of the infinite population. ``ddof=0`` provides a maximum likelihood + estimate of the variance for normally distributed variables. The + standard deviation computed in this function is the square root of + the estimated variance, so even with ``ddof=1``, it will not be an + unbiased estimate of the standard deviation per se. + + Note that, for complex numbers, `std` takes the absolute + value before squaring, so that the result is always real and nonnegative. + + For floating-point input, the *std* is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for float32 (see example below). + Specifying a higher-accuracy accumulator using the `dtype` keyword can + alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanstd(a) + 1.247219128924647 + >>> np.nanstd(a, axis=0) + array([ 1., 0.]) + >>> np.nanstd(a, axis=1) + array([ 0., 0.5]) + + """ + + if not (type(a) is mu.ndarray): + try: + nanstd = a.nanstd + return nanstd(axis=axis, dtype=dtype, out=out, ddof=ddof) + except AttributeError: + pass + + return _methods._nanstd(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) + def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ @@ -2915,3 +3084,95 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) + + +def nanvar(a, axis=None, dtype=None, out=None, ddof=0, + keepdims=False): + """ + Compute the variance along the specified axis, while ignoring NaNs. + + Returns the variance of the array elements, a measure of the spread of a + distribution. The variance is computed for the flattened array by + default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Array containing numbers whose variance is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the variance is computed. The default is to compute + the variance of the flattened array. + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type + the default is `float32`; for arrays of float types it is the same as + the array type. + out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output, but the type is cast if + necessary. + ddof : int, optional + "Delta Degrees of Freedom": the divisor used in the calculation is + ``N - ddof``, where ``N`` represents the number of elements. By + default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + Returns + ------- + variance : ndarray, see dtype parameter above + If ``out=None``, returns a new array containing the variance; + otherwise, a reference to the output array is returned. + + See Also + -------- + std : Standard deviation + mean : Average + var : Variance while not ignoring NaNs + nanstd, nanmean + numpy.doc.ufuncs : Section "Output arguments" + + Notes + ----- + The variance is the average of the squared deviations from the mean, + i.e., ``var = mean(abs(x - x.mean())**2)``. + + The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. + If, however, `ddof` is specified, the divisor ``N - ddof`` is used + instead. In standard statistical practice, ``ddof=1`` provides an + unbiased estimator of the variance of a hypothetical infinite population. + ``ddof=0`` provides a maximum likelihood estimate of the variance for + normally distributed variables. + + Note that for complex numbers, the absolute value is taken before + squaring, so that the result is always real and nonnegative. + + For floating-point input, the variance is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for `float32` (see example + below). Specifying a higher-accuracy accumulator using the ``dtype`` + keyword can alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.var(a) + 1.5555555555555554 + >>> np.nanvar(a, axis=0) + array([ 1., 0.]) + >>> np.nanvar(a, axis=1) + array([ 0., 0.25]) + + """ + + if not (type(a) is mu.ndarray): + try: + nanvar = a.nanvar + return nanvar(axis=axis, dtype=dtype, out=out, ddof=ddof) + except AttributeError: + pass + + return _methods._nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) -- cgit v1.2.1 From fcb0fef5c673ed0a5442b18bcd8c391907b4f9a7 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Thu, 20 Jun 2013 20:44:54 -0600 Subject: MAINT: Separate nan functions into their own module. New files lib/nanfunctions.py and lib/tests/test_nanfunctions.py are added and both the previous and new nan functions and tests are moved into them. The existing nan functions moved from lib/function_base are: nansum, nanmin, nanmax, nanargmin, nanargmax The added nan functions moved from core/numeric are: nanmean, nanvar, nanstd --- numpy/core/fromnumeric.py | 279 ++-------------------------------------------- 1 file changed, 10 insertions(+), 269 deletions(-) (limited to 'numpy/core/fromnumeric.py') diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 8686e0531..35d36d960 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -14,18 +14,16 @@ from . import _methods _dt_ = nt.sctype2char -# functions that are now methods -__all__ = ['take', 'reshape', 'choose', 'repeat', 'put', - 'swapaxes', 'transpose', 'sort', 'argsort', 'partition', 'argpartition', - 'argmax', 'argmin', - 'searchsorted', 'alen', - 'resize', 'diagonal', 'trace', 'ravel', 'nonzero', 'shape', - 'compress', 'clip', 'sum', 'product', 'prod', 'sometrue', 'alltrue', - 'any', 'all', 'cumsum', 'cumproduct', 'cumprod', 'ptp', 'ndim', - 'rank', 'size', 'around', 'round_', 'mean', 'nanmean', - 'std', 'nanstd', 'var', 'nanvar', 'squeeze', - 'amax', 'amin', - ] +# functions that are methods +__all__ = [ + 'alen', 'all', 'alltrue', 'amax', 'amin', 'any', 'argmax', + 'argmin', 'argpartition', 'argsort', 'around', 'choose', 'clip', + 'compress', 'cumprod', 'cumproduct', 'cumsum', 'diagonal', 'mean', + 'ndim', 'nonzero', 'partition', 'prod', 'product', 'ptp', 'put', + 'rank', 'ravel', 'repeat', 'reshape', 'resize', 'round_', + 'searchsorted', 'shape', 'size', 'sometrue', 'sort', 'squeeze', + 'std', 'sum', 'swapaxes', 'take', 'trace', 'transpose', 'var', + ] try: @@ -2714,81 +2712,6 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=False): return _methods._mean(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) -def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): - """ - Compute the arithmetic mean along the specified axis, ignoring NaNs. - - Returns the average of the array elements. The average is taken over - the flattened array by default, otherwise over the specified axis. - `float64` intermediate and return values are used for integer inputs. - - Parameters - ---------- - a : array_like - Array containing numbers whose mean is desired. If `a` is not an - array, a conversion is attempted. - axis : int, optional - Axis along which the means are computed. The default is to compute - the mean of the flattened array. - dtype : data-type, optional - Type to use in computing the mean. For integer inputs, the default - is `float64`; for floating point inputs, it is the same as the - input dtype. - out : ndarray, optional - Alternate output array in which to place the result. The default - is ``None``; if provided, it must have the same shape as the - expected output, but the type will be cast if necessary. - See `doc.ufuncs` for details. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. - - Returns - ------- - m : ndarray, see dtype parameter above - If `out=None`, returns a new array containing the mean values, - otherwise a reference to the output array is returned. - - See Also - -------- - average : Weighted average - mean : Arithmetic mean taken while not ignoring NaNs - var, nanvar - - Notes - ----- - The arithmetic mean is the sum of the non-nan elements along the axis - divided by the number of non-nan elements. - - Note that for floating-point input, the mean is computed using the - same precision the input has. Depending on the input data, this can - cause the results to be inaccurate, especially for `float32`. - Specifying a higher-precision accumulator using the `dtype` keyword - can alleviate this issue. - - Examples - -------- - >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.nanmean(a) - 2.6666666666666665 - >>> np.nanmean(a, axis=0) - array([ 2., 4.]) - >>> np.nanmean(a, axis=1) - array([ 1., 3.5]) - - """ - if not (type(a) is mu.ndarray): - try: - mean = a.nanmean - return mean(axis=axis, dtype=dtype, out=out) - except AttributeError: - pass - - return _methods._nanmean(a, axis=axis, dtype=dtype, - out=out, keepdims=keepdims) - - def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ Compute the standard deviation along the specified axis. @@ -2891,97 +2814,6 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): return _methods._std(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) -def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): - """ - Compute the standard deviation along the specified axis, while - ignoring NaNs. - - Returns the standard deviation, a measure of the spread of a distribution, - of the non-NaN array elements. The standard deviation is computed for the - flattened array by default, otherwise over the specified axis. - - Parameters - ---------- - a : array_like - Calculate the standard deviation of the non-NaN values. - axis : int, optional - Axis along which the standard deviation is computed. The default is - to compute the standard deviation of the flattened array. - dtype : dtype, optional - Type to use in computing the standard deviation. For arrays of - integer type the default is float64, for arrays of float types it is - the same as the array type. - out : ndarray, optional - Alternative output array in which to place the result. It must have - the same shape as the expected output but the type (of the calculated - values) will be cast if necessary. - ddof : int, optional - Means Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. - By default `ddof` is zero. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. - - Returns - ------- - standard_deviation : ndarray, see dtype parameter above. - If `out` is None, return a new array containing the standard deviation, - otherwise return a reference to the output array. - - See Also - -------- - var, mean, std - nanvar, nanmean - numpy.doc.ufuncs : Section "Output arguments" - - Notes - ----- - The standard deviation is the square root of the average of the squared - deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``. - - The average squared deviation is normally calculated as - ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is specified, - the divisor ``N - ddof`` is used instead. In standard statistical - practice, ``ddof=1`` provides an unbiased estimator of the variance - of the infinite population. ``ddof=0`` provides a maximum likelihood - estimate of the variance for normally distributed variables. The - standard deviation computed in this function is the square root of - the estimated variance, so even with ``ddof=1``, it will not be an - unbiased estimate of the standard deviation per se. - - Note that, for complex numbers, `std` takes the absolute - value before squaring, so that the result is always real and nonnegative. - - For floating-point input, the *std* is computed using the same - precision the input has. Depending on the input data, this can cause - the results to be inaccurate, especially for float32 (see example below). - Specifying a higher-accuracy accumulator using the `dtype` keyword can - alleviate this issue. - - Examples - -------- - >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.nanstd(a) - 1.247219128924647 - >>> np.nanstd(a, axis=0) - array([ 1., 0.]) - >>> np.nanstd(a, axis=1) - array([ 0., 0.5]) - - """ - - if not (type(a) is mu.ndarray): - try: - nanstd = a.nanstd - return nanstd(axis=axis, dtype=dtype, out=out, ddof=ddof) - except AttributeError: - pass - - return _methods._nanstd(a, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims) - def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ @@ -3085,94 +2917,3 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) - -def nanvar(a, axis=None, dtype=None, out=None, ddof=0, - keepdims=False): - """ - Compute the variance along the specified axis, while ignoring NaNs. - - Returns the variance of the array elements, a measure of the spread of a - distribution. The variance is computed for the flattened array by - default, otherwise over the specified axis. - - Parameters - ---------- - a : array_like - Array containing numbers whose variance is desired. If `a` is not an - array, a conversion is attempted. - axis : int, optional - Axis along which the variance is computed. The default is to compute - the variance of the flattened array. - dtype : data-type, optional - Type to use in computing the variance. For arrays of integer type - the default is `float32`; for arrays of float types it is the same as - the array type. - out : ndarray, optional - Alternate output array in which to place the result. It must have - the same shape as the expected output, but the type is cast if - necessary. - ddof : int, optional - "Delta Degrees of Freedom": the divisor used in the calculation is - ``N - ddof``, where ``N`` represents the number of elements. By - default `ddof` is zero. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. - - Returns - ------- - variance : ndarray, see dtype parameter above - If ``out=None``, returns a new array containing the variance; - otherwise, a reference to the output array is returned. - - See Also - -------- - std : Standard deviation - mean : Average - var : Variance while not ignoring NaNs - nanstd, nanmean - numpy.doc.ufuncs : Section "Output arguments" - - Notes - ----- - The variance is the average of the squared deviations from the mean, - i.e., ``var = mean(abs(x - x.mean())**2)``. - - The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. - If, however, `ddof` is specified, the divisor ``N - ddof`` is used - instead. In standard statistical practice, ``ddof=1`` provides an - unbiased estimator of the variance of a hypothetical infinite population. - ``ddof=0`` provides a maximum likelihood estimate of the variance for - normally distributed variables. - - Note that for complex numbers, the absolute value is taken before - squaring, so that the result is always real and nonnegative. - - For floating-point input, the variance is computed using the same - precision the input has. Depending on the input data, this can cause - the results to be inaccurate, especially for `float32` (see example - below). Specifying a higher-accuracy accumulator using the ``dtype`` - keyword can alleviate this issue. - - Examples - -------- - >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.var(a) - 1.5555555555555554 - >>> np.nanvar(a, axis=0) - array([ 1., 0.]) - >>> np.nanvar(a, axis=1) - array([ 0., 0.25]) - - """ - - if not (type(a) is mu.ndarray): - try: - nanvar = a.nanvar - return nanvar(axis=axis, dtype=dtype, out=out, ddof=ddof) - except AttributeError: - pass - - return _methods._nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims) -- cgit v1.2.1 From 1b6b8719735ca3e98a0a0c3f492f16ff00ba1aa9 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Fri, 21 Jun 2013 10:28:41 -0600 Subject: MAINT: Clean up core/_methods.py and core/fromnumeric.py Use issubclass instead of issubdtype. Add some blank lines. Remove trailing whitespace. Remove uneeded float casts since true_divide is default. Clean up documentation a bit. --- numpy/core/fromnumeric.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'numpy/core/fromnumeric.py') diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 35d36d960..e325c5fd1 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -62,9 +62,9 @@ def take(a, indices, axis=None, out=None, mode='raise'): The source array. indices : array_like The indices of the values to extract. - + .. versionadded:: 1.8.0 - + Also allow scalars for indices. axis : int, optional The axis over which to select values. By default, the flattened @@ -2664,8 +2664,7 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=False): See Also -------- average : Weighted average - nanmean : Arithmetic mean while ignoring NaNs - var, nanvar + std, var, nanmean, nanstd, nanvar Notes ----- @@ -2752,8 +2751,7 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): See Also -------- - var, mean - nanmean, nanstd + var, mean, nanmean, nanstd, nanvar numpy.doc.ufuncs : Section "Output arguments" Notes @@ -2856,8 +2854,7 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, See Also -------- - std : Standard deviation - mean : Average + std , mean, nanmean, nanstd, nanvar numpy.doc.ufuncs : Section "Output arguments" Notes -- cgit v1.2.1