diff options
author | Stephan Hoyer <shoyer@gmail.com> | 2018-04-23 17:03:15 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-04-23 17:03:15 -0700 |
commit | ac76793dafcd6e5f24ed052fc40413f29ebc5306 (patch) | |
tree | e5eed404c59488c4df688e4428fccb4d5afbad84 /numpy/lib/nanfunctions.py | |
parent | a043c3ed2a08fc42fd2eff6669a862c5cb045bfc (diff) | |
parent | 5d5c379ed5af0df19eec68fdde1f87ad994ce6b1 (diff) | |
download | numpy-ac76793dafcd6e5f24ed052fc40413f29ebc5306.tar.gz |
Merge pull request #10199 from chunweiyuan/quantile
ENH: Quantile
Diffstat (limited to 'numpy/lib/nanfunctions.py')
-rw-r--r-- | numpy/lib/nanfunctions.py | 110 |
1 files changed, 108 insertions, 2 deletions
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index dddc0e5b8..abd2da1a2 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -16,6 +16,7 @@ Functions - `nanvar` -- variance of non-NaN values - `nanstd` -- standard deviation of non-NaN values - `nanmedian` -- median of non-NaN values +- `nanquantile` -- qth quantile of non-NaN values - `nanpercentile` -- qth percentile of non-NaN values """ @@ -29,7 +30,7 @@ from numpy.lib import function_base __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', 'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod', - 'nancumsum', 'nancumprod' + 'nancumsum', 'nancumprod', 'nanquantile' ] @@ -1057,7 +1058,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, `a` after this function completes is undefined. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points + use when the desired percentile lies between two data points ``i < j``: * 'linear': ``i + (j - i) * fraction``, where ``fraction`` @@ -1095,6 +1096,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, nanmean nanmedian : equivalent to ``nanpercentile(..., 50)`` percentile, median, mean + nanquantile : equivalent to nanpercentile, but with q in the range [0, 1]. Notes ----- @@ -1144,6 +1146,110 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, a, q, axis, out, overwrite_input, interpolation, keepdims) +def nanquantile(a, q, axis=None, out=None, overwrite_input=False, + interpolation='linear', keepdims=np._NoValue): + """ + Compute the qth quantile of the data along the specified axis, + while ignoring nan values. + Returns the qth quantile(s) of the array elements. + .. versionadded:: 1.15.0 + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array, containing + nan values to be ignored + q : array_like of float + Quantile or sequence of quantiles to compute, which must be between + 0 and 1 inclusive. + axis : {int, tuple of int, None}, optional + Axis or axes along which the quantiles are computed. The + default is to compute the quantile(s) along a flattened + version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow the input array `a` to be modified by intermediate + calculations, to save memory. In this case, the contents of the input + `a` after this function completes is undefined. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` + is the fractional part of the index surrounded by ``i`` + and ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + If this is anything but the default value it will be passed + through (in the special case of an empty array) to the + `mean` function of the underlying array. If the array is + a sub-class and `mean` does not have the kwarg `keepdims` this + will raise a RuntimeError. + + Returns + ------- + quantile : scalar or ndarray + If `q` is a single percentile and `axis=None`, then the result + is a scalar. If multiple quantiles are given, first axis of + the result corresponds to the quantiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + See Also + -------- + quantile + nanmean, nanmedian + nanmedian : equivalent to ``nanquantile(..., 0.5)`` + nanpercentile : same as nanquantile, but with q in the range [0, 100]. + + Examples + -------- + >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) + >>> a[0][1] = np.nan + >>> a + array([[ 10., nan, 4.], + [ 3., 2., 1.]]) + >>> np.quantile(a, 0.5) + nan + >>> np.nanquantile(a, 0.5) + 3.5 + >>> np.nanquantile(a, 0.5, axis=0) + array([ 6.5, 2., 2.5]) + >>> np.nanquantile(a, 0.5, axis=1, keepdims=True) + array([[ 7.], + [ 2.]]) + >>> m = np.nanquantile(a, 0.5, axis=0) + >>> out = np.zeros_like(m) + >>> np.nanquantile(a, 0.5, axis=0, out=out) + array([ 6.5, 2., 2.5]) + >>> m + array([ 6.5, 2. , 2.5]) + >>> b = a.copy() + >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a==b) + """ + a = np.asanyarray(a) + q = np.asanyarray(q) + if not function_base._quantile_is_valid(q): + raise ValueError("Quantiles must be in the range [0, 1]") + return _nanquantile_unchecked( + a, q, axis, out, overwrite_input, interpolation, keepdims) + + def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=np._NoValue): """Assumes that q is in [0, 1], and is an ndarray""" |