summaryrefslogtreecommitdiff
path: root/numpy/lib/function_base.py
diff options
context:
space:
mode:
authorStephan Hoyer <shoyer@gmail.com>2018-04-23 17:03:15 -0700
committerGitHub <noreply@github.com>2018-04-23 17:03:15 -0700
commitac76793dafcd6e5f24ed052fc40413f29ebc5306 (patch)
treee5eed404c59488c4df688e4428fccb4d5afbad84 /numpy/lib/function_base.py
parenta043c3ed2a08fc42fd2eff6669a862c5cb045bfc (diff)
parent5d5c379ed5af0df19eec68fdde1f87ad994ce6b1 (diff)
downloadnumpy-ac76793dafcd6e5f24ed052fc40413f29ebc5306.tar.gz
Merge pull request #10199 from chunweiyuan/quantile
ENH: Quantile
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r--numpy/lib/function_base.py110
1 files changed, 108 insertions, 2 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 099b63c40..72beef471 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -54,7 +54,8 @@ __all__ = [
'bincount', 'digitize', 'cov', 'corrcoef',
'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett',
'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring',
- 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc'
+ 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc',
+ 'quantile'
]
@@ -3427,7 +3428,7 @@ def percentile(a, q, axis=None, out=None,
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
This optional parameter specifies the interpolation method to
- use when the desired quantile lies between two data points
+ use when the desired percentile lies between two data points
``i < j``:
* 'linear': ``i + (j - i) * fraction``, where ``fraction``
@@ -3463,6 +3464,7 @@ def percentile(a, q, axis=None, out=None,
mean
median : equivalent to ``percentile(..., 50)``
nanpercentile
+ quantile : equivalent to percentile, except with q in the range [0, 1].
Notes
-----
@@ -3539,6 +3541,110 @@ def percentile(a, q, axis=None, out=None,
a, q, axis, out, overwrite_input, interpolation, keepdims)
+def quantile(a, q, axis=None, out=None,
+ overwrite_input=False, interpolation='linear', keepdims=False):
+ """
+ Compute the `q`th quantile of the data along the specified axis.
+ ..versionadded:: 1.15.0
+
+ Parameters
+ ----------
+ a : array_like
+ Input array or object that can be converted to an array.
+ q : array_like of float
+ Quantile or sequence of quantiles to compute, which must be between
+ 0 and 1 inclusive.
+ axis : {int, tuple of int, None}, optional
+ Axis or axes along which the quantiles are computed. The
+ default is to compute the quantile(s) along a flattened
+ version of the array.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must
+ have the same shape and buffer length as the expected output,
+ but the type (of the output) will be cast if necessary.
+ overwrite_input : bool, optional
+ If True, then allow the input array `a` to be modified by intermediate
+ calculations, to save memory. In this case, the contents of the input
+ `a` after this function completes is undefined.
+ interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+ This optional parameter specifies the interpolation method to
+ use when the desired quantile lies between two data points
+ ``i < j``:
+ * linear: ``i + (j - i) * fraction``, where ``fraction``
+ is the fractional part of the index surrounded by ``i``
+ and ``j``.
+ * lower: ``i``.
+ * higher: ``j``.
+ * nearest: ``i`` or ``j``, whichever is nearest.
+ * midpoint: ``(i + j) / 2``.
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left in
+ the result as dimensions with size one. With this option, the
+ result will broadcast correctly against the original array `a`.
+
+ Returns
+ -------
+ quantile : scalar or ndarray
+ If `q` is a single quantile and `axis=None`, then the result
+ is a scalar. If multiple quantiles are given, first axis of
+ the result corresponds to the quantiles. The other axes are
+ the axes that remain after the reduction of `a`. If the input
+ contains integers or floats smaller than ``float64``, the output
+ data-type is ``float64``. Otherwise, the output data-type is the
+ same as that of the input. If `out` is specified, that array is
+ returned instead.
+
+ See Also
+ --------
+ mean
+ percentile : equivalent to quantile, but with q in the range [0, 100].
+ median : equivalent to ``quantile(..., 0.5)``
+ nanquantile
+
+ Notes
+ -----
+ Given a vector ``V`` of length ``N``, the ``q``-th quantile of
+ ``V`` is the value ``q`` of the way from the minimum to the
+ maximum in a sorted copy of ``V``. The values and distances of
+ the two nearest neighbors as well as the `interpolation` parameter
+ will determine the quantile if the normalized ranking does not
+ match the location of ``q`` exactly. This function is the same as
+ the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and the
+ same as the maximum if ``q=1.0``.
+
+ Examples
+ --------
+ >>> a = np.array([[10, 7, 4], [3, 2, 1]])
+ >>> a
+ array([[10, 7, 4],
+ [ 3, 2, 1]])
+ >>> np.quantile(a, 0.5)
+ 3.5
+ >>> np.quantile(a, 0.5, axis=0)
+ array([[ 6.5, 4.5, 2.5]])
+ >>> np.quantile(a, 0.5, axis=1)
+ array([ 7., 2.])
+ >>> np.quantile(a, 0.5, axis=1, keepdims=True)
+ array([[ 7.],
+ [ 2.]])
+ >>> m = np.quantile(a, 0.5, axis=0)
+ >>> out = np.zeros_like(m)
+ >>> np.quantile(a, 0.5, axis=0, out=out)
+ array([[ 6.5, 4.5, 2.5]])
+ >>> m
+ array([[ 6.5, 4.5, 2.5]])
+ >>> b = a.copy()
+ >>> np.quantile(b, 0.5, axis=1, overwrite_input=True)
+ array([ 7., 2.])
+ >>> assert not np.all(a == b)
+ """
+ q = np.asanyarray(q)
+ if not _quantile_is_valid(q):
+ raise ValueError("Quantiles must be in the range [0, 1]")
+ return _quantile_unchecked(
+ a, q, axis, out, overwrite_input, interpolation, keepdims)
+
+
def _quantile_unchecked(a, q, axis=None, out=None, overwrite_input=False,
interpolation='linear', keepdims=False):
"""Assumes that q is in [0, 1], and is an ndarray"""