summaryrefslogtreecommitdiff
path: root/numpy/lib/function_base.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r--numpy/lib/function_base.py764
1 files changed, 617 insertions, 147 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 20e32a78d..86125168a 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -9,8 +9,7 @@ import numpy.core.numeric as _nx
from numpy.core import transpose
from numpy.core.numeric import (
ones, zeros_like, arange, concatenate, array, asarray, asanyarray, empty,
- ndarray, around, floor, ceil, take, dot, where, intp,
- integer, isscalar, absolute
+ ndarray, take, dot, where, intp, integer, isscalar, absolute
)
from numpy.core.umath import (
pi, add, arctan2, frompyfunc, cos, less_equal, sqrt, sin,
@@ -51,6 +50,104 @@ __all__ = [
'quantile'
]
+# _QuantileInterpolation is a dictionary listing all the supported
+# interpolation methods to compute quantile/percentile.
+#
+# Below virtual_index refer to the index of the element where the percentile
+# would be found in the sorted sample.
+# When the sample contains exactly the percentile wanted, the virtual_index is
+# an integer to the index of this element.
+# When the percentile wanted is in between two elements, the virtual_index
+# is made of a integer part (a.k.a 'i' or 'left') and a fractional part
+# (a.k.a 'g' or 'gamma')
+#
+# Each _QuantileInterpolation have two properties
+# get_virtual_index : Callable
+# The function used to compute the virtual_index.
+# fix_gamma : Callable
+# A function used for discret methods to force the index to a specific value.
+_QuantileInterpolation = dict(
+ # --- HYNDMAN and FAN methods
+ # Discrete methods
+ inverted_cdf=dict(
+ get_virtual_index=lambda n, quantiles: _inverted_cdf(n, quantiles),
+ fix_gamma=lambda gamma, _: gamma, # should never be called
+ ),
+ averaged_inverted_cdf=dict(
+ get_virtual_index=lambda n, quantiles: (n * quantiles) - 1,
+ fix_gamma=lambda gamma, _: _get_gamma_mask(
+ shape=gamma.shape,
+ default_value=1.,
+ conditioned_value=0.5,
+ where=gamma == 0),
+ ),
+ closest_observation=dict(
+ get_virtual_index=lambda n, quantiles: _closest_observation(n,
+ quantiles),
+ fix_gamma=lambda gamma, _: gamma, # should never be called
+ ),
+ # Continuous methods
+ interpolated_inverted_cdf=dict(
+ get_virtual_index=lambda n, quantiles:
+ _compute_virtual_index(n, quantiles, 0, 1),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ hazen=dict(
+ get_virtual_index=lambda n, quantiles:
+ _compute_virtual_index(n, quantiles, 0.5, 0.5),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ weibull=dict(
+ get_virtual_index=lambda n, quantiles:
+ _compute_virtual_index(n, quantiles, 0, 0),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ # Default value
+ linear=dict(
+ get_virtual_index=lambda n, quantiles:
+ _compute_virtual_index(n, quantiles, 1, 1),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ median_unbiased=dict(
+ get_virtual_index=lambda n, quantiles:
+ _compute_virtual_index(n, quantiles, 1 / 3.0, 1 / 3.0),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ normal_unbiased=dict(
+ get_virtual_index=lambda n, quantiles:
+ _compute_virtual_index(n, quantiles, 3 / 8.0, 3 / 8.0),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ # --- OTHER METHODS fixme add deprecated ?
+ lower=dict(
+ get_virtual_index=lambda n, quantiles: np.floor(
+ (n - 1) * quantiles).astype(np.intp),
+ fix_gamma=lambda gamma, _: gamma,
+ # should never be called, index dtype is int
+ ),
+ higher=dict(
+ get_virtual_index=lambda n, quantiles: np.ceil(
+ (n - 1) * quantiles).astype(np.intp),
+ fix_gamma=lambda gamma, _: gamma,
+ # should never be called, index dtype is int
+ ),
+ midpoint=dict(
+ get_virtual_index=lambda n, quantiles: 0.5 * (
+ np.floor((n - 1) * quantiles)
+ + np.ceil((n - 1) * quantiles)),
+ fix_gamma=lambda gamma, index: _get_gamma_mask(
+ shape=gamma.shape,
+ default_value=0.5,
+ conditioned_value=0.,
+ where=index % 1 == 0),
+ ),
+ nearest=dict(
+ get_virtual_index=lambda n, quantiles: np.around(
+ (n - 1) * quantiles).astype(np.intp),
+ fix_gamma=lambda gamma, _: gamma,
+ # should never be called, index dtype is int
+ ))
+
def _rot90_dispatcher(m, k=None, axes=None):
return (m,)
@@ -3760,8 +3857,13 @@ def _percentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
@array_function_dispatch(_percentile_dispatcher)
-def percentile(a, q, axis=None, out=None,
- overwrite_input=False, interpolation='linear', keepdims=False):
+def percentile(a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ interpolation="linear",
+ keepdims=False):
"""
Compute the q-th percentile of the data along the specified axis.
@@ -3789,21 +3891,32 @@ def percentile(a, q, axis=None, out=None,
If True, then allow the input array `a` to be modified by intermediate
calculations, to save memory. In this case, the contents of the input
`a` after this function completes is undefined.
-
- interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
- This optional parameter specifies the interpolation method to
+ interpolation : str, optional
+ This parameter specifies the interpolation method to
use when the desired percentile lies between two data points
- ``i < j``:
+ There are many different methods, some unique to NumPy. See the
+ notes for explanation. Options
- * 'linear': ``i + (j - i) * fraction``, where ``fraction``
- is the fractional part of the index surrounded by ``i``
- and ``j``.
- * 'lower': ``i``.
- * 'higher': ``j``.
- * 'nearest': ``i`` or ``j``, whichever is nearest.
- * 'midpoint': ``(i + j) / 2``.
+ * (NPY 1): 'lower'
+ * (NPY 2): 'higher',
+ * (NPY 3): 'midpoint'
+ * (NPY 4): 'nearest'
+ * (NPY 5): 'linear'
+
+ New options:
+
+ * (H&F 1): 'inverted_cdf'
+ * (H&F 2): 'averaged_inverted_cdf'
+ * (H&F 3): 'closest_observation'
+ * (H&F 4): 'interpolated_inverted_cdf'
+ * (H&F 5): 'hazen'
+ * (H&F 6): 'weibull'
+ * (H&F 7): 'linear' (default)
+ * (H&F 8): 'median_unbiased'
+ * (H&F 9): 'normal_unbiased'
+
+ .. versionchanged:: 1.22.0
- .. versionadded:: 1.9.0
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the
@@ -3828,18 +3941,109 @@ def percentile(a, q, axis=None, out=None,
mean
median : equivalent to ``percentile(..., 50)``
nanpercentile
- quantile : equivalent to percentile, except with q in the range [0, 1].
+ quantile : equivalent to percentile, except q in the range [0, 1].
Notes
-----
- Given a vector ``V`` of length ``N``, the q-th percentile of
- ``V`` is the value ``q/100`` of the way from the minimum to the
- maximum in a sorted copy of ``V``. The values and distances of
- the two nearest neighbors as well as the `interpolation` parameter
- will determine the percentile if the normalized ranking does not
- match the location of ``q`` exactly. This function is the same as
- the median if ``q=50``, the same as the minimum if ``q=0`` and the
- same as the maximum if ``q=100``.
+ Given a vector ``V`` of length ``N``, the q-th percentile of ``V`` is
+ the value ``q/100`` of the way from the minimum to the maximum in a
+ sorted copy of ``V``. The values and distances of the two nearest
+ neighbors as well as the `interpolation` parameter will determine the
+ percentile if the normalized ranking does not match the location of
+ ``q`` exactly. This function is the same as the median if ``q=50``, the
+ same as the minimum if ``q=0`` and the same as the maximum if
+ ``q=100``.
+
+ This optional `interpolation` parameter specifies the interpolation
+ method to use when the desired quantile lies between two data points
+ ``i < j``. If ``g`` is the fractional part of the index surrounded by
+ ``i`` and alpha and beta are correction constants modifying i and j.
+
+ Below, 'q' is the quantile value, 'n' is the sample size and
+ alpha and beta are constants.
+ The following formula gives an interpolation "i + g" of where the quantile
+ would be in the sorted sample.
+ With 'i' being the floor and 'g' the fractional part of the result.
+
+ .. math::
+ i + g = (q - alpha) / ( n - alpha - beta + 1 )
+
+ The different interpolation methods then work as follows
+
+ inverted_cdf:
+ method 1 of H&F [1]_.
+ This method gives discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 ; then take i
+
+ averaged_inverted_cdf:
+ method 2 of H&F [1]_.
+ This method give discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 ; then average between bounds
+
+ closest_observation:
+ method 3 of H&F [1]_.
+ This method give discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 and index is odd ; then take j
+ * if g = 0 and index is even ; then take i
+
+ interpolated_inverted_cdf:
+ method 4 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 0
+ * beta = 1
+
+ hazen:
+ method 5 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 1/2
+ * beta = 1/2
+
+ weibull:
+ method 6 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 0
+ * beta = 0
+
+ linear:
+ method 7 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 1
+ * beta = 1
+
+ median_unbiased:
+ method 8 of H&F [1]_.
+ This method is probably the best method if the sample
+ distribution function is unknown (see reference).
+ This method give continuous results using:
+ * alpha = 1/3
+ * beta = 1/3
+
+ normal_unbiased:
+ method 9 of H&F [1]_.
+ This method is probably the best method if the sample
+ distribution function is known to be normal.
+ This method give continuous results using:
+ * alpha = 3/8
+ * beta = 3/8
+
+ lower:
+ NumPy method kept for backwards compatibility.
+ Takes ``i`` as the interpolation point.
+
+ higher:
+ NumPy method kept for backwards compatibility.
+ Takes ``j`` as the interpolation point.
+
+ nearest:
+ NumPy method kept for backwards compatibility.
+ Takes ``i`` or ``j``, whichever is nearest.
+
+ midpoint:
+ NumPy method kept for backwards compatibility.
+ Uses ``(i + j) / 2``.
Examples
--------
@@ -3897,6 +4101,12 @@ def percentile(a, q, axis=None, out=None,
ax.legend()
plt.show()
+ References
+ ----------
+ .. [1] R. J. Hyndman and Y. Fan,
+ "Sample quantiles in statistical packages,"
+ The American Statistician, 50(4), pp. 361-365, 1996
+
"""
q = np.true_divide(q, 100)
q = asanyarray(q) # undo any decay that the ufunc performed (see gh-13105)
@@ -3912,8 +4122,13 @@ def _quantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
@array_function_dispatch(_quantile_dispatcher)
-def quantile(a, q, axis=None, out=None,
- overwrite_input=False, interpolation='linear', keepdims=False):
+def quantile(a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ interpolation="linear",
+ keepdims=False):
"""
Compute the q-th quantile of the data along the specified axis.
@@ -3927,29 +4142,43 @@ def quantile(a, q, axis=None, out=None,
Quantile or sequence of quantiles to compute, which must be between
0 and 1 inclusive.
axis : {int, tuple of int, None}, optional
- Axis or axes along which the quantiles are computed. The
- default is to compute the quantile(s) along a flattened
- version of the array.
+ Axis or axes along which the quantiles are computed. The default is
+ to compute the quantile(s) along a flattened version of the array.
out : ndarray, optional
- Alternative output array in which to place the result. It must
- have the same shape and buffer length as the expected output,
- but the type (of the output) will be cast if necessary.
+ Alternative output array in which to place the result. It must have
+ the same shape and buffer length as the expected output, but the
+ type (of the output) will be cast if necessary.
overwrite_input : bool, optional
- If True, then allow the input array `a` to be modified by intermediate
- calculations, to save memory. In this case, the contents of the input
- `a` after this function completes is undefined.
- interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
- This optional parameter specifies the interpolation method to
- use when the desired quantile lies between two data points
- ``i < j``:
-
- * linear: ``i + (j - i) * fraction``, where ``fraction``
- is the fractional part of the index surrounded by ``i``
- and ``j``.
- * lower: ``i``.
- * higher: ``j``.
- * nearest: ``i`` or ``j``, whichever is nearest.
- * midpoint: ``(i + j) / 2``.
+ If True, then allow the input array `a` to be modified by
+ intermediate calculations, to save memory. In this case, the
+ contents of the input `a` after this function completes is
+ undefined.
+ interpolation : str, optional
+ This parameter specifies the interpolation method to use when the
+ desired quantile lies between two data points There are many
+ different methods, some unique to NumPy. See the notes for
+ explanation. Options:
+
+ * (NPY 1): 'lower'
+ * (NPY 2): 'higher',
+ * (NPY 3): 'midpoint'
+ * (NPY 4): 'nearest'
+ * (NPY 5): 'linear'
+
+ New options:
+
+ * (H&F 1): 'inverted_cdf'
+ * (H&F 2): 'averaged_inverted_cdf'
+ * (H&F 3): 'closest_observation'
+ * (H&F 4): 'interpolated_inverted_cdf'
+ * (H&F 5): 'hazen'
+ * (H&F 6): 'weibull'
+ * (H&F 7): 'linear' (default)
+ * (H&F 8): 'median_unbiased'
+ * (H&F 9): 'normal_unbiased'
+
+ .. versionadded:: 1.22.0
+
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the
@@ -3976,14 +4205,98 @@ def quantile(a, q, axis=None, out=None,
Notes
-----
- Given a vector ``V`` of length ``N``, the q-th quantile of
- ``V`` is the value ``q`` of the way from the minimum to the
- maximum in a sorted copy of ``V``. The values and distances of
- the two nearest neighbors as well as the `interpolation` parameter
- will determine the quantile if the normalized ranking does not
- match the location of ``q`` exactly. This function is the same as
- the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and the
- same as the maximum if ``q=1.0``.
+ Given a vector ``V`` of length ``N``, the q-th quantile of ``V`` is the
+ value ``q`` of the way from the minimum to the maximum in a sorted copy of
+ ``V``. The values and distances of the two nearest neighbors as well as the
+ `interpolation` parameter will determine the quantile if the normalized
+ ranking does not match the location of ``q`` exactly. This function is the
+ same as the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and
+ the same as the maximum if ``q=1.0``.
+
+ This optional `interpolation` parameter specifies the interpolation method
+ to use when the desired quantile lies between two data points ``i < j``. If
+ ``g`` is the fractional part of the index surrounded by ``i`` and alpha
+ and beta are correction constants modifying i and j.
+
+ .. math::
+ i + g = (q - alpha) / ( n - alpha - beta + 1 )
+
+ The different interpolation methods then work as follows
+
+ inverted_cdf:
+ method 1 of H&F [1]_.
+ This method gives discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 ; then take i
+
+ averaged_inverted_cdf:
+ method 2 of H&F [1]_.
+ This method give discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 ; then average between bounds
+
+ closest_observation:
+ method 3 of H&F [1]_.
+ This method give discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 and index is odd ; then take j
+ * if g = 0 and index is even ; then take i
+
+ interpolated_inverted_cdf:
+ method 4 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 0
+ * beta = 1
+
+ hazen:
+ method 5 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 1/2
+ * beta = 1/2
+
+ weibull:
+ method 6 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 0
+ * beta = 0
+
+ linear:
+ method 7 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 1
+ * beta = 1
+
+ median_unbiased:
+ method 8 of H&F [1]_.
+ This method is probably the best method if the sample
+ distribution function is unknown (see reference).
+ This method give continuous results using:
+ * alpha = 1/3
+ * beta = 1/3
+
+ normal_unbiased:
+ method 9 of H&F [1]_.
+ This method is probably the best method if the sample
+ distribution function is known to be normal.
+ This method give continuous results using:
+ * alpha = 3/8
+ * beta = 3/8
+
+ lower:
+ NumPy method kept for backwards compatibility.
+ Takes ``i`` as the interpolation point.
+
+ higher:
+ NumPy method kept for backwards compatibility.
+ Takes ``j`` as the interpolation point.
+
+ nearest:
+ NumPy method kept for backwards compatibility.
+ Takes ``i`` or ``j``, whichever is nearest.
+
+ midpoint:
+ NumPy method kept for backwards compatibility.
+ Uses ``(i + j) / 2``.
Examples
--------
@@ -4010,6 +4323,13 @@ def quantile(a, q, axis=None, out=None,
>>> np.quantile(b, 0.5, axis=1, overwrite_input=True)
array([7., 2.])
>>> assert not np.all(a == b)
+
+ References
+ ----------
+ .. [1] R. J. Hyndman and Y. Fan,
+ "Sample quantiles in statistical packages,"
+ The American Statistician, 50(4), pp. 361-365, 1996
+
"""
q = np.asanyarray(q)
if not _quantile_is_valid(q):
@@ -4018,10 +4338,19 @@ def quantile(a, q, axis=None, out=None,
a, q, axis, out, overwrite_input, interpolation, keepdims)
-def _quantile_unchecked(a, q, axis=None, out=None, overwrite_input=False,
- interpolation='linear', keepdims=False):
+def _quantile_unchecked(a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ interpolation="linear",
+ keepdims=False):
"""Assumes that q is in [0, 1], and is an ndarray"""
- r, k = _ureduce(a, func=_quantile_ureduce_func, q=q, axis=axis, out=out,
+ r, k = _ureduce(a,
+ func=_quantile_ureduce_func,
+ q=q,
+ axis=axis,
+ out=out,
overwrite_input=overwrite_input,
interpolation=interpolation)
if keepdims:
@@ -4042,122 +4371,263 @@ def _quantile_is_valid(q):
return True
+def _compute_virtual_index(n, quantiles, alpha: float, beta: float):
+ """
+ Compute the floating point indexes of an array for the linear
+ interpolation of quantiles.
+ n : array_like
+ The sample sizes.
+ quantiles : array_like
+ The quantiles values.
+ alpha : float
+ A constant used to correct the index computed.
+ beta : float
+ A constant used to correct the index computed.
+
+ alpha and beta values depend on the chosen method
+ (see quantile documentation)
+
+ Reference:
+ Hyndman&Fan paper "Sample Quantiles in Statistical Packages",
+ DOI: 10.1080/00031305.1996.10473566
+ """
+ return n * quantiles + (
+ alpha + quantiles * (1 - alpha - beta)
+ ) - 1
+
+
+def _get_gamma(virtual_indexes,
+ previous_indexes,
+ interpolation: _QuantileInterpolation):
+ """
+ Compute gamma (a.k.a 'm' or 'weight') for the linear interpolation
+ of quantiles.
+
+ virtual_indexes : array_like
+ The indexes where the percentile is supposed to be found in the sorted
+ sample.
+ previous_indexes : array_like
+ The floor values of virtual_indexes.
+ interpolation : _QuantileInterpolation
+ The interpolation method chosen, which may have a specific rule
+ modifying gamma.
+
+ gamma is usually the fractional part of virtual_indexes but can be modified
+ by the interpolation method.
+ """
+ gamma = np.asanyarray(virtual_indexes - previous_indexes)
+ gamma = interpolation["fix_gamma"](gamma, virtual_indexes)
+ return np.asanyarray(gamma)
+
+
def _lerp(a, b, t, out=None):
- """ Linearly interpolate from a to b by a factor of t """
+ """
+ Compute the linear interpolation weighted by gamma on each point of
+ two same shape array.
+
+ a : array_like
+ Left bound.
+ b : array_like
+ Right bound.
+ t : array_like
+ The interpolation weight.
+ out : array_like
+ Output array.
+ """
diff_b_a = subtract(b, a)
# asanyarray is a stop-gap until gh-13105
- lerp_interpolation = asanyarray(add(a, diff_b_a*t, out=out))
- subtract(b, diff_b_a * (1 - t), out=lerp_interpolation, where=t>=0.5)
+ lerp_interpolation = asanyarray(add(a, diff_b_a * t, out=out))
+ subtract(b, diff_b_a * (1 - t), out=lerp_interpolation, where=t >= 0.5)
if lerp_interpolation.ndim == 0 and out is None:
lerp_interpolation = lerp_interpolation[()] # unpack 0d arrays
return lerp_interpolation
-def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
- interpolation='linear', keepdims=False):
- a = asarray(a)
+def _get_gamma_mask(shape, default_value, conditioned_value, where):
+ out = np.full(shape, default_value)
+ out[where] = conditioned_value
+ return out
- # ufuncs cause 0d array results to decay to scalars (see gh-13105), which
- # makes them problematic for __setitem__ and attribute access. As a
- # workaround, we call this on the result of every ufunc on a possibly-0d
- # array.
- not_scalar = np.asanyarray
- # prepare a for partitioning
- if overwrite_input:
- if axis is None:
- ap = a.ravel()
- else:
- ap = a
- else:
- if axis is None:
- ap = a.flatten()
- else:
- ap = a.copy()
+def _discret_interpolation_to_boundaries(index, gamma_condition_fun):
+ previous = np.floor(index)
+ next = previous + 1
+ gamma = index - previous
+ return _get_gamma_mask(shape=index.shape,
+ default_value=next,
+ conditioned_value=previous,
+ where=gamma_condition_fun(gamma, index)
+ ).astype(np.intp)
- if axis is None:
- axis = 0
+def _closest_observation(n, quantiles):
+ gamma_fun = lambda gamma, index: (gamma == 0) & (np.floor(index) % 2 == 0)
+ return _discret_interpolation_to_boundaries((n * quantiles) - 1 - 0.5,
+ gamma_fun)
+
+
+def _inverted_cdf(n, quantiles):
+ gamma_fun = lambda gamma, _: (gamma == 0)
+ return _discret_interpolation_to_boundaries((n * quantiles) - 1,
+ gamma_fun)
+
+
+def _quantile_ureduce_func(
+ a: np.array,
+ q: np.array,
+ axis: int = None,
+ out=None,
+ overwrite_input: bool = False,
+ interpolation="linear",
+) -> np.array:
if q.ndim > 2:
# The code below works fine for nd, but it might not have useful
# semantics. For now, keep the supported dimensions the same as it was
# before.
raise ValueError("q must be a scalar or 1d")
-
- Nx = ap.shape[axis]
- indices = not_scalar(q * (Nx - 1))
- # round fractional indices according to interpolation method
- if interpolation == 'lower':
- indices = floor(indices).astype(intp)
- elif interpolation == 'higher':
- indices = ceil(indices).astype(intp)
- elif interpolation == 'midpoint':
- indices = 0.5 * (floor(indices) + ceil(indices))
- elif interpolation == 'nearest':
- indices = around(indices).astype(intp)
- elif interpolation == 'linear':
- pass # keep index as fraction and interpolate
+ if overwrite_input:
+ if axis is None:
+ axis = 0
+ arr = a.ravel()
+ else:
+ arr = a
else:
- raise ValueError(
- "interpolation can only be 'linear', 'lower' 'higher', "
- "'midpoint', or 'nearest'")
+ if axis is None:
+ axis = 0
+ arr = a.flatten()
+ else:
+ arr = a.copy()
+ result = _quantile(arr,
+ quantiles=q,
+ axis=axis,
+ interpolation=interpolation,
+ out=out)
+ return result
- # The dimensions of `q` are prepended to the output shape, so we need the
- # axis being sampled from `ap` to be first.
- ap = np.moveaxis(ap, axis, 0)
- del axis
- if np.issubdtype(indices.dtype, np.integer):
- # take the points along axis
+def _get_indexes(arr, virtual_indexes, valid_values_count):
+ """
+ Get the valid indexes of arr neighbouring virtual_indexes.
+ Note
+ This is a companion function to linear interpolation of
+ Quantiles
- if np.issubdtype(a.dtype, np.inexact):
+ Returns
+ -------
+ (previous_indexes, next_indexes): Tuple
+ A Tuple of virtual_indexes neighbouring indexes
+ """
+ previous_indexes = np.asanyarray(np.floor(virtual_indexes))
+ next_indexes = np.asanyarray(previous_indexes + 1)
+ indexes_above_bounds = virtual_indexes >= valid_values_count - 1
+ # When indexes is above max index, take the max value of the array
+ if indexes_above_bounds.any():
+ previous_indexes[indexes_above_bounds] = -1
+ next_indexes[indexes_above_bounds] = -1
+ # When indexes is below min index, take the min value of the array
+ indexes_below_bounds = virtual_indexes < 0
+ if indexes_below_bounds.any():
+ previous_indexes[indexes_below_bounds] = 0
+ next_indexes[indexes_below_bounds] = 0
+ if np.issubdtype(arr.dtype, np.inexact):
+ # After the sort, slices having NaNs will have for last element a NaN
+ virtual_indexes_nans = np.isnan(virtual_indexes)
+ if virtual_indexes_nans.any():
+ previous_indexes[virtual_indexes_nans] = -1
+ next_indexes[virtual_indexes_nans] = -1
+ previous_indexes = previous_indexes.astype(np.intp)
+ next_indexes = next_indexes.astype(np.intp)
+ return previous_indexes, next_indexes
+
+
+def _quantile(
+ arr: np.array,
+ quantiles: np.array,
+ axis: int = -1,
+ interpolation="linear",
+ out=None,
+):
+ """
+ Private function that doesn't support extended axis or keepdims.
+ These methods are extended to this function using _ureduce
+ See nanpercentile for parameter usage
+ It computes the quantiles of the array for the given axis.
+ A linear interpolation is performed based on the `interpolation`.
+
+ By default, the interpolation is "linear" where
+ alpha == beta == 1 which performs the 7th method of Hyndman&Fan.
+ With "median_unbiased" we get alpha == beta == 1/3
+ thus the 8th method of Hyndman&Fan.
+ """
+ # --- Setup
+ arr = np.asanyarray(arr)
+ values_count = arr.shape[axis]
+ # The dimensions of `q` are prepended to the output shape, so we need the
+ # axis being sampled from `arr` to be last.
+ DATA_AXIS = 0
+ if axis != DATA_AXIS: # But moveaxis is slow, so only call it if axis!=0.
+ arr = np.moveaxis(arr, axis, destination=DATA_AXIS)
+ # --- Computation of indexes
+ # Index where to find the value in the sorted array.
+ # Virtual because it is a floating point value, not an valid index.
+ # The nearest neighbours are used for interpolation
+ try:
+ interpolation = _QuantileInterpolation[interpolation]
+ except KeyError:
+ raise ValueError(
+ f"{interpolation!r} is not a valid interpolation. Use one of: "
+ f"{_QuantileInterpolation.keys()}") from None
+ virtual_indexes = interpolation["get_virtual_index"](values_count,
+ quantiles)
+ virtual_indexes = np.asanyarray(virtual_indexes)
+ if np.issubdtype(virtual_indexes.dtype, np.integer):
+ # No interpolation needed, take the points along axis
+ if np.issubdtype(arr.dtype, np.inexact):
# may contain nan, which would sort to the end
- ap.partition(concatenate((indices.ravel(), [-1])), axis=0)
- n = np.isnan(ap[-1])
+ arr.partition(concatenate((virtual_indexes.ravel(), [-1])), axis=0)
+ slices_having_nans = np.isnan(arr[-1])
else:
# cannot contain nan
- ap.partition(indices.ravel(), axis=0)
- n = np.array(False, dtype=bool)
-
- r = take(ap, indices, axis=0, out=out)
-
+ arr.partition(virtual_indexes.ravel(), axis=0)
+ slices_having_nans = np.array(False, dtype=bool)
+ result = take(arr, virtual_indexes, axis=0, out=out)
else:
- # weight the points above and below the indices
-
- indices_below = not_scalar(floor(indices)).astype(intp)
- indices_above = not_scalar(indices_below + 1)
- indices_above[indices_above > Nx - 1] = Nx - 1
-
- if np.issubdtype(a.dtype, np.inexact):
- # may contain nan, which would sort to the end
- ap.partition(concatenate((
- indices_below.ravel(), indices_above.ravel(), [-1]
- )), axis=0)
- n = np.isnan(ap[-1])
+ previous_indexes, next_indexes = _get_indexes(arr,
+ virtual_indexes,
+ values_count)
+ # --- Sorting
+ arr.partition(
+ np.unique(np.concatenate(([0, -1],
+ previous_indexes.ravel(),
+ next_indexes.ravel(),
+ ))),
+ axis=DATA_AXIS)
+ if np.issubdtype(arr.dtype, np.inexact):
+ slices_having_nans = np.isnan(
+ take(arr, indices=-1, axis=DATA_AXIS)
+ )
else:
- # cannot contain nan
- ap.partition(concatenate((
- indices_below.ravel(), indices_above.ravel()
- )), axis=0)
- n = np.array(False, dtype=bool)
-
- weights_shape = indices.shape + (1,) * (ap.ndim - 1)
- weights_above = not_scalar(indices - indices_below).reshape(weights_shape)
-
- x_below = take(ap, indices_below, axis=0)
- x_above = take(ap, indices_above, axis=0)
-
- r = _lerp(x_below, x_above, weights_above, out=out)
-
- # if any slice contained a nan, then all results on that slice are also nan
- if np.any(n):
- if r.ndim == 0 and out is None:
+ slices_having_nans = None
+ # --- Get values from indexes
+ previous = np.take(arr, previous_indexes, axis=DATA_AXIS)
+ next = np.take(arr, next_indexes, axis=DATA_AXIS)
+ # --- Linear interpolation
+ gamma = _get_gamma(virtual_indexes,
+ previous_indexes,
+ interpolation)
+ result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1)
+ gamma = gamma.reshape(result_shape)
+ result = _lerp(previous,
+ next,
+ gamma,
+ out=out)
+ if np.any(slices_having_nans):
+ if result.ndim == 0 and out is None:
# can't write to a scalar
- r = a.dtype.type(np.nan)
+ result = arr.dtype.type(np.nan)
else:
- r[..., n] = a.dtype.type(np.nan)
-
- return r
+ result[..., slices_having_nans] = np.nan
+ return result
def _trapz_dispatcher(y, x=None, dx=None, axis=None):