summaryrefslogtreecommitdiff
path: root/numpy/lib/function_base.py
diff options
context:
space:
mode:
authorDeveloper-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com>2021-11-18 14:32:20 -0800
committerDeveloper-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com>2021-11-18 14:32:20 -0800
commit9fe353e048bc317099bb13cae4cb5a4de8c0c656 (patch)
tree4976e4328da9906d99cd2478547ca1e2829e071d /numpy/lib/function_base.py
parent66ca468b97873c1c8bfe69bddac6df633780c71c (diff)
parent5e9ce0c0529e3085498ac892941a020a65c7369a (diff)
downloadnumpy-9fe353e048bc317099bb13cae4cb5a4de8c0c656.tar.gz
Merge branch 'as_min_max' of https://github.com/Developer-Ecosystem-Engineering/numpy into as_min_max
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r--numpy/lib/function_base.py869
1 files changed, 696 insertions, 173 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 84128e4f0..a215f63d3 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -9,8 +9,7 @@ import numpy.core.numeric as _nx
from numpy.core import transpose
from numpy.core.numeric import (
ones, zeros_like, arange, concatenate, array, asarray, asanyarray, empty,
- ndarray, around, floor, ceil, take, dot, where, intp,
- integer, isscalar, absolute
+ ndarray, take, dot, where, intp, integer, isscalar, absolute
)
from numpy.core.umath import (
pi, add, arctan2, frompyfunc, cos, less_equal, sqrt, sin,
@@ -51,6 +50,106 @@ __all__ = [
'quantile'
]
+# _QuantileMethods is a dictionary listing all the supported methods to
+# compute quantile/percentile.
+#
+# Below virtual_index refer to the index of the element where the percentile
+# would be found in the sorted sample.
+# When the sample contains exactly the percentile wanted, the virtual_index is
+# an integer to the index of this element.
+# When the percentile wanted is in between two elements, the virtual_index
+# is made of a integer part (a.k.a 'i' or 'left') and a fractional part
+# (a.k.a 'g' or 'gamma')
+#
+# Each method in _QuantileMethods has two properties
+# get_virtual_index : Callable
+# The function used to compute the virtual_index.
+# fix_gamma : Callable
+# A function used for discret methods to force the index to a specific value.
+_QuantileMethods = dict(
+ # --- HYNDMAN and FAN METHODS
+ # Discrete methods
+ inverted_cdf=dict(
+ get_virtual_index=lambda n, quantiles: _inverted_cdf(n, quantiles),
+ fix_gamma=lambda gamma, _: gamma, # should never be called
+ ),
+ averaged_inverted_cdf=dict(
+ get_virtual_index=lambda n, quantiles: (n * quantiles) - 1,
+ fix_gamma=lambda gamma, _: _get_gamma_mask(
+ shape=gamma.shape,
+ default_value=1.,
+ conditioned_value=0.5,
+ where=gamma == 0),
+ ),
+ closest_observation=dict(
+ get_virtual_index=lambda n, quantiles: _closest_observation(n,
+ quantiles),
+ fix_gamma=lambda gamma, _: gamma, # should never be called
+ ),
+ # Continuous methods
+ interpolated_inverted_cdf=dict(
+ get_virtual_index=lambda n, quantiles:
+ _compute_virtual_index(n, quantiles, 0, 1),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ hazen=dict(
+ get_virtual_index=lambda n, quantiles:
+ _compute_virtual_index(n, quantiles, 0.5, 0.5),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ weibull=dict(
+ get_virtual_index=lambda n, quantiles:
+ _compute_virtual_index(n, quantiles, 0, 0),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ # Default method.
+ # To avoid some rounding issues, `(n-1) * quantiles` is preferred to
+ # `_compute_virtual_index(n, quantiles, 1, 1)`.
+ # They are mathematically equivalent.
+ linear=dict(
+ get_virtual_index=lambda n, quantiles: (n - 1) * quantiles,
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ median_unbiased=dict(
+ get_virtual_index=lambda n, quantiles:
+ _compute_virtual_index(n, quantiles, 1 / 3.0, 1 / 3.0),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ normal_unbiased=dict(
+ get_virtual_index=lambda n, quantiles:
+ _compute_virtual_index(n, quantiles, 3 / 8.0, 3 / 8.0),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ # --- OTHER METHODS
+ lower=dict(
+ get_virtual_index=lambda n, quantiles: np.floor(
+ (n - 1) * quantiles).astype(np.intp),
+ fix_gamma=lambda gamma, _: gamma,
+ # should never be called, index dtype is int
+ ),
+ higher=dict(
+ get_virtual_index=lambda n, quantiles: np.ceil(
+ (n - 1) * quantiles).astype(np.intp),
+ fix_gamma=lambda gamma, _: gamma,
+ # should never be called, index dtype is int
+ ),
+ midpoint=dict(
+ get_virtual_index=lambda n, quantiles: 0.5 * (
+ np.floor((n - 1) * quantiles)
+ + np.ceil((n - 1) * quantiles)),
+ fix_gamma=lambda gamma, index: _get_gamma_mask(
+ shape=gamma.shape,
+ default_value=0.5,
+ conditioned_value=0.,
+ where=index % 1 == 0),
+ ),
+ nearest=dict(
+ get_virtual_index=lambda n, quantiles: np.around(
+ (n - 1) * quantiles).astype(np.intp),
+ fix_gamma=lambda gamma, _: gamma,
+ # should never be called, index dtype is int
+ ))
+
def _rot90_dispatcher(m, k=None, axes=None):
return (m,)
@@ -670,11 +769,16 @@ def select(condlist, choicelist, default=0):
Examples
--------
- >>> x = np.arange(10)
- >>> condlist = [x<3, x>5]
+ >>> x = np.arange(6)
+ >>> condlist = [x<3, x>3]
+ >>> choicelist = [x, x**2]
+ >>> np.select(condlist, choicelist, 42)
+ array([ 0, 1, 2, 42, 16, 25])
+
+ >>> condlist = [x<=4, x>3]
>>> choicelist = [x, x**2]
- >>> np.select(condlist, choicelist)
- array([ 0, 1, 2, ..., 49, 64, 81])
+ >>> np.select(condlist, choicelist, 55)
+ array([ 0, 1, 2, 3, 4, 25])
"""
# Check the size of condlist and choicelist are the same, or abort.
@@ -3750,13 +3854,20 @@ def _median(a, axis=None, out=None, overwrite_input=False):
def _percentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
- interpolation=None, keepdims=None):
+ method=None, keepdims=None, *, interpolation=None):
return (a, q, out)
@array_function_dispatch(_percentile_dispatcher)
-def percentile(a, q, axis=None, out=None,
- overwrite_input=False, interpolation='linear', keepdims=False):
+def percentile(a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ method="linear",
+ keepdims=False,
+ *,
+ interpolation=None):
"""
Compute the q-th percentile of the data along the specified axis.
@@ -3784,21 +3895,34 @@ def percentile(a, q, axis=None, out=None,
If True, then allow the input array `a` to be modified by intermediate
calculations, to save memory. In this case, the contents of the input
`a` after this function completes is undefined.
+ method : str, optional
+ This parameter specifies the method to use for estimating the
+ percentile. There are many different methods, some unique to NumPy.
+ See the notes for explanation. The options sorted by their R type
+ as summarized in the H&F paper [1]_ are:
+
+ 1. 'inverted_cdf'
+ 2. 'averaged_inverted_cdf'
+ 3. 'closest_observation'
+ 4. 'interpolated_inverted_cdf'
+ 5. 'hazen'
+ 6. 'weibull'
+ 7. 'linear' (default)
+ 8. 'median_unbiased'
+ 9. 'normal_unbiased'
+
+ The first three methods are discontiuous. NumPy further defines the
+ following discontinuous variations of the default 'linear' (7.) option:
+
+ * 'lower'
+ * 'higher',
+ * 'midpoint'
+ * 'nearest'
+
+ .. versionchanged:: 1.22.0
+ This argument was previously called "interpolation" and only
+ offered the "linear" default and last four options.
- interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
- This optional parameter specifies the interpolation method to
- use when the desired percentile lies between two data points
- ``i < j``:
-
- * 'linear': ``i + (j - i) * fraction``, where ``fraction``
- is the fractional part of the index surrounded by ``i``
- and ``j``.
- * 'lower': ``i``.
- * 'higher': ``j``.
- * 'nearest': ``i`` or ``j``, whichever is nearest.
- * 'midpoint': ``(i + j) / 2``.
-
- .. versionadded:: 1.9.0
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the
@@ -3806,6 +3930,11 @@ def percentile(a, q, axis=None, out=None,
.. versionadded:: 1.9.0
+ interpolation : str, optional
+ Deprecated name for the method keyword argument.
+
+ .. deprecated:: 1.22.0
+
Returns
-------
percentile : scalar or ndarray
@@ -3823,18 +3952,109 @@ def percentile(a, q, axis=None, out=None,
mean
median : equivalent to ``percentile(..., 50)``
nanpercentile
- quantile : equivalent to percentile, except with q in the range [0, 1].
+ quantile : equivalent to percentile, except q in the range [0, 1].
Notes
-----
- Given a vector ``V`` of length ``N``, the q-th percentile of
- ``V`` is the value ``q/100`` of the way from the minimum to the
- maximum in a sorted copy of ``V``. The values and distances of
- the two nearest neighbors as well as the `interpolation` parameter
- will determine the percentile if the normalized ranking does not
- match the location of ``q`` exactly. This function is the same as
- the median if ``q=50``, the same as the minimum if ``q=0`` and the
- same as the maximum if ``q=100``.
+ Given a vector ``V`` of length ``N``, the q-th percentile of ``V`` is
+ the value ``q/100`` of the way from the minimum to the maximum in a
+ sorted copy of ``V``. The values and distances of the two nearest
+ neighbors as well as the `method` parameter will determine the
+ percentile if the normalized ranking does not match the location of
+ ``q`` exactly. This function is the same as the median if ``q=50``, the
+ same as the minimum if ``q=0`` and the same as the maximum if
+ ``q=100``.
+
+ This optional `method` parameter specifies the method to use when the
+ desired quantile lies between two data points ``i < j``.
+ If ``g`` is the fractional part of the index surrounded by ``i`` and
+ alpha and beta are correction constants modifying i and j.
+
+ Below, 'q' is the quantile value, 'n' is the sample size and
+ alpha and beta are constants.
+ The following formula gives an interpolation "i + g" of where the quantile
+ would be in the sorted sample.
+ With 'i' being the floor and 'g' the fractional part of the result.
+
+ .. math::
+ i + g = (q - alpha) / ( n - alpha - beta + 1 )
+
+ The different methods then work as follows
+
+ inverted_cdf:
+ method 1 of H&F [1]_.
+ This method gives discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 ; then take i
+
+ averaged_inverted_cdf:
+ method 2 of H&F [1]_.
+ This method give discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 ; then average between bounds
+
+ closest_observation:
+ method 3 of H&F [1]_.
+ This method give discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 and index is odd ; then take j
+ * if g = 0 and index is even ; then take i
+
+ interpolated_inverted_cdf:
+ method 4 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 0
+ * beta = 1
+
+ hazen:
+ method 5 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 1/2
+ * beta = 1/2
+
+ weibull:
+ method 6 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 0
+ * beta = 0
+
+ linear:
+ method 7 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 1
+ * beta = 1
+
+ median_unbiased:
+ method 8 of H&F [1]_.
+ This method is probably the best method if the sample
+ distribution function is unknown (see reference).
+ This method give continuous results using:
+ * alpha = 1/3
+ * beta = 1/3
+
+ normal_unbiased:
+ method 9 of H&F [1]_.
+ This method is probably the best method if the sample
+ distribution function is known to be normal.
+ This method give continuous results using:
+ * alpha = 3/8
+ * beta = 3/8
+
+ lower:
+ NumPy method kept for backwards compatibility.
+ Takes ``i`` as the interpolation point.
+
+ higher:
+ NumPy method kept for backwards compatibility.
+ Takes ``j`` as the interpolation point.
+
+ nearest:
+ NumPy method kept for backwards compatibility.
+ Takes ``i`` or ``j``, whichever is nearest.
+
+ midpoint:
+ NumPy method kept for backwards compatibility.
+ Uses ``(i + j) / 2``.
Examples
--------
@@ -3864,7 +4084,7 @@ def percentile(a, q, axis=None, out=None,
array([7., 2.])
>>> assert not np.all(a == b)
- The different types of interpolation can be visualized graphically:
+ The different methods can be visualized graphically:
.. plot::
@@ -3874,41 +4094,62 @@ def percentile(a, q, axis=None, out=None,
p = np.linspace(0, 100, 6001)
ax = plt.gca()
lines = [
- ('linear', None),
- ('higher', '--'),
- ('lower', '--'),
- ('nearest', '-.'),
- ('midpoint', '-.'),
- ]
- for interpolation, style in lines:
+ ('linear', '-', 'C0'),
+ ('inverted_cdf', ':', 'C1'),
+ # Almost the same as `inverted_cdf`:
+ ('averaged_inverted_cdf', '-.', 'C1'),
+ ('closest_observation', ':', 'C2'),
+ ('interpolated_inverted_cdf', '--', 'C1'),
+ ('hazen', '--', 'C3'),
+ ('weibull', '-.', 'C4'),
+ ('median_unbiased', '--', 'C5'),
+ ('normal_unbiased', '-.', 'C6'),
+ ]
+ for method, style, color in lines:
ax.plot(
- p, np.percentile(a, p, interpolation=interpolation),
- label=interpolation, linestyle=style)
+ p, np.percentile(a, p, method=method),
+ label=method, linestyle=style, color=color)
ax.set(
- title='Interpolation methods for list: ' + str(a),
+ title='Percentiles for different methods and data: ' + str(a),
xlabel='Percentile',
- ylabel='List item returned',
+ ylabel='Estimated percentile value',
yticks=a)
ax.legend()
plt.show()
+ References
+ ----------
+ .. [1] R. J. Hyndman and Y. Fan,
+ "Sample quantiles in statistical packages,"
+ The American Statistician, 50(4), pp. 361-365, 1996
+
"""
+ if interpolation is not None:
+ method = _check_interpolation_as_method(
+ method, interpolation, "percentile")
q = np.true_divide(q, 100)
q = asanyarray(q) # undo any decay that the ufunc performed (see gh-13105)
if not _quantile_is_valid(q):
raise ValueError("Percentiles must be in the range [0, 100]")
return _quantile_unchecked(
- a, q, axis, out, overwrite_input, interpolation, keepdims)
+ a, q, axis, out, overwrite_input, method, keepdims)
def _quantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
- interpolation=None, keepdims=None):
+ method=None, keepdims=None, *, interpolation=None):
return (a, q, out)
@array_function_dispatch(_quantile_dispatcher)
-def quantile(a, q, axis=None, out=None,
- overwrite_input=False, interpolation='linear', keepdims=False):
+def quantile(a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ method="linear",
+ keepdims=False,
+ *,
+ interpolation=None):
"""
Compute the q-th quantile of the data along the specified axis.
@@ -3922,34 +4163,55 @@ def quantile(a, q, axis=None, out=None,
Quantile or sequence of quantiles to compute, which must be between
0 and 1 inclusive.
axis : {int, tuple of int, None}, optional
- Axis or axes along which the quantiles are computed. The
- default is to compute the quantile(s) along a flattened
- version of the array.
+ Axis or axes along which the quantiles are computed. The default is
+ to compute the quantile(s) along a flattened version of the array.
out : ndarray, optional
- Alternative output array in which to place the result. It must
- have the same shape and buffer length as the expected output,
- but the type (of the output) will be cast if necessary.
+ Alternative output array in which to place the result. It must have
+ the same shape and buffer length as the expected output, but the
+ type (of the output) will be cast if necessary.
overwrite_input : bool, optional
- If True, then allow the input array `a` to be modified by intermediate
- calculations, to save memory. In this case, the contents of the input
- `a` after this function completes is undefined.
- interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
- This optional parameter specifies the interpolation method to
- use when the desired quantile lies between two data points
- ``i < j``:
-
- * linear: ``i + (j - i) * fraction``, where ``fraction``
- is the fractional part of the index surrounded by ``i``
- and ``j``.
- * lower: ``i``.
- * higher: ``j``.
- * nearest: ``i`` or ``j``, whichever is nearest.
- * midpoint: ``(i + j) / 2``.
+ If True, then allow the input array `a` to be modified by
+ intermediate calculations, to save memory. In this case, the
+ contents of the input `a` after this function completes is
+ undefined.
+ method : str, optional
+ This parameter specifies the method to use for estimating the
+ quantile. There are many different methods, some unique to NumPy.
+ See the notes for explanation. The options sorted by their R type
+ as summarized in the H&F paper [1]_ are:
+
+ 1. 'inverted_cdf'
+ 2. 'averaged_inverted_cdf'
+ 3. 'closest_observation'
+ 4. 'interpolated_inverted_cdf'
+ 5. 'hazen'
+ 6. 'weibull'
+ 7. 'linear' (default)
+ 8. 'median_unbiased'
+ 9. 'normal_unbiased'
+
+ The first three methods are discontiuous. NumPy further defines the
+ following discontinuous variations of the default 'linear' (7.) option:
+
+ * 'lower'
+ * 'higher',
+ * 'midpoint'
+ * 'nearest'
+
+ .. versionchanged:: 1.22.0
+ This argument was previously called "interpolation" and only
+ offered the "linear" default and last four options.
+
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the
result will broadcast correctly against the original array `a`.
+ interpolation : str, optional
+ Deprecated name for the method keyword argument.
+
+ .. deprecated:: 1.22.0
+
Returns
-------
quantile : scalar or ndarray
@@ -3971,14 +4233,98 @@ def quantile(a, q, axis=None, out=None,
Notes
-----
- Given a vector ``V`` of length ``N``, the q-th quantile of
- ``V`` is the value ``q`` of the way from the minimum to the
- maximum in a sorted copy of ``V``. The values and distances of
- the two nearest neighbors as well as the `interpolation` parameter
- will determine the quantile if the normalized ranking does not
- match the location of ``q`` exactly. This function is the same as
- the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and the
- same as the maximum if ``q=1.0``.
+ Given a vector ``V`` of length ``N``, the q-th quantile of ``V`` is the
+ value ``q`` of the way from the minimum to the maximum in a sorted copy of
+ ``V``. The values and distances of the two nearest neighbors as well as the
+ `method` parameter will determine the quantile if the normalized
+ ranking does not match the location of ``q`` exactly. This function is the
+ same as the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and
+ the same as the maximum if ``q=1.0``.
+
+ This optional `method` parameter specifies the method to use when the
+ desired quantile lies between two data points ``i < j``.
+ If ``g`` is the fractional part of the index surrounded by ``i`` and
+ alpha and beta are correction constants modifying i and j.
+
+ .. math::
+ i + g = (q - alpha) / ( n - alpha - beta + 1 )
+
+ The different methods then work as follows
+
+ inverted_cdf:
+ method 1 of H&F [1]_.
+ This method gives discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 ; then take i
+
+ averaged_inverted_cdf:
+ method 2 of H&F [1]_.
+ This method give discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 ; then average between bounds
+
+ closest_observation:
+ method 3 of H&F [1]_.
+ This method give discontinuous results:
+ * if g > 0 ; then take j
+ * if g = 0 and index is odd ; then take j
+ * if g = 0 and index is even ; then take i
+
+ interpolated_inverted_cdf:
+ method 4 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 0
+ * beta = 1
+
+ hazen:
+ method 5 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 1/2
+ * beta = 1/2
+
+ weibull:
+ method 6 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 0
+ * beta = 0
+
+ linear:
+ method 7 of H&F [1]_.
+ This method give continuous results using:
+ * alpha = 1
+ * beta = 1
+
+ median_unbiased:
+ method 8 of H&F [1]_.
+ This method is probably the best method if the sample
+ distribution function is unknown (see reference).
+ This method give continuous results using:
+ * alpha = 1/3
+ * beta = 1/3
+
+ normal_unbiased:
+ method 9 of H&F [1]_.
+ This method is probably the best method if the sample
+ distribution function is known to be normal.
+ This method give continuous results using:
+ * alpha = 3/8
+ * beta = 3/8
+
+ lower:
+ NumPy method kept for backwards compatibility.
+ Takes ``i`` as the interpolation point.
+
+ higher:
+ NumPy method kept for backwards compatibility.
+ Takes ``j`` as the interpolation point.
+
+ nearest:
+ NumPy method kept for backwards compatibility.
+ Takes ``i`` or ``j``, whichever is nearest.
+
+ midpoint:
+ NumPy method kept for backwards compatibility.
+ Uses ``(i + j) / 2``.
Examples
--------
@@ -4005,20 +4351,42 @@ def quantile(a, q, axis=None, out=None,
>>> np.quantile(b, 0.5, axis=1, overwrite_input=True)
array([7., 2.])
>>> assert not np.all(a == b)
+
+ See also `numpy.percentile` for a visualization of most methods.
+
+ References
+ ----------
+ .. [1] R. J. Hyndman and Y. Fan,
+ "Sample quantiles in statistical packages,"
+ The American Statistician, 50(4), pp. 361-365, 1996
+
"""
+ if interpolation is not None:
+ method = _check_interpolation_as_method(
+ method, interpolation, "quantile")
+
q = np.asanyarray(q)
if not _quantile_is_valid(q):
raise ValueError("Quantiles must be in the range [0, 1]")
return _quantile_unchecked(
- a, q, axis, out, overwrite_input, interpolation, keepdims)
+ a, q, axis, out, overwrite_input, method, keepdims)
-def _quantile_unchecked(a, q, axis=None, out=None, overwrite_input=False,
- interpolation='linear', keepdims=False):
+def _quantile_unchecked(a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ method="linear",
+ keepdims=False):
"""Assumes that q is in [0, 1], and is an ndarray"""
- r, k = _ureduce(a, func=_quantile_ureduce_func, q=q, axis=axis, out=out,
+ r, k = _ureduce(a,
+ func=_quantile_ureduce_func,
+ q=q,
+ axis=axis,
+ out=out,
overwrite_input=overwrite_input,
- interpolation=interpolation)
+ method=method)
if keepdims:
return r.reshape(q.shape + k)
else:
@@ -4037,122 +4405,278 @@ def _quantile_is_valid(q):
return True
+def _check_interpolation_as_method(method, interpolation, fname):
+ # Deprecated NumPy 1.22, 2021-11-08
+ warnings.warn(
+ f"the `interpolation=` argument to {fname} was renamed to "
+ "`method=`, which has additional options.\n"
+ "Users of the modes 'nearest', 'lower', 'higher', or "
+ "'midpoint' are encouraged to review the method they. "
+ "(Deprecated NumPy 1.22)",
+ DeprecationWarning, stacklevel=4)
+ if method != "linear":
+ # sanity check, we assume this basically never happens
+ raise TypeError(
+ "You shall not pass both `method` and `interpolation`!\n"
+ "(`interpolation` is Deprecated in favor of `method`)")
+ return interpolation
+
+
+def _compute_virtual_index(n, quantiles, alpha: float, beta: float):
+ """
+ Compute the floating point indexes of an array for the linear
+ interpolation of quantiles.
+ n : array_like
+ The sample sizes.
+ quantiles : array_like
+ The quantiles values.
+ alpha : float
+ A constant used to correct the index computed.
+ beta : float
+ A constant used to correct the index computed.
+
+ alpha and beta values depend on the chosen method
+ (see quantile documentation)
+
+ Reference:
+ Hyndman&Fan paper "Sample Quantiles in Statistical Packages",
+ DOI: 10.1080/00031305.1996.10473566
+ """
+ return n * quantiles + (
+ alpha + quantiles * (1 - alpha - beta)
+ ) - 1
+
+
+def _get_gamma(virtual_indexes, previous_indexes, method):
+ """
+ Compute gamma (a.k.a 'm' or 'weight') for the linear interpolation
+ of quantiles.
+
+ virtual_indexes : array_like
+ The indexes where the percentile is supposed to be found in the sorted
+ sample.
+ previous_indexes : array_like
+ The floor values of virtual_indexes.
+ interpolation : dict
+ The interpolation method chosen, which may have a specific rule
+ modifying gamma.
+
+ gamma is usually the fractional part of virtual_indexes but can be modified
+ by the interpolation method.
+ """
+ gamma = np.asanyarray(virtual_indexes - previous_indexes)
+ gamma = method["fix_gamma"](gamma, virtual_indexes)
+ return np.asanyarray(gamma)
+
+
def _lerp(a, b, t, out=None):
- """ Linearly interpolate from a to b by a factor of t """
+ """
+ Compute the linear interpolation weighted by gamma on each point of
+ two same shape array.
+
+ a : array_like
+ Left bound.
+ b : array_like
+ Right bound.
+ t : array_like
+ The interpolation weight.
+ out : array_like
+ Output array.
+ """
diff_b_a = subtract(b, a)
# asanyarray is a stop-gap until gh-13105
- lerp_interpolation = asanyarray(add(a, diff_b_a*t, out=out))
- subtract(b, diff_b_a * (1 - t), out=lerp_interpolation, where=t>=0.5)
+ lerp_interpolation = asanyarray(add(a, diff_b_a * t, out=out))
+ subtract(b, diff_b_a * (1 - t), out=lerp_interpolation, where=t >= 0.5)
if lerp_interpolation.ndim == 0 and out is None:
lerp_interpolation = lerp_interpolation[()] # unpack 0d arrays
return lerp_interpolation
-def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
- interpolation='linear', keepdims=False):
- a = asarray(a)
+def _get_gamma_mask(shape, default_value, conditioned_value, where):
+ out = np.full(shape, default_value)
+ np.copyto(out, conditioned_value, where=where, casting="unsafe")
+ return out
- # ufuncs cause 0d array results to decay to scalars (see gh-13105), which
- # makes them problematic for __setitem__ and attribute access. As a
- # workaround, we call this on the result of every ufunc on a possibly-0d
- # array.
- not_scalar = np.asanyarray
- # prepare a for partitioning
- if overwrite_input:
- if axis is None:
- ap = a.ravel()
- else:
- ap = a
- else:
- if axis is None:
- ap = a.flatten()
- else:
- ap = a.copy()
+def _discret_interpolation_to_boundaries(index, gamma_condition_fun):
+ previous = np.floor(index)
+ next = previous + 1
+ gamma = index - previous
+ res = _get_gamma_mask(shape=index.shape,
+ default_value=next,
+ conditioned_value=previous,
+ where=gamma_condition_fun(gamma, index)
+ ).astype(np.intp)
+ # Some methods can lead to out-of-bound integers, clip them:
+ res[res < 0] = 0
+ return res
- if axis is None:
- axis = 0
+def _closest_observation(n, quantiles):
+ gamma_fun = lambda gamma, index: (gamma == 0) & (np.floor(index) % 2 == 0)
+ return _discret_interpolation_to_boundaries((n * quantiles) - 1 - 0.5,
+ gamma_fun)
+
+
+def _inverted_cdf(n, quantiles):
+ gamma_fun = lambda gamma, _: (gamma == 0)
+ return _discret_interpolation_to_boundaries((n * quantiles) - 1,
+ gamma_fun)
+
+
+def _quantile_ureduce_func(
+ a: np.array,
+ q: np.array,
+ axis: int = None,
+ out=None,
+ overwrite_input: bool = False,
+ method="linear",
+) -> np.array:
if q.ndim > 2:
# The code below works fine for nd, but it might not have useful
# semantics. For now, keep the supported dimensions the same as it was
# before.
raise ValueError("q must be a scalar or 1d")
-
- Nx = ap.shape[axis]
- indices = not_scalar(q * (Nx - 1))
- # round fractional indices according to interpolation method
- if interpolation == 'lower':
- indices = floor(indices).astype(intp)
- elif interpolation == 'higher':
- indices = ceil(indices).astype(intp)
- elif interpolation == 'midpoint':
- indices = 0.5 * (floor(indices) + ceil(indices))
- elif interpolation == 'nearest':
- indices = around(indices).astype(intp)
- elif interpolation == 'linear':
- pass # keep index as fraction and interpolate
+ if overwrite_input:
+ if axis is None:
+ axis = 0
+ arr = a.ravel()
+ else:
+ arr = a
else:
- raise ValueError(
- "interpolation can only be 'linear', 'lower' 'higher', "
- "'midpoint', or 'nearest'")
+ if axis is None:
+ axis = 0
+ arr = a.flatten()
+ else:
+ arr = a.copy()
+ result = _quantile(arr,
+ quantiles=q,
+ axis=axis,
+ method=method,
+ out=out)
+ return result
- # The dimensions of `q` are prepended to the output shape, so we need the
- # axis being sampled from `ap` to be first.
- ap = np.moveaxis(ap, axis, 0)
- del axis
- if np.issubdtype(indices.dtype, np.integer):
- # take the points along axis
+def _get_indexes(arr, virtual_indexes, valid_values_count):
+ """
+ Get the valid indexes of arr neighbouring virtual_indexes.
+ Note
+ This is a companion function to linear interpolation of
+ Quantiles
- if np.issubdtype(a.dtype, np.inexact):
+ Returns
+ -------
+ (previous_indexes, next_indexes): Tuple
+ A Tuple of virtual_indexes neighbouring indexes
+ """
+ previous_indexes = np.asanyarray(np.floor(virtual_indexes))
+ next_indexes = np.asanyarray(previous_indexes + 1)
+ indexes_above_bounds = virtual_indexes >= valid_values_count - 1
+ # When indexes is above max index, take the max value of the array
+ if indexes_above_bounds.any():
+ previous_indexes[indexes_above_bounds] = -1
+ next_indexes[indexes_above_bounds] = -1
+ # When indexes is below min index, take the min value of the array
+ indexes_below_bounds = virtual_indexes < 0
+ if indexes_below_bounds.any():
+ previous_indexes[indexes_below_bounds] = 0
+ next_indexes[indexes_below_bounds] = 0
+ if np.issubdtype(arr.dtype, np.inexact):
+ # After the sort, slices having NaNs will have for last element a NaN
+ virtual_indexes_nans = np.isnan(virtual_indexes)
+ if virtual_indexes_nans.any():
+ previous_indexes[virtual_indexes_nans] = -1
+ next_indexes[virtual_indexes_nans] = -1
+ previous_indexes = previous_indexes.astype(np.intp)
+ next_indexes = next_indexes.astype(np.intp)
+ return previous_indexes, next_indexes
+
+
+def _quantile(
+ arr: np.array,
+ quantiles: np.array,
+ axis: int = -1,
+ method="linear",
+ out=None,
+):
+ """
+ Private function that doesn't support extended axis or keepdims.
+ These methods are extended to this function using _ureduce
+ See nanpercentile for parameter usage
+ It computes the quantiles of the array for the given axis.
+ A linear interpolation is performed based on the `interpolation`.
+
+ By default, the method is "linear" where alpha == beta == 1 which
+ performs the 7th method of Hyndman&Fan.
+ With "median_unbiased" we get alpha == beta == 1/3
+ thus the 8th method of Hyndman&Fan.
+ """
+ # --- Setup
+ arr = np.asanyarray(arr)
+ values_count = arr.shape[axis]
+ # The dimensions of `q` are prepended to the output shape, so we need the
+ # axis being sampled from `arr` to be last.
+ DATA_AXIS = 0
+ if axis != DATA_AXIS: # But moveaxis is slow, so only call it if axis!=0.
+ arr = np.moveaxis(arr, axis, destination=DATA_AXIS)
+ # --- Computation of indexes
+ # Index where to find the value in the sorted array.
+ # Virtual because it is a floating point value, not an valid index.
+ # The nearest neighbours are used for interpolation
+ try:
+ method = _QuantileMethods[method]
+ except KeyError:
+ raise ValueError(
+ f"{method!r} is not a valid method. Use one of: "
+ f"{_QuantileMethods.keys()}") from None
+ virtual_indexes = method["get_virtual_index"](values_count, quantiles)
+ virtual_indexes = np.asanyarray(virtual_indexes)
+ if np.issubdtype(virtual_indexes.dtype, np.integer):
+ # No interpolation needed, take the points along axis
+ if np.issubdtype(arr.dtype, np.inexact):
# may contain nan, which would sort to the end
- ap.partition(concatenate((indices.ravel(), [-1])), axis=0)
- n = np.isnan(ap[-1])
+ arr.partition(concatenate((virtual_indexes.ravel(), [-1])), axis=0)
+ slices_having_nans = np.isnan(arr[-1])
else:
# cannot contain nan
- ap.partition(indices.ravel(), axis=0)
- n = np.array(False, dtype=bool)
-
- r = take(ap, indices, axis=0, out=out)
-
+ arr.partition(virtual_indexes.ravel(), axis=0)
+ slices_having_nans = np.array(False, dtype=bool)
+ result = take(arr, virtual_indexes, axis=0, out=out)
else:
- # weight the points above and below the indices
-
- indices_below = not_scalar(floor(indices)).astype(intp)
- indices_above = not_scalar(indices_below + 1)
- indices_above[indices_above > Nx - 1] = Nx - 1
-
- if np.issubdtype(a.dtype, np.inexact):
- # may contain nan, which would sort to the end
- ap.partition(concatenate((
- indices_below.ravel(), indices_above.ravel(), [-1]
- )), axis=0)
- n = np.isnan(ap[-1])
+ previous_indexes, next_indexes = _get_indexes(arr,
+ virtual_indexes,
+ values_count)
+ # --- Sorting
+ arr.partition(
+ np.unique(np.concatenate(([0, -1],
+ previous_indexes.ravel(),
+ next_indexes.ravel(),
+ ))),
+ axis=DATA_AXIS)
+ if np.issubdtype(arr.dtype, np.inexact):
+ slices_having_nans = np.isnan(
+ take(arr, indices=-1, axis=DATA_AXIS)
+ )
else:
- # cannot contain nan
- ap.partition(concatenate((
- indices_below.ravel(), indices_above.ravel()
- )), axis=0)
- n = np.array(False, dtype=bool)
-
- weights_shape = indices.shape + (1,) * (ap.ndim - 1)
- weights_above = not_scalar(indices - indices_below).reshape(weights_shape)
-
- x_below = take(ap, indices_below, axis=0)
- x_above = take(ap, indices_above, axis=0)
-
- r = _lerp(x_below, x_above, weights_above, out=out)
-
- # if any slice contained a nan, then all results on that slice are also nan
- if np.any(n):
- if r.ndim == 0 and out is None:
+ slices_having_nans = None
+ # --- Get values from indexes
+ previous = np.take(arr, previous_indexes, axis=DATA_AXIS)
+ next = np.take(arr, next_indexes, axis=DATA_AXIS)
+ # --- Linear interpolation
+ gamma = _get_gamma(virtual_indexes, previous_indexes, method)
+ result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1)
+ gamma = gamma.reshape(result_shape)
+ result = _lerp(previous,
+ next,
+ gamma,
+ out=out)
+ if np.any(slices_having_nans):
+ if result.ndim == 0 and out is None:
# can't write to a scalar
- r = a.dtype.type(np.nan)
+ result = arr.dtype.type(np.nan)
else:
- r[..., n] = a.dtype.type(np.nan)
-
- return r
+ result[..., slices_having_nans] = np.nan
+ return result
def _trapz_dispatcher(y, x=None, dx=None, axis=None):
@@ -4753,9 +5277,8 @@ def insert(arr, obj, values, axis=None):
if indices.size == 1:
index = indices.item()
if index < -N or index > N:
- raise IndexError(
- "index %i is out of bounds for axis %i with "
- "size %i" % (obj, axis, N))
+ raise IndexError(f"index {obj} is out of bounds for axis {axis} "
+ f"with size {N}")
if (index < 0):
index += N