summaryrefslogtreecommitdiff
path: root/numpy/lib/function_base.py
diff options
context:
space:
mode:
authorCharles Harris <charlesr.harris@gmail.com>2021-11-15 17:31:41 -0700
committerGitHub <noreply@github.com>2021-11-15 17:31:41 -0700
commit376ad691fe4df77e502108d279872f56b30376dc (patch)
tree11635deb5fb17746e7d18035d747edc7c1217a31 /numpy/lib/function_base.py
parentb75fe5766c3739972e8f5c10a85f320f08e74d26 (diff)
parent546c47adae1066411ff7a3e3da5d758236ee90cf (diff)
downloadnumpy-376ad691fe4df77e502108d279872f56b30376dc.tar.gz
Merge pull request #20327 from seberg/rename-interpolation
BUG,DEP: Fixup quantile/percentile and rename interpolation->method
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r--numpy/lib/function_base.py269
1 files changed, 158 insertions, 111 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 3c9983edf..a215f63d3 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -50,8 +50,8 @@ __all__ = [
'quantile'
]
-# _QuantileInterpolation is a dictionary listing all the supported
-# interpolation methods to compute quantile/percentile.
+# _QuantileMethods is a dictionary listing all the supported methods to
+# compute quantile/percentile.
#
# Below, virtual_index refers to the index of the element where the percentile
# would be found in the sorted sample.
@@ -61,13 +61,13 @@ __all__ = [
# is made of an integer part (a.k.a 'i' or 'left') and a fractional part
# (a.k.a 'g' or 'gamma')
#
-# Each _QuantileInterpolation have two properties
+# Each method in _QuantileMethods has two properties
# get_virtual_index : Callable
# The function used to compute the virtual_index.
# fix_gamma : Callable
# A function used for discrete methods to force the index to a specific value.
-_QuantileInterpolation = dict(
- # --- HYNDMAN AND FAN METHODS
+_QuantileMethods = dict(
+ # --- HYNDMAN and FAN METHODS
# Discrete methods
inverted_cdf=dict(
get_virtual_index=lambda n, quantiles: _inverted_cdf(n, quantiles),
@@ -3854,7 +3854,7 @@ def _median(a, axis=None, out=None, overwrite_input=False):
def _percentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
- interpolation=None, keepdims=None):
+ method=None, keepdims=None, *, interpolation=None):
return (a, q, out)
@@ -3864,8 +3864,10 @@ def percentile(a,
axis=None,
out=None,
overwrite_input=False,
- interpolation="linear",
- keepdims=False):
+ method="linear",
+ keepdims=False,
+ *,
+ interpolation=None):
"""
Compute the q-th percentile of the data along the specified axis.
@@ -3893,31 +3895,33 @@ def percentile(a,
If True, then allow the input array `a` to be modified by intermediate
calculations, to save memory. In this case, the contents of the input
`a` after this function completes is undefined.
- interpolation : str, optional
- This parameter specifies the interpolation method to
- use when the desired percentile lies between two data points
- There are many different methods, some unique to NumPy. See the
- notes for explanation. Options
-
- * (NPY 1): 'lower'
- * (NPY 2): 'higher',
- * (NPY 3): 'midpoint'
- * (NPY 4): 'nearest'
- * (NPY 5): 'linear'
-
- New options:
-
- * (H&F 1): 'inverted_cdf'
- * (H&F 2): 'averaged_inverted_cdf'
- * (H&F 3): 'closest_observation'
- * (H&F 4): 'interpolated_inverted_cdf'
- * (H&F 5): 'hazen'
- * (H&F 6): 'weibull'
- * (H&F 7): 'linear' (default)
- * (H&F 8): 'median_unbiased'
- * (H&F 9): 'normal_unbiased'
+ method : str, optional
+ This parameter specifies the method to use for estimating the
+ percentile. There are many different methods, some unique to NumPy.
+ See the notes for explanation. The options sorted by their R type
+ as summarized in the H&F paper [1]_ are:
+
+ 1. 'inverted_cdf'
+ 2. 'averaged_inverted_cdf'
+ 3. 'closest_observation'
+ 4. 'interpolated_inverted_cdf'
+ 5. 'hazen'
+ 6. 'weibull'
+ 7. 'linear' (default)
+ 8. 'median_unbiased'
+ 9. 'normal_unbiased'
+
+ The first three methods are discontinuous. NumPy further defines the
+ following discontinuous variations of the default 'linear' (7.) option:
+
+ * 'lower'
+ * 'higher'
+ * 'midpoint'
+ * 'nearest'
.. versionchanged:: 1.22.0
+ This argument was previously called "interpolation" and only
+ offered the "linear" default and last four options.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in
@@ -3926,6 +3930,11 @@ def percentile(a,
.. versionadded:: 1.9.0
+ interpolation : str, optional
+ Deprecated name for the method keyword argument.
+
+ .. deprecated:: 1.22.0
+
Returns
-------
percentile : scalar or ndarray
@@ -3950,16 +3959,16 @@ def percentile(a,
Given a vector ``V`` of length ``N``, the q-th percentile of ``V`` is
the value ``q/100`` of the way from the minimum to the maximum in a
sorted copy of ``V``. The values and distances of the two nearest
- neighbors as well as the `interpolation` parameter will determine the
+ neighbors as well as the `method` parameter will determine the
percentile if the normalized ranking does not match the location of
``q`` exactly. This function is the same as the median if ``q=50``, the
same as the minimum if ``q=0`` and the same as the maximum if
``q=100``.
- This optional `interpolation` parameter specifies the interpolation
- method to use when the desired quantile lies between two data points
- ``i < j``. If ``g`` is the fractional part of the index surrounded by
- ``i`` and alpha and beta are correction constants modifying i and j.
+ This optional `method` parameter specifies the method to use when the
+ desired quantile lies between two data points ``i < j``.
+ Here ``g`` is the fractional part of the index surrounded by ``i`` and
+ ``j``, and alpha and beta are correction constants modifying i and j.
Below, 'q' is the quantile value, 'n' is the sample size and
alpha and beta are constants.
@@ -3970,7 +3979,7 @@ def percentile(a,
.. math::
i + g = q * ( n - alpha - beta + 1 ) + alpha
- The different interpolation methods then work as follows
+ The different methods then work as follows
inverted_cdf:
method 1 of H&F [1]_.
@@ -4075,7 +4084,7 @@ def percentile(a,
array([7., 2.])
>>> assert not np.all(a == b)
- The different types of interpolation can be visualized graphically:
+ The different methods can be visualized graphically:
.. plot::
@@ -4085,20 +4094,25 @@ def percentile(a,
p = np.linspace(0, 100, 6001)
ax = plt.gca()
lines = [
- ('linear', None),
- ('higher', '--'),
- ('lower', '--'),
- ('nearest', '-.'),
- ('midpoint', '-.'),
- ]
- for interpolation, style in lines:
+ ('linear', '-', 'C0'),
+ ('inverted_cdf', ':', 'C1'),
+ # Almost the same as `inverted_cdf`:
+ ('averaged_inverted_cdf', '-.', 'C1'),
+ ('closest_observation', ':', 'C2'),
+ ('interpolated_inverted_cdf', '--', 'C1'),
+ ('hazen', '--', 'C3'),
+ ('weibull', '-.', 'C4'),
+ ('median_unbiased', '--', 'C5'),
+ ('normal_unbiased', '-.', 'C6'),
+ ]
+ for method, style, color in lines:
ax.plot(
- p, np.percentile(a, p, interpolation=interpolation),
- label=interpolation, linestyle=style)
+ p, np.percentile(a, p, method=method),
+ label=method, linestyle=style, color=color)
ax.set(
- title='Interpolation methods for list: ' + str(a),
+ title='Percentiles for different methods and data: ' + str(a),
xlabel='Percentile',
- ylabel='List item returned',
+ ylabel='Estimated percentile value',
yticks=a)
ax.legend()
plt.show()
@@ -4110,16 +4124,19 @@ def percentile(a,
The American Statistician, 50(4), pp. 361-365, 1996
"""
+ if interpolation is not None:
+ method = _check_interpolation_as_method(
+ method, interpolation, "percentile")
q = np.true_divide(q, 100)
q = asanyarray(q) # undo any decay that the ufunc performed (see gh-13105)
if not _quantile_is_valid(q):
raise ValueError("Percentiles must be in the range [0, 100]")
return _quantile_unchecked(
- a, q, axis, out, overwrite_input, interpolation, keepdims)
+ a, q, axis, out, overwrite_input, method, keepdims)
def _quantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
- interpolation=None, keepdims=None):
+ method=None, keepdims=None, *, interpolation=None):
return (a, q, out)
@@ -4129,8 +4146,10 @@ def quantile(a,
axis=None,
out=None,
overwrite_input=False,
- interpolation="linear",
- keepdims=False):
+ method="linear",
+ keepdims=False,
+ *,
+ interpolation=None):
"""
Compute the q-th quantile of the data along the specified axis.
@@ -4155,37 +4174,44 @@ def quantile(a,
intermediate calculations, to save memory. In this case, the
contents of the input `a` after this function completes is
undefined.
- interpolation : str, optional
- This parameter specifies the interpolation method to use when the
- desired quantile lies between two data points There are many
- different methods, some unique to NumPy. See the notes for
- explanation. Options:
-
- * (NPY 1): 'lower'
- * (NPY 2): 'higher',
- * (NPY 3): 'midpoint'
- * (NPY 4): 'nearest'
- * (NPY 5): 'linear'
-
- New options:
-
- * (H&F 1): 'inverted_cdf'
- * (H&F 2): 'averaged_inverted_cdf'
- * (H&F 3): 'closest_observation'
- * (H&F 4): 'interpolated_inverted_cdf'
- * (H&F 5): 'hazen'
- * (H&F 6): 'weibull'
- * (H&F 7): 'linear' (default)
- * (H&F 8): 'median_unbiased'
- * (H&F 9): 'normal_unbiased'
-
- .. versionadded:: 1.22.0
+ method : str, optional
+ This parameter specifies the method to use for estimating the
+ quantile. There are many different methods, some unique to NumPy.
+ See the notes for explanation. The options sorted by their R type
+ as summarized in the H&F paper [1]_ are:
+
+ 1. 'inverted_cdf'
+ 2. 'averaged_inverted_cdf'
+ 3. 'closest_observation'
+ 4. 'interpolated_inverted_cdf'
+ 5. 'hazen'
+ 6. 'weibull'
+ 7. 'linear' (default)
+ 8. 'median_unbiased'
+ 9. 'normal_unbiased'
+
+ The first three methods are discontinuous. NumPy further defines the
+ following discontinuous variations of the default 'linear' (7.) option:
+
+ * 'lower'
+ * 'higher'
+ * 'midpoint'
+ * 'nearest'
+
+ .. versionchanged:: 1.22.0
+ This argument was previously called "interpolation" and only
+ offered the "linear" default and last four options.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the
result will broadcast correctly against the original array `a`.
+ interpolation : str, optional
+ Deprecated name for the method keyword argument.
+
+ .. deprecated:: 1.22.0
+
Returns
-------
quantile : scalar or ndarray
@@ -4210,20 +4236,20 @@ def quantile(a,
Given a vector ``V`` of length ``N``, the q-th quantile of ``V`` is the
value ``q`` of the way from the minimum to the maximum in a sorted copy of
``V``. The values and distances of the two nearest neighbors as well as the
- `interpolation` parameter will determine the quantile if the normalized
+ `method` parameter will determine the quantile if the normalized
ranking does not match the location of ``q`` exactly. This function is the
same as the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and
the same as the maximum if ``q=1.0``.
- This optional `interpolation` parameter specifies the interpolation method
- to use when the desired quantile lies between two data points ``i < j``. If
- ``g`` is the fractional part of the index surrounded by ``i`` and alpha
- and beta are correction constants modifying i and j.
+ This optional `method` parameter specifies the method to use when the
+ desired quantile lies between two data points ``i < j``.
+ Here ``g`` is the fractional part of the index surrounded by ``i`` and
+ ``j``, and alpha and beta are correction constants modifying i and j.
.. math::
i + g = q * ( n - alpha - beta + 1 ) + alpha
- The different interpolation methods then work as follows
+ The different methods then work as follows
inverted_cdf:
method 1 of H&F [1]_.
@@ -4326,6 +4352,8 @@ def quantile(a,
array([7., 2.])
>>> assert not np.all(a == b)
+ See also `numpy.percentile` for a visualization of most methods.
+
References
----------
.. [1] R. J. Hyndman and Y. Fan,
@@ -4333,11 +4361,15 @@ def quantile(a,
The American Statistician, 50(4), pp. 361-365, 1996
"""
+ if interpolation is not None:
+ method = _check_interpolation_as_method(
+ method, interpolation, "quantile")
+
q = np.asanyarray(q)
if not _quantile_is_valid(q):
raise ValueError("Quantiles must be in the range [0, 1]")
return _quantile_unchecked(
- a, q, axis, out, overwrite_input, interpolation, keepdims)
+ a, q, axis, out, overwrite_input, method, keepdims)
def _quantile_unchecked(a,
@@ -4345,7 +4377,7 @@ def _quantile_unchecked(a,
axis=None,
out=None,
overwrite_input=False,
- interpolation="linear",
+ method="linear",
keepdims=False):
"""Assumes that q is in [0, 1], and is an ndarray"""
r, k = _ureduce(a,
@@ -4354,7 +4386,7 @@ def _quantile_unchecked(a,
axis=axis,
out=out,
overwrite_input=overwrite_input,
- interpolation=interpolation)
+ method=method)
if keepdims:
return r.reshape(q.shape + k)
else:
@@ -4373,6 +4405,23 @@ def _quantile_is_valid(q):
return True
+def _check_interpolation_as_method(method, interpolation, fname):
+ # Deprecated NumPy 1.22, 2021-11-08
+ warnings.warn(
+ f"the `interpolation=` argument to {fname} was renamed to "
+ "`method=`, which has additional options.\n"
+ "Users of the modes 'nearest', 'lower', 'higher', or "
+ "'midpoint' are encouraged to review the method they. "
+ "(Deprecated NumPy 1.22)",
+ DeprecationWarning, stacklevel=4)
+ if method != "linear":
+ # sanity check, we assume this basically never happens
+ raise TypeError(
+ "You shall not pass both `method` and `interpolation`!\n"
+ "(`interpolation` is Deprecated in favor of `method`)")
+ return interpolation
+
+
def _compute_virtual_index(n, quantiles, alpha: float, beta: float):
"""
Compute the floating point indexes of an array for the linear
@@ -4398,9 +4447,7 @@ def _compute_virtual_index(n, quantiles, alpha: float, beta: float):
) - 1
-def _get_gamma(virtual_indexes,
- previous_indexes,
- interpolation: _QuantileInterpolation):
+def _get_gamma(virtual_indexes, previous_indexes, method):
"""
Compute gamma (a.k.a 'm' or 'weight') for the linear interpolation
of quantiles.
@@ -4410,7 +4457,7 @@ def _get_gamma(virtual_indexes,
sample.
previous_indexes : array_like
The floor values of virtual_indexes.
- interpolation : _QuantileInterpolation
+ method : dict
The interpolation method chosen, which may have a specific rule
modifying gamma.
@@ -4418,7 +4465,7 @@ def _get_gamma(virtual_indexes,
by the interpolation method.
"""
gamma = np.asanyarray(virtual_indexes - previous_indexes)
- gamma = interpolation["fix_gamma"](gamma, virtual_indexes)
+ gamma = method["fix_gamma"](gamma, virtual_indexes)
return np.asanyarray(gamma)
@@ -4447,7 +4494,7 @@ def _lerp(a, b, t, out=None):
def _get_gamma_mask(shape, default_value, conditioned_value, where):
out = np.full(shape, default_value)
- out[where] = conditioned_value
+ np.copyto(out, conditioned_value, where=where, casting="unsafe")
return out
@@ -4455,11 +4502,14 @@ def _discret_interpolation_to_boundaries(index, gamma_condition_fun):
previous = np.floor(index)
next = previous + 1
gamma = index - previous
- return _get_gamma_mask(shape=index.shape,
- default_value=next,
- conditioned_value=previous,
- where=gamma_condition_fun(gamma, index)
- ).astype(np.intp)
+ res = _get_gamma_mask(shape=index.shape,
+ default_value=next,
+ conditioned_value=previous,
+ where=gamma_condition_fun(gamma, index)
+ ).astype(np.intp)
+ # Some methods can lead to out-of-bound integers, clip them:
+ res[res < 0] = 0
+ return res
def _closest_observation(n, quantiles):
@@ -4480,7 +4530,7 @@ def _quantile_ureduce_func(
axis: int = None,
out=None,
overwrite_input: bool = False,
- interpolation="linear",
+ method="linear",
) -> np.array:
if q.ndim > 2:
# The code below works fine for nd, but it might not have useful
@@ -4502,7 +4552,7 @@ def _quantile_ureduce_func(
result = _quantile(arr,
quantiles=q,
axis=axis,
- interpolation=interpolation,
+ method=method,
out=out)
return result
@@ -4546,7 +4596,7 @@ def _quantile(
arr: np.array,
quantiles: np.array,
axis: int = -1,
- interpolation="linear",
+ method="linear",
out=None,
):
"""
@@ -4556,8 +4606,8 @@ def _quantile(
It computes the quantiles of the array for the given axis.
A linear interpolation is performed based on the `method`.
- By default, the interpolation is "linear" where
- alpha == beta == 1 which performs the 7th method of Hyndman&Fan.
+ By default, the method is "linear" where alpha == beta == 1 which
+ performs the 7th method of Hyndman&Fan.
With "median_unbiased" we get alpha == beta == 1/3
thus the 8th method of Hyndman&Fan.
"""
@@ -4574,13 +4624,12 @@ def _quantile(
# Virtual because it is a floating point value, not a valid index.
# The nearest neighbours are used for interpolation
try:
- interpolation = _QuantileInterpolation[interpolation]
+ method = _QuantileMethods[method]
except KeyError:
raise ValueError(
- f"{interpolation!r} is not a valid interpolation. Use one of: "
- f"{_QuantileInterpolation.keys()}") from None
- virtual_indexes = interpolation["get_virtual_index"](values_count,
- quantiles)
+ f"{method!r} is not a valid method. Use one of: "
+ f"{_QuantileMethods.keys()}") from None
+ virtual_indexes = method["get_virtual_index"](values_count, quantiles)
virtual_indexes = np.asanyarray(virtual_indexes)
if np.issubdtype(virtual_indexes.dtype, np.integer):
# No interpolation needed, take the points along axis
@@ -4614,9 +4663,7 @@ def _quantile(
previous = np.take(arr, previous_indexes, axis=DATA_AXIS)
next = np.take(arr, next_indexes, axis=DATA_AXIS)
# --- Linear interpolation
- gamma = _get_gamma(virtual_indexes,
- previous_indexes,
- interpolation)
+ gamma = _get_gamma(virtual_indexes, previous_indexes, method)
result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1)
gamma = gamma.reshape(result_shape)
result = _lerp(previous,