From cd7a02a4db7e760b881f3feeb832ffd84fa8645a Mon Sep 17 00:00:00 2001
From: abel <aoun@cerfacs.fr>
Date: Thu, 2 Sep 2021 16:34:42 +0200
Subject: MAINT, ENH [#10736] Add interpolation methods to quantile

- Added the missing linear interpolation methods.
- Updated the existing unit tests.

- Added pytest.mark.xfail for boolean arrays
See
- https://github.com/numpy/numpy/pull/19857#issuecomment-919258693
- https://github.com/numpy/numpy/issues/19154
---
 numpy/lib/function_base.py | 593 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 469 insertions(+), 124 deletions(-)

(limited to 'numpy/lib/function_base.py')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 20e32a78d..67a34f6e1 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -52,6 +52,89 @@ __all__ = [
     ]
 
 
+_QuantileInterpolation = dict(
+    # --- HYNDMAN and FAN methods
+    # Discrete methods
+    inverted_cdf=dict(
+        get_virtual_index=lambda n, quantiles: _inverted_cdf(n, quantiles),
+        fix_gamma=lambda gamma, _: gamma,  # should never be called
+    ),
+    averaged_inverted_cdf=dict(
+        get_virtual_index=lambda n, quantiles: (n * quantiles) - 1,
+        fix_gamma=lambda gamma, _: _get_gamma_mask(
+            shape=gamma.shape,
+            default_value=1.,
+            conditioned_value=0.5,
+            where=gamma == 0),
+    ),
+    closest_observation=dict(
+        get_virtual_index=lambda n, quantiles: _closest_observation(n,
+                                                                    quantiles),
+        fix_gamma=lambda gamma, _: gamma,  # should never be called
+    ),
+    # Continuous methods
+    interpolated_inverted_cdf=dict(
+        get_virtual_index=lambda n, quantiles:
+            _virtual_index_formula(n, quantiles, 0, 1),
+        fix_gamma=lambda gamma, _: gamma,
+    ),
+    hazen=dict(
+        get_virtual_index=lambda n, quantiles:
+            _virtual_index_formula(n, quantiles, 0.5, 0.5),
+        fix_gamma=lambda gamma, _: gamma,
+    ),
+    weibull=dict(
+        get_virtual_index=lambda n, quantiles:
+            _virtual_index_formula(n, quantiles, 0, 0),
+        fix_gamma=lambda gamma, _: gamma,
+    ),
+    # Default value
+    linear=dict(
+        get_virtual_index=lambda n, quantiles:
+            _virtual_index_formula(n, quantiles, 1, 1),
+        fix_gamma=lambda gamma, _: gamma,
+    ),
+    median_unbiased=dict(
+        get_virtual_index=lambda n, quantiles:
+            _virtual_index_formula(n, quantiles, 1 / 3.0, 1 / 3.0),
+        fix_gamma=lambda gamma, _: gamma,
+    ),
+    normal_unbiased=dict(
+        get_virtual_index=lambda n, quantiles:
+        _virtual_index_formula(n, quantiles, 3 / 8.0, 3 / 8.0),
+        fix_gamma=lambda gamma, _: gamma,
+    ),
+    # --- OTHER METHODS fixme add deprecated ?
+    lower=dict(
+        get_virtual_index=lambda n, quantiles: np.floor(
+            (n - 1) * quantiles).astype(np.intp),
+        fix_gamma=lambda gamma, _: gamma,
+        # should never be called, index dtype is int
+    ),
+    higher=dict(
+        get_virtual_index=lambda n, quantiles: np.ceil(
+            (n - 1) * quantiles).astype(np.intp),
+        fix_gamma=lambda gamma, _: gamma,
+        # should never be called, index dtype is int
+    ),
+    midpoint=dict(
+        get_virtual_index=lambda n, quantiles: 0.5 * (
+                np.floor((n - 1) * quantiles)
+                + np.ceil((n - 1) * quantiles)),
+        fix_gamma=lambda gamma, index: _get_gamma_mask(
+            shape=gamma.shape,
+            default_value=0.5,
+            conditioned_value=0.,
+            where=index % 1 == 0),
+    ),
+    nearest=dict(
+        get_virtual_index=lambda n, quantiles: np.around(
+            (n - 1) * quantiles).astype(np.intp),
+        fix_gamma=lambda gamma, _: gamma,
+        # should never be called, index dtype is int
+    ))
+
+
 def _rot90_dispatcher(m, k=None, axes=None):
     return (m,)
 
@@ -3760,8 +3843,13 @@ def _percentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
 
 
 @array_function_dispatch(_percentile_dispatcher)
-def percentile(a, q, axis=None, out=None,
-               overwrite_input=False, interpolation='linear', keepdims=False):
+def percentile(a,
+               q,
+               axis=None,
+               out=None,
+               overwrite_input=False,
+               interpolation="linear",
+               keepdims=False):
     """
     Compute the q-th percentile of the data along the specified axis.
 
@@ -3790,20 +3878,75 @@ def percentile(a, q, axis=None, out=None,
         calculations, to save memory. In this case, the contents of the input
         `a` after this function completes is undefined.
 
-    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+    interpolation : str
+        Possible values: 'linear' (default),
+                        'inverted_cdf', 'averaged_inverted_cdf',
+                        'closest_observation', 'interpolated_inverted_cdf',
+                        'hazen', 'weibull',
+                        'median_unbiased', 'normal_unbiased',
+                        'lower', 'higher',
+                        'midpoint', 'nearest'.
         This optional parameter specifies the interpolation method to
-        use when the desired percentile lies between two data points
-        ``i < j``:
+        use when the desired quantile lies between two data points ``i < j``.
+        g is the fractional part of the index surrounded by ``i``.
+        alpha and beta are correction constants modifying i and j:
+        i + g = (q - alpha) / ( n - alpha - beta + 1 )
+        * inverted_cdf:
+        method 1 of H&F.
+        This method give discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 ; then take i
+        * averaged_inverted_cdf:
+        method 2 of H&F.
+        This method give discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 ; then average between bounds
+        * closest_observation:
+        method 3 of H&F.
+        This method give discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 and index is odd ; then take j
+            if g = 0 and index is even ; then take i
+        * interpolated_inverted_cdf:
+        method 4 of H&F.
+        This method give continuous results using:
+            alpha = 0
+            beta = 1
+        * hazen:
+        method 5 of H&F.
+        This method give continuous results using:
+            alpha = 1/2
+            beta = 1/2
+        * weibull:
+        method 6 of H&F.
+        This method give continuous results using:
+            alpha = 0
+            beta = 0
+        * inclusive:
+        Default method, aliased with "linear".
+        method 7 of H&F.
+        This method give continuous results using:
+            alpha = 1
+            beta = 1
+        * median_unbiased:
+        method 8 of H&F.
+        This method is probably the best method if the sample distribution
+        function is unknown (see reference).
+        This method give continuous results using:
+            alpha = 1/3
+            beta = 1/3
+        * normal_unbiased:
+        method 9 of H&F.
+        This method is probably the best method if the sample distribution
+        function is known to be normal.
+        This method give continuous results using:
+            alpha = 3/8
+            beta = 3/8
+        * lower: ``i``.
+        * higher: ``j``.
+        * nearest: ``i`` or ``j``, whichever is nearest.
+        * midpoint: ``(i + j) / 2``.
 
-        * 'linear': ``i + (j - i) * fraction``, where ``fraction``
-          is the fractional part of the index surrounded by ``i``
-          and ``j``.
-        * 'lower': ``i``.
-        * 'higher': ``j``.
-        * 'nearest': ``i`` or ``j``, whichever is nearest.
-        * 'midpoint': ``(i + j) / 2``.
-
-        .. versionadded:: 1.9.0
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left in
         the result as dimensions with size one. With this option, the
@@ -3897,6 +4040,11 @@ def percentile(a, q, axis=None, out=None,
         ax.legend()
         plt.show()
 
+    References
+    ----------
+    [Hyndman&Fan] Hyndman, R. J., & Fan, Y. (1996).
+    Sample quantiles in statistical packages.
+    The American Statistician, 50(4), 361-365.
     """
     q = np.true_divide(q, 100)
     q = asanyarray(q)  # undo any decay that the ufunc performed (see gh-13105)
@@ -3912,8 +4060,13 @@ def _quantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
 
 
 @array_function_dispatch(_quantile_dispatcher)
-def quantile(a, q, axis=None, out=None,
-             overwrite_input=False, interpolation='linear', keepdims=False):
+def quantile(a,
+             q,
+             axis=None,
+             out=None,
+             overwrite_input=False,
+             interpolation="linear",
+             keepdims=False):
     """
     Compute the q-th quantile of the data along the specified axis.
 
@@ -3938,18 +4091,75 @@ def quantile(a, q, axis=None, out=None,
         If True, then allow the input array `a` to be modified by intermediate
         calculations, to save memory. In this case, the contents of the input
         `a` after this function completes is undefined.
-    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+    interpolation : str
+        Possible values: 'linear' (default),
+                        'inverted_cdf', 'averaged_inverted_cdf',
+                        'closest_observation', 'interpolated_inverted_cdf',
+                        'hazen', 'weibull',
+                        'median_unbiased', 'normal_unbiased',
+                        'lower', 'higher',
+                        'midpoint', 'nearest'.
         This optional parameter specifies the interpolation method to
-        use when the desired quantile lies between two data points
-        ``i < j``:
-
-            * linear: ``i + (j - i) * fraction``, where ``fraction``
-              is the fractional part of the index surrounded by ``i``
-              and ``j``.
-            * lower: ``i``.
-            * higher: ``j``.
-            * nearest: ``i`` or ``j``, whichever is nearest.
-            * midpoint: ``(i + j) / 2``.
+        use when the desired quantile lies between two data points ``i < j``.
+        g is the fractional part of the index surrounded by ``i``.
+        alpha and beta are correction constants modifying i and j:
+        i + g = (q - alpha) / ( n - alpha - beta + 1 )
+        * inverted_cdf:
+            method 1 of H&F.
+            This method give discontinuous results:
+                if g > 0 ; then take j
+                if g = 0 ; then take i
+        * averaged_inverted_cdf:
+            method 2 of H&F.
+            This method give discontinuous results:
+                if g > 0 ; then take j
+                if g = 0 ; then average between bounds
+        * closest_observation:
+            method 3 of H&F.
+            This method give discontinuous results:
+                if g > 0 ; then take j
+                if g = 0 and index is odd ; then take j
+                if g = 0 and index is even ; then take i
+        * interpolated_inverted_cdf:
+            method 4 of H&F.
+            This method give continuous results using:
+                alpha = 0
+                beta = 1
+        * hazen:
+            method 5 of H&F.
+            This method give continuous results using:
+                alpha = 1/2
+                beta = 1/2
+        * weibull:
+            method 6 of H&F.
+            This method give continuous results using:
+                alpha = 0
+                beta = 0
+        * linear:
+            Default method.
+            method 7 of H&F.
+            This method give continuous results using:
+                alpha = 1
+                beta = 1
+        * median_unbiased:
+            method 8 of H&F.
+            This method is probably the best method if the sample distribution
+            function is unknown (see reference).
+            This method give continuous results using:
+                alpha = 1/3
+                beta = 1/3
+        * normal_unbiased:
+            method 9 of H&F.
+            This method is probably the best method if the sample distribution
+            function is known to be normal.
+            This method give continuous results using:
+                alpha = 3/8
+                beta = 3/8
+        * lower: ``i``.
+        * higher: ``j``.
+        * nearest: ``i`` or ``j``, whichever is nearest.
+        * midpoint: ``(i + j) / 2``.
+
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left in
         the result as dimensions with size one. With this option, the
@@ -4010,6 +4220,12 @@ def quantile(a, q, axis=None, out=None,
     >>> np.quantile(b, 0.5, axis=1, overwrite_input=True)
     array([7.,  2.])
     >>> assert not np.all(a == b)
+
+    References
+    ----------
+    [Hyndman&Fan] Hyndman, R. J., & Fan, Y. (1996).
+    Sample quantiles in statistical packages.
+    The American Statistician, 50(4), 361-365.
     """
     q = np.asanyarray(q)
     if not _quantile_is_valid(q):
@@ -4018,10 +4234,19 @@ def quantile(a, q, axis=None, out=None,
         a, q, axis, out, overwrite_input, interpolation, keepdims)
 
 
-def _quantile_unchecked(a, q, axis=None, out=None, overwrite_input=False,
-                        interpolation='linear', keepdims=False):
+def _quantile_unchecked(a,
+                        q,
+                        axis=None,
+                        out=None,
+                        overwrite_input=False,
+                        interpolation="linear",
+                        keepdims=False):
     """Assumes that q is in [0, 1], and is an ndarray"""
-    r, k = _ureduce(a, func=_quantile_ureduce_func, q=q, axis=axis, out=out,
+    r, k = _ureduce(a,
+                    func=_quantiles_ureduce_func,
+                    q=q,
+                    axis=axis,
+                    out=out,
                     overwrite_input=overwrite_input,
                     interpolation=interpolation)
     if keepdims:
@@ -4042,122 +4267,242 @@ def _quantile_is_valid(q):
     return True
 
 
-def _lerp(a, b, t, out=None):
-    """ Linearly interpolate from a to b by a factor of t """
-    diff_b_a = subtract(b, a)
-    # asanyarray is a stop-gap until gh-13105
-    lerp_interpolation = asanyarray(add(a, diff_b_a*t, out=out))
-    subtract(b, diff_b_a * (1 - t), out=lerp_interpolation, where=t>=0.5)
-    if lerp_interpolation.ndim == 0 and out is None:
-        lerp_interpolation = lerp_interpolation[()]  # unpack 0d arrays
-    return lerp_interpolation
+def _virtual_index_formula(n: np.array,
+                           quantiles: np.array,
+                           alpha: float,
+                           beta: float) -> np.array:
+    """
+    Compute the floating point indexes of an array for the linear
+    interpolation of quantiles.
+    Reference:
+    Hyndman&Fan paper "Sample Quantiles in Statistical Packages",
+    DOI: 10.1080/00031305.1996.10473566
+    """
+    return n * quantiles + (
+            alpha + quantiles * (1 - alpha - beta)
+    ) - 1
 
 
-def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
-                           interpolation='linear', keepdims=False):
-    a = asarray(a)
+def _get_gamma(virtual_indexes: np.array,
+               previous_indexes: np.array,
+               interpolation: _QuantileInterpolation) -> np.array:
+    """
+    Compute the gamma (a.k.a 'm' or weight) for the linear interpolation
+    of quantiles.
+    When gamma == 0 the left bound will be picked,
+    When gamma == 1 the right bound will be.
+    """
+    gamma = np.asanyarray(virtual_indexes - previous_indexes)
+    gamma = interpolation["fix_gamma"](gamma, virtual_indexes)
+    return np.asanyarray(gamma)
 
-    # ufuncs cause 0d array results to decay to scalars (see gh-13105), which
-    # makes them problematic for __setitem__ and attribute access. As a
-    # workaround, we call this on the result of every ufunc on a possibly-0d
-    # array.
-    not_scalar = np.asanyarray
 
-    # prepare a for partitioning
-    if overwrite_input:
-        if axis is None:
-            ap = a.ravel()
-        else:
-            ap = a
-    else:
-        if axis is None:
-            ap = a.flatten()
-        else:
-            ap = a.copy()
+def _linear_interpolation_formula(
+        left: np.array, right: np.array, gamma: np.array, out: np.array = None
+) -> np.array:
+    """
+    Compute the linear interpolation weighted by gamma on each point of
+    two same shape array.
+    """
+    # Equivalent to gamma * right + (1 - gamma) * left
+    # see gh-14685
+    diff_right_left = subtract(right, left)
+    result = asanyarray(add(left, diff_right_left * gamma, out=out))
+    result = subtract(right,
+                      diff_right_left * (1 - gamma),
+                      out=result,
+                      where=gamma >= 0.5)
+    return result
 
-    if axis is None:
-        axis = 0
 
+def _get_gamma_mask(shape, default_value, conditioned_value, where):
+    out = np.full(shape, default_value)
+    out[where] = conditioned_value
+    return out
+
+
+def _discret_interpolation_to_boundaries(index, gamma_condition_fun):
+    previous = np.floor(index)
+    next = previous + 1
+    gamma = index - previous
+    return _get_gamma_mask(shape=index.shape,
+                           default_value=next,
+                           conditioned_value=previous,
+                           where=gamma_condition_fun(gamma, index)
+                           ).astype(np.intp)
+
+
+def _closest_observation(n, quantiles):
+    gamma_fun = lambda gamma, index: (gamma == 0) & (np.floor(index) % 2 == 0)
+    return _discret_interpolation_to_boundaries((n * quantiles) - 1 - 0.5,
+                                                gamma_fun)
+
+
+def _inverted_cdf(n, quantiles):
+    gamma_fun = lambda gamma, _: (gamma == 0)
+    return _discret_interpolation_to_boundaries((n * quantiles) - 1,
+                                                gamma_fun)
+
+
+def _quantiles_ureduce_func(
+        a: np.array,
+        q: np.array,
+        axis: int = None,
+        out=None,
+        overwrite_input: bool = False,
+        interpolation="linear",
+) -> np.array:
     if q.ndim > 2:
         # The code below works fine for nd, but it might not have useful
         # semantics. For now, keep the supported dimensions the same as it was
         # before.
         raise ValueError("q must be a scalar or 1d")
-
-    Nx = ap.shape[axis]
-    indices = not_scalar(q * (Nx - 1))
-    # round fractional indices according to interpolation method
-    if interpolation == 'lower':
-        indices = floor(indices).astype(intp)
-    elif interpolation == 'higher':
-        indices = ceil(indices).astype(intp)
-    elif interpolation == 'midpoint':
-        indices = 0.5 * (floor(indices) + ceil(indices))
-    elif interpolation == 'nearest':
-        indices = around(indices).astype(intp)
-    elif interpolation == 'linear':
-        pass  # keep index as fraction and interpolate
+    if overwrite_input:
+        if axis is None:
+            axis = 0
+            arr = a.ravel()
+        else:
+            arr = a
     else:
-        raise ValueError(
-            "interpolation can only be 'linear', 'lower' 'higher', "
-            "'midpoint', or 'nearest'")
+        if axis is None:
+            axis = 0
+            arr = a.flatten()
+        else:
+            arr = a.copy()
+    result = _quantile(arr,
+                       quantiles=q,
+                       axis=axis,
+                       interpolation=interpolation,
+                       out=out)
+    if result.ndim == 0 and out is None:
+        result = result[()]  # unpack 0d arrays
+    elif result.size == 1 and out is None and q.ndim == 0:
+        result = result[0]
+    return result
 
-    # The dimensions of `q` are prepended to the output shape, so we need the
-    # axis being sampled from `ap` to be first.
-    ap = np.moveaxis(ap, axis, 0)
-    del axis
 
-    if np.issubdtype(indices.dtype, np.integer):
-        # take the points along axis
+def _get_indexes(arr, virtual_indexes, valid_values_count):
+    """
+    Get the valid indexes of arr neighbouring virtual_indexes.
+    Note
+    This is a companion function to linear interpolation of
+    Quantiles
 
-        if np.issubdtype(a.dtype, np.inexact):
+    Returns
+    -------
+    (previous_indexes, next_indexes): Tuple
+        A Tuple of virtual_indexes neighbouring indexes
+    """
+    previous_indexes = np.asanyarray(np.floor(virtual_indexes))
+    next_indexes = np.asanyarray(previous_indexes + 1)
+    indexes_above_bounds = virtual_indexes >= valid_values_count - 1
+    # When indexes is above max index, take the max value of the array
+    if indexes_above_bounds.any():
+        previous_indexes[indexes_above_bounds] = -1
+        next_indexes[indexes_above_bounds] = -1
+    # When indexes is below min index, take the min value of the array
+    indexes_below_bounds = virtual_indexes < 0
+    if indexes_below_bounds.any():
+        previous_indexes[indexes_below_bounds] = 0
+        next_indexes[indexes_below_bounds] = 0
+    if np.issubdtype(arr.dtype, np.inexact):
+        # After the sort, slices having NaNs will have for last element a NaN
+        virtual_indexes_nans = np.isnan(virtual_indexes)
+        if virtual_indexes_nans.any():
+            previous_indexes[virtual_indexes_nans] = -1
+            next_indexes[virtual_indexes_nans] = -1
+    previous_indexes = previous_indexes.astype(np.intp)
+    next_indexes = next_indexes.astype(np.intp)
+    return previous_indexes, next_indexes
+
+
+def _quantile(
+        arr: np.array,
+        quantiles: np.array,
+        axis: int = -1,
+        interpolation="linear",
+        out=None,
+):
+    """
+    Private function that doesn't support extended axis or keepdims.
+    These methods are extended to this function using _ureduce
+    See nanpercentile for parameter usage
+    It computes the quantiles of the array for the given axis.
+    A linear interpolation is performed based on the `interpolation`.
+
+    By default, the interpolation is "linear" where
+    alpha == beta == 1 which performs the 7th method of Hyndman&Fan.
+    With "median_unbiased" we get alpha == beta == 1/3
+    thus the 8th method of Hyndman&Fan.
+    """
+    # --- Setup
+    arr = np.asanyarray(arr)
+    values_count = arr.shape[axis]
+    # The dimensions of `q` are prepended to the output shape, so we need the
+    # axis being sampled from `arr` to be last.
+    DATA_AXIS = 0
+    arr = np.moveaxis(arr, axis, destination=DATA_AXIS)
+    # --- Computation of indexes
+    # Index where to find the value in the sorted array.
+    # Virtual because it is a floating point value, not an valid index.
+    # The nearest neighbours are used for interpolation
+    try:
+        interpolation = _QuantileInterpolation[interpolation]
+    except KeyError:
+        raise ValueError(
+            f"{interpolation!r} is not a valid interpolation. Use one of: "
+            f"{_QuantileInterpolation.keys()}")
+    virtual_indexes = interpolation["get_virtual_index"](values_count,
+                                                         quantiles)
+    virtual_indexes = np.asanyarray(virtual_indexes)
+    if np.issubdtype(virtual_indexes.dtype, np.integer):
+        # No interpolation needed, take the points along axis
+        if np.issubdtype(arr.dtype, np.inexact):
             # may contain nan, which would sort to the end
-            ap.partition(concatenate((indices.ravel(), [-1])), axis=0)
-            n = np.isnan(ap[-1])
+            arr.partition(concatenate((virtual_indexes.ravel(), [-1])), axis=0)
+            slices_having_nans = np.isnan(arr[-1])
         else:
             # cannot contain nan
-            ap.partition(indices.ravel(), axis=0)
-            n = np.array(False, dtype=bool)
-
-        r = take(ap, indices, axis=0, out=out)
-
+            arr.partition(virtual_indexes.ravel(), axis=0)
+            slices_having_nans = np.array(False, dtype=bool)
+        result = np.asanyarray(take(arr, virtual_indexes, axis=0, out=out))
     else:
-        # weight the points above and below the indices
-
-        indices_below = not_scalar(floor(indices)).astype(intp)
-        indices_above = not_scalar(indices_below + 1)
-        indices_above[indices_above > Nx - 1] = Nx - 1
-
-        if np.issubdtype(a.dtype, np.inexact):
-            # may contain nan, which would sort to the end
-            ap.partition(concatenate((
-                indices_below.ravel(), indices_above.ravel(), [-1]
-            )), axis=0)
-            n = np.isnan(ap[-1])
+        previous_indexes, next_indexes = _get_indexes(arr,
+                                                      virtual_indexes,
+                                                      values_count)
+        # --- Sorting
+        arr.partition(
+            np.unique(np.concatenate(([0, -1],
+                                      previous_indexes.ravel(),
+                                      next_indexes.ravel(),
+                                      ))),
+            axis=DATA_AXIS)
+        if np.issubdtype(arr.dtype, np.inexact):
+            slices_having_nans = np.isnan(
+                take(arr, indices=-1, axis=DATA_AXIS)
+            )
         else:
-            # cannot contain nan
-            ap.partition(concatenate((
-                indices_below.ravel(), indices_above.ravel()
-            )), axis=0)
-            n = np.array(False, dtype=bool)
-
-        weights_shape = indices.shape + (1,) * (ap.ndim - 1)
-        weights_above = not_scalar(indices - indices_below).reshape(weights_shape)
-
-        x_below = take(ap, indices_below, axis=0)
-        x_above = take(ap, indices_above, axis=0)
-
-        r = _lerp(x_below, x_above, weights_above, out=out)
-
-    # if any slice contained a nan, then all results on that slice are also nan
-    if np.any(n):
-        if r.ndim == 0 and out is None:
+            slices_having_nans = None
+        # --- Get values from indexes
+        previous = np.take(arr, previous_indexes, axis=DATA_AXIS)
+        next = np.take(arr, next_indexes, axis=DATA_AXIS)
+        # --- Linear interpolation
+        gamma = _get_gamma(virtual_indexes,
+                           previous_indexes,
+                           interpolation)
+        result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1)
+        gamma = gamma.reshape(result_shape)
+        result = _linear_interpolation_formula(previous,
+                                               next,
+                                               gamma,
+                                               out=out)
+    if np.any(slices_having_nans):
+        if result.ndim == 0 and out is None:
             # can't write to a scalar
-            r = a.dtype.type(np.nan)
+            result = np.array(np.nan, dtype=arr.dtype)
         else:
-            r[..., n] = a.dtype.type(np.nan)
-
-    return r
+            result[..., slices_having_nans] = np.nan
+    return result
 
 
 def _trapz_dispatcher(y, x=None, dx=None, axis=None):
-- 
cgit v1.2.1


From 303c12cfe7ad1b8b6ed5417c126857b29355b1fb Mon Sep 17 00:00:00 2001
From: Charles Harris <charlesr.harris@gmail.com>
Date: Fri, 8 Oct 2021 17:09:51 -0600
Subject: DOC: fix docstrings.

Hopefully fix the docstrings of percentile, nanpercentile,
quantile, and nanquantile so that CircleCI passes.
---
 numpy/lib/function_base.py | 424 +++++++++++++++++++++++++++------------------
 1 file changed, 255 insertions(+), 169 deletions(-)

(limited to 'numpy/lib/function_base.py')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 67a34f6e1..dbaba87f9 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -3877,75 +3877,31 @@ def percentile(a,
         If True, then allow the input array `a` to be modified by intermediate
         calculations, to save memory. In this case, the contents of the input
         `a` after this function completes is undefined.
-
-    interpolation : str
-        Possible values: 'linear' (default),
-                        'inverted_cdf', 'averaged_inverted_cdf',
-                        'closest_observation', 'interpolated_inverted_cdf',
-                        'hazen', 'weibull',
-                        'median_unbiased', 'normal_unbiased',
-                        'lower', 'higher',
-                        'midpoint', 'nearest'.
-        This optional parameter specifies the interpolation method to
-        use when the desired quantile lies between two data points ``i < j``.
-        g is the fractional part of the index surrounded by ``i``.
-        alpha and beta are correction constants modifying i and j:
-        i + g = (q - alpha) / ( n - alpha - beta + 1 )
-        * inverted_cdf:
-        method 1 of H&F.
-        This method give discontinuous results:
-            if g > 0 ; then take j
-            if g = 0 ; then take i
-        * averaged_inverted_cdf:
-        method 2 of H&F.
-        This method give discontinuous results:
-            if g > 0 ; then take j
-            if g = 0 ; then average between bounds
-        * closest_observation:
-        method 3 of H&F.
-        This method give discontinuous results:
-            if g > 0 ; then take j
-            if g = 0 and index is odd ; then take j
-            if g = 0 and index is even ; then take i
-        * interpolated_inverted_cdf:
-        method 4 of H&F.
-        This method give continuous results using:
-            alpha = 0
-            beta = 1
-        * hazen:
-        method 5 of H&F.
-        This method give continuous results using:
-            alpha = 1/2
-            beta = 1/2
-        * weibull:
-        method 6 of H&F.
-        This method give continuous results using:
-            alpha = 0
-            beta = 0
-        * inclusive:
-        Default method, aliased with "linear".
-        method 7 of H&F.
-        This method give continuous results using:
-            alpha = 1
-            beta = 1
-        * median_unbiased:
-        method 8 of H&F.
-        This method is probably the best method if the sample distribution
-        function is unknown (see reference).
-        This method give continuous results using:
-            alpha = 1/3
-            beta = 1/3
-        * normal_unbiased:
-        method 9 of H&F.
-        This method is probably the best method if the sample distribution
-        function is known to be normal.
-        This method give continuous results using:
-            alpha = 3/8
-            beta = 3/8
-        * lower: ``i``.
-        * higher: ``j``.
-        * nearest: ``i`` or ``j``, whichever is nearest.
-        * midpoint: ``(i + j) / 2``.
+    interpolation : str, optional
+        This parameter specifies the interpolation method to
+        use when the desired quantile lies between two data points
+        There are many different methods, some unique to NumPy. See the
+        notes for explanation. Options
+
+        * (NPY 1): 'lower'
+        * (NPY 2): 'higher',
+        * (NPY 3): 'midpoint'
+        * (NPY 4): 'nearest'
+        * (NPY 5): 'linear', aliased with 'inclusive'  (default)
+
+        New options:
+
+        * (H&F 1): 'inverted_cdf'
+        * (H&F 2): 'averaged_inverted_cdf'
+        * (H&F 3): 'closest_observation'
+        * (H&F 4): 'interpolated_inverted_cdf'
+        * (H&F 5): 'hazen'
+        * (H&F 6): 'weibull'
+        * (H&F 7): 'inclusive',  aliased with 'linear'  (default)
+        * (H&F 8): 'median_unbiased'
+        * (H&F 9): 'normal_unbiased'
+
+        .. versionadded;: 1.22.0
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left in
@@ -3971,18 +3927,104 @@ def percentile(a,
     mean
     median : equivalent to ``percentile(..., 50)``
     nanpercentile
-    quantile : equivalent to percentile, except with q in the range [0, 1].
+    quantile : equivalent to percentile, except q in the range [0, 1].
 
     Notes
     -----
-    Given a vector ``V`` of length ``N``, the q-th percentile of
-    ``V`` is the value ``q/100`` of the way from the minimum to the
-    maximum in a sorted copy of ``V``. The values and distances of
-    the two nearest neighbors as well as the `interpolation` parameter
-    will determine the percentile if the normalized ranking does not
-    match the location of ``q`` exactly. This function is the same as
-    the median if ``q=50``, the same as the minimum if ``q=0`` and the
-    same as the maximum if ``q=100``.
+    Given a vector ``V`` of length ``N``, the q-th percentile of ``V`` is
+    the value ``q/100`` of the way from the minimum to the maximum in a
+    sorted copy of ``V``. The values and distances of the two nearest
+    neighbors as well as the `interpolation` parameter will determine the
+    percentile if the normalized ranking does not match the location of
+    ``q`` exactly. This function is the same as the median if ``q=50``, the
+    same as the minimum if ``q=0`` and the same as the maximum if
+    ``q=100``.
+
+    This optional `interpolation` parameter specifies the interpolation
+    method to use when the desired quantile lies between two data points
+    ``i < j``. If ``g`` is the fractional part of the index surrounded by
+    ``i`` and  alpha and beta are correction constants modifying i and j.
+
+    .. math::
+        i + g = (q - alpha) / ( n - alpha - beta + 1 )
+
+    The different interpolation methods then work as follows
+
+    inverted_cdf:
+        method 1 of H&F [1]_.
+        This method gives discontinuous results:
+        * if g > 0 ; then take j
+        * if g = 0 ; then take i
+
+    averaged_inverted_cdf:
+        method 2 of H&F [1]_.
+        This method give discontinuous results:
+        * if g > 0 ; then take j
+        * if g = 0 ; then average between bounds
+
+    closest_observation:
+        method 3 of H&F [1]_.
+        This method give discontinuous results:
+        * if g > 0 ; then take j
+        * if g = 0 and index is odd ; then take j
+        * if g = 0 and index is even ; then take i
+
+    interpolated_inverted_cdf:
+        method 4 of H&F [1]_.
+        This method give continuous results using:
+        * alpha = 0
+        * beta = 1
+
+    hazen:
+        method 5 of H&F [1]_.
+        This method give continuous results using:
+        * alpha = 1/2
+        * beta = 1/2
+
+    weibull:
+        method 6 of H&F [1]_.
+        This method give continuous results using:
+        * alpha = 0
+        * beta = 0
+
+    inclusive:
+        Default method, aliased with "linear".
+        method 7 of H&F [1]_.
+        This method give continuous results using:
+        * alpha = 1
+        * beta = 1
+
+    median_unbiased:
+        method 8 of H&F [1]_.
+        This method is probably the best method if the sample
+        distribution function is unknown (see reference).
+        This method give continuous results using:
+        * alpha = 1/3
+        * beta = 1/3
+
+    normal_unbiased:
+        method 9 of H&F [1]_.
+        This method is probably the best method if the sample
+        distribution function is known to be normal.
+        This method give continuous results using:
+        * alpha = 3/8
+        * beta = 3/8
+
+    lower:
+        NumPy method kept for backwards compatibility.
+        Takes ``i`` as the interpolation point.
+
+    higher:
+        NumPy method kept for backwards compatibility.
+        Takes ``j`` as the interpolation point.
+
+    nearest:
+        NumPy method kept for backwards compatibility.
+        Takes ``i`` or ``j``, whichever is nearest.
+
+    midpoint:
+        NumPy method kept for backwards compatibility.
+        Uses ``(i + j) / 2``.
 
     Examples
     --------
@@ -4042,9 +4084,10 @@ def percentile(a,
 
     References
     ----------
-    [Hyndman&Fan] Hyndman, R. J., & Fan, Y. (1996).
-    Sample quantiles in statistical packages.
-    The American Statistician, 50(4), 361-365.
+    .. [1] R. J. Hyndman and Y. Fan,
+       "Sample quantiles in statistical packages,"
+       The American Statistician, 50(4), pp. 361-365, 1996
+
     """
     q = np.true_divide(q, 100)
     q = asanyarray(q)  # undo any decay that the ufunc performed (see gh-13105)
@@ -4080,85 +4123,42 @@ def quantile(a,
         Quantile or sequence of quantiles to compute, which must be between
         0 and 1 inclusive.
     axis : {int, tuple of int, None}, optional
-        Axis or axes along which the quantiles are computed. The
-        default is to compute the quantile(s) along a flattened
-        version of the array.
+        Axis or axes along which the quantiles are computed. The default is
+        to compute the quantile(s) along a flattened version of the array.
     out : ndarray, optional
-        Alternative output array in which to place the result. It must
-        have the same shape and buffer length as the expected output,
-        but the type (of the output) will be cast if necessary.
+        Alternative output array in which to place the result. It must have
+        the same shape and buffer length as the expected output, but the
+        type (of the output) will be cast if necessary.
     overwrite_input : bool, optional
-        If True, then allow the input array `a` to be modified by intermediate
-        calculations, to save memory. In this case, the contents of the input
-        `a` after this function completes is undefined.
-    interpolation : str
-        Possible values: 'linear' (default),
-                        'inverted_cdf', 'averaged_inverted_cdf',
-                        'closest_observation', 'interpolated_inverted_cdf',
-                        'hazen', 'weibull',
-                        'median_unbiased', 'normal_unbiased',
-                        'lower', 'higher',
-                        'midpoint', 'nearest'.
-        This optional parameter specifies the interpolation method to
-        use when the desired quantile lies between two data points ``i < j``.
-        g is the fractional part of the index surrounded by ``i``.
-        alpha and beta are correction constants modifying i and j:
-        i + g = (q - alpha) / ( n - alpha - beta + 1 )
-        * inverted_cdf:
-            method 1 of H&F.
-            This method give discontinuous results:
-                if g > 0 ; then take j
-                if g = 0 ; then take i
-        * averaged_inverted_cdf:
-            method 2 of H&F.
-            This method give discontinuous results:
-                if g > 0 ; then take j
-                if g = 0 ; then average between bounds
-        * closest_observation:
-            method 3 of H&F.
-            This method give discontinuous results:
-                if g > 0 ; then take j
-                if g = 0 and index is odd ; then take j
-                if g = 0 and index is even ; then take i
-        * interpolated_inverted_cdf:
-            method 4 of H&F.
-            This method give continuous results using:
-                alpha = 0
-                beta = 1
-        * hazen:
-            method 5 of H&F.
-            This method give continuous results using:
-                alpha = 1/2
-                beta = 1/2
-        * weibull:
-            method 6 of H&F.
-            This method give continuous results using:
-                alpha = 0
-                beta = 0
-        * linear:
-            Default method.
-            method 7 of H&F.
-            This method give continuous results using:
-                alpha = 1
-                beta = 1
-        * median_unbiased:
-            method 8 of H&F.
-            This method is probably the best method if the sample distribution
-            function is unknown (see reference).
-            This method give continuous results using:
-                alpha = 1/3
-                beta = 1/3
-        * normal_unbiased:
-            method 9 of H&F.
-            This method is probably the best method if the sample distribution
-            function is known to be normal.
-            This method give continuous results using:
-                alpha = 3/8
-                beta = 3/8
-        * lower: ``i``.
-        * higher: ``j``.
-        * nearest: ``i`` or ``j``, whichever is nearest.
-        * midpoint: ``(i + j) / 2``.
+        If True, then allow the input array `a` to be modified by
+        intermediate calculations, to save memory. In this case, the
+        contents of the input `a` after this function completes is
+        undefined.
+    interpolation : str, optional
+        This parameter specifies the interpolation method to use when the
+        desired quantile lies between two data points There are many
+        different methods, some unique to NumPy. See the notes for
+        explanation. Options:
+
+        * (NPY 1): 'lower'
+        * (NPY 2): 'higher',
+        * (NPY 3): 'midpoint'
+        * (NPY 4): 'nearest'
+        * (NPY 5): 'linear', aliased with 'inclusive'  (default)
+
+        New options:
+
+        * (H&F 1): 'inverted_cdf'
+        * (H&F 2): 'averaged_inverted_cdf'
+        * (H&F 3): 'closest_observation'
+        * (H&F 4): 'interpolated_inverted_cdf'
+        * (H&F 5): 'hazen'
+        * (H&F 6): 'weibull'
+        * (H&F 7): 'inclusive',  aliased with 'linear'  (default)
+        * (H&F 8): 'median_unbiased'
+        * (H&F 9): 'normal_unbiased'
+
+        .. versionadded:: 1.22.0
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left in
@@ -4186,14 +4186,99 @@ def quantile(a,
 
     Notes
     -----
-    Given a vector ``V`` of length ``N``, the q-th quantile of
-    ``V`` is the value ``q`` of the way from the minimum to the
-    maximum in a sorted copy of ``V``. The values and distances of
-    the two nearest neighbors as well as the `interpolation` parameter
-    will determine the quantile if the normalized ranking does not
-    match the location of ``q`` exactly. This function is the same as
-    the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and the
-    same as the maximum if ``q=1.0``.
+    Given a vector ``V`` of length ``N``, the q-th quantile of ``V`` is the
+    value ``q`` of the way from the minimum to the maximum in a sorted copy of
+    ``V``. The values and distances of the two nearest neighbors as well as the
+    `interpolation` parameter will determine the quantile if the normalized
+    ranking does not match the location of ``q`` exactly. This function is the
+    same as the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and
+    the same as the maximum if ``q=1.0``.
+
+    This optional `interpolation` parameter specifies the interpolation method
+    to use when the desired quantile lies between two data points ``i < j``. If
+    ``g`` is the fractional part of the index surrounded by ``i`` and  alpha
+    and beta are correction constants modifying i and j.
+
+    .. math::
+        i + g = (q - alpha) / ( n - alpha - beta + 1 )
+
+    The different interpolation methods then work as follows
+
+    inverted_cdf:
+        method 1 of H&F [1]_.
+        This method gives discontinuous results:
+        * if g > 0 ; then take j
+        * if g = 0 ; then take i
+
+    averaged_inverted_cdf:
+        method 2 of H&F [1]_.
+        This method give discontinuous results:
+        * if g > 0 ; then take j
+        * if g = 0 ; then average between bounds
+
+    closest_observation:
+        method 3 of H&F [1]_.
+        This method give discontinuous results:
+        * if g > 0 ; then take j
+        * if g = 0 and index is odd ; then take j
+        * if g = 0 and index is even ; then take i
+
+    interpolated_inverted_cdf:
+        method 4 of H&F [1]_.
+        This method give continuous results using:
+        * alpha = 0
+        * beta = 1
+
+    hazen:
+        method 5 of H&F [1]_.
+        This method give continuous results using:
+        * alpha = 1/2
+        * beta = 1/2
+
+    weibull:
+        method 6 of H&F [1]_.
+        This method give continuous results using:
+        * alpha = 0
+        * beta = 0
+
+    inclusive:
+        Default method, aliased with "linear".
+        method 7 of H&F [1]_.
+        This method give continuous results using:
+        * alpha = 1
+        * beta = 1
+
+    median_unbiased:
+        method 8 of H&F [1]_.
+        This method is probably the best method if the sample
+        distribution function is unknown (see reference).
+        This method give continuous results using:
+        * alpha = 1/3
+        * beta = 1/3
+
+    normal_unbiased:
+        method 9 of H&F [1]_.
+        This method is probably the best method if the sample
+        distribution function is known to be normal.
+        This method give continuous results using:
+        * alpha = 3/8
+        * beta = 3/8
+
+    lower:
+        NumPy method kept for backwards compatibility.
+        Takes ``i`` as the interpolation point.
+
+    higher:
+        NumPy method kept for backwards compatibility.
+        Takes ``j`` as the interpolation point.
+
+    nearest:
+        NumPy method kept for backwards compatibility.
+        Takes ``i`` or ``j``, whichever is nearest.
+
+    midpoint:
+        NumPy method kept for backwards compatibility.
+        Uses ``(i + j) / 2``.
 
     Examples
     --------
@@ -4223,9 +4308,10 @@ def quantile(a,
 
     References
     ----------
-    [Hyndman&Fan] Hyndman, R. J., & Fan, Y. (1996).
-    Sample quantiles in statistical packages.
-    The American Statistician, 50(4), 361-365.
+    .. [1] R. J. Hyndman and Y. Fan,
+       "Sample quantiles in statistical packages,"
+       The American Statistician, 50(4), pp. 361-365, 1996
+
     """
     q = np.asanyarray(q)
     if not _quantile_is_valid(q):
-- 
cgit v1.2.1


From ab19ed256bf9b20340c92cebcfd6158241122c88 Mon Sep 17 00:00:00 2001
From: abel <aoun@cerfacs.fr>
Date: Tue, 19 Oct 2021 12:10:16 +0200
Subject: Fix _lerp

- some changes were unrelated to the PR and have been reverted, including, renaming and moving the logic around.
- Also renamed _quantile_ureduce_func to its original name
---
 numpy/lib/function_base.py | 42 +++++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 23 deletions(-)

(limited to 'numpy/lib/function_base.py')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index dbaba87f9..353490fc2 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -4329,7 +4329,7 @@ def _quantile_unchecked(a,
                         keepdims=False):
     """Assumes that q is in [0, 1], and is an ndarray"""
     r, k = _ureduce(a,
-                    func=_quantiles_ureduce_func,
+                    func=_quantile_ureduce_func,
                     q=q,
                     axis=axis,
                     out=out,
@@ -4383,22 +4383,22 @@ def _get_gamma(virtual_indexes: np.array,
     return np.asanyarray(gamma)
 
 
-def _linear_interpolation_formula(
-        left: np.array, right: np.array, gamma: np.array, out: np.array = None
-) -> np.array:
+def _lerp(a, b, t, out=None):
     """
     Compute the linear interpolation weighted by gamma on each point of
     two same shape array.
     """
-    # Equivalent to gamma * right + (1 - gamma) * left
-    # see gh-14685
-    diff_right_left = subtract(right, left)
-    result = asanyarray(add(left, diff_right_left * gamma, out=out))
-    result = subtract(right,
-                      diff_right_left * (1 - gamma),
-                      out=result,
-                      where=gamma >= 0.5)
-    return result
+    # Equivalent to gamma * right + (1 - gamma) * left, see gh-14685
+    diff_b_a = subtract(b, a)
+    # asanyarray is a stop-gap until gh-13105
+    lerp_interpolation = asanyarray(add(a, diff_b_a * t, out=out))
+    lerp_interpolation = subtract(b,
+                                  diff_b_a * (1 - t),
+                                  out=lerp_interpolation,
+                                  where=t >= 0.5)
+    if lerp_interpolation.ndim == 0 and out is None:
+        lerp_interpolation = lerp_interpolation[()]  # unpack 0d arrays
+    return lerp_interpolation
 
 
 def _get_gamma_mask(shape, default_value, conditioned_value, where):
@@ -4430,7 +4430,7 @@ def _inverted_cdf(n, quantiles):
                                                 gamma_fun)
 
 
-def _quantiles_ureduce_func(
+def _quantile_ureduce_func(
         a: np.array,
         q: np.array,
         axis: int = None,
@@ -4460,10 +4460,6 @@ def _quantiles_ureduce_func(
                        axis=axis,
                        interpolation=interpolation,
                        out=out)
-    if result.ndim == 0 and out is None:
-        result = result[()]  # unpack 0d arrays
-    elif result.size == 1 and out is None and q.ndim == 0:
-        result = result[0]
     return result
 
 
@@ -4551,7 +4547,7 @@ def _quantile(
             # cannot contain nan
             arr.partition(virtual_indexes.ravel(), axis=0)
             slices_having_nans = np.array(False, dtype=bool)
-        result = np.asanyarray(take(arr, virtual_indexes, axis=0, out=out))
+        result = take(arr, virtual_indexes, axis=0, out=out)
     else:
         previous_indexes, next_indexes = _get_indexes(arr,
                                                       virtual_indexes,
@@ -4578,10 +4574,10 @@ def _quantile(
                            interpolation)
         result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1)
         gamma = gamma.reshape(result_shape)
-        result = _linear_interpolation_formula(previous,
-                                               next,
-                                               gamma,
-                                               out=out)
+        result = _lerp(previous,
+                       next,
+                       gamma,
+                       out=out)
     if np.any(slices_having_nans):
         if result.ndim == 0 and out is None:
             # can't write to a scalar
-- 
cgit v1.2.1


From d5e275b2bf65c1848203f6bff7606623793daeed Mon Sep 17 00:00:00 2001
From: abel <aoun@cerfacs.fr>
Date: Tue, 19 Oct 2021 15:22:05 +0200
Subject: DOC: Improve quantile documentation

Also removed unused imports
---
 numpy/lib/function_base.py | 87 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 67 insertions(+), 20 deletions(-)

(limited to 'numpy/lib/function_base.py')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 353490fc2..a5ccc1189 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -9,8 +9,7 @@ import numpy.core.numeric as _nx
 from numpy.core import transpose
 from numpy.core.numeric import (
     ones, zeros_like, arange, concatenate, array, asarray, asanyarray, empty,
-    ndarray, around, floor, ceil, take, dot, where, intp,
-    integer, isscalar, absolute
+    ndarray, take, dot, where, intp, integer, isscalar, absolute
     )
 from numpy.core.umath import (
     pi, add, arctan2, frompyfunc, cos, less_equal, sqrt, sin,
@@ -51,7 +50,22 @@ __all__ = [
     'quantile'
     ]
 
-
+# _QuantileInterpolation is a dictionary listing all the supported
+# interpolation methods to compute quantile/percentile.
+#
+# Below virtual_index refer to the index of the element where the percentile
+# would be found in the sorted sample.
+# When the sample contains exactly the percentile wanted, the virtual_index is
+# an integer to the index of this element.
+# When the percentile wanted is in between two elements, the virtual_index
+# is made of a integer part (a.k.a 'i' or 'left') and a fractional part
+# (a.k.a 'g' or 'gamma')
+#
+# Each _QuantileInterpolation have two properties
+# get_virtual_index : Callable
+#   The function used to compute the virtual_index.
+# fix_gamma : Callable
+#   A function used for discret methods to force the index to a specific value.
 _QuantileInterpolation = dict(
     # --- HYNDMAN and FAN methods
     # Discrete methods
@@ -75,33 +89,33 @@ _QuantileInterpolation = dict(
     # Continuous methods
     interpolated_inverted_cdf=dict(
         get_virtual_index=lambda n, quantiles:
-            _virtual_index_formula(n, quantiles, 0, 1),
+        _compute_virtual_index(n, quantiles, 0, 1),
         fix_gamma=lambda gamma, _: gamma,
     ),
     hazen=dict(
         get_virtual_index=lambda n, quantiles:
-            _virtual_index_formula(n, quantiles, 0.5, 0.5),
+        _compute_virtual_index(n, quantiles, 0.5, 0.5),
         fix_gamma=lambda gamma, _: gamma,
     ),
     weibull=dict(
         get_virtual_index=lambda n, quantiles:
-            _virtual_index_formula(n, quantiles, 0, 0),
+        _compute_virtual_index(n, quantiles, 0, 0),
         fix_gamma=lambda gamma, _: gamma,
     ),
     # Default value
     linear=dict(
         get_virtual_index=lambda n, quantiles:
-            _virtual_index_formula(n, quantiles, 1, 1),
+        _compute_virtual_index(n, quantiles, 1, 1),
         fix_gamma=lambda gamma, _: gamma,
     ),
     median_unbiased=dict(
         get_virtual_index=lambda n, quantiles:
-            _virtual_index_formula(n, quantiles, 1 / 3.0, 1 / 3.0),
+        _compute_virtual_index(n, quantiles, 1 / 3.0, 1 / 3.0),
         fix_gamma=lambda gamma, _: gamma,
     ),
     normal_unbiased=dict(
         get_virtual_index=lambda n, quantiles:
-        _virtual_index_formula(n, quantiles, 3 / 8.0, 3 / 8.0),
+        _compute_virtual_index(n, quantiles, 3 / 8.0, 3 / 8.0),
         fix_gamma=lambda gamma, _: gamma,
     ),
     # --- OTHER METHODS fixme add deprecated ?
@@ -3945,6 +3959,12 @@ def percentile(a,
     ``i < j``. If ``g`` is the fractional part of the index surrounded by
     ``i`` and  alpha and beta are correction constants modifying i and j.
 
+    Below, 'q' is the quantile value, 'n' is the samle size and
+    alpha and beta are constants.
+    The following formula gives an interpolation "i + g" of where the quantile
+    would be in the sorted sample.
+    With 'i' being the floor and 'g' the fractional part of the result.
+
     .. math::
         i + g = (q - alpha) / ( n - alpha - beta + 1 )
 
@@ -4353,13 +4373,22 @@ def _quantile_is_valid(q):
     return True
 
 
-def _virtual_index_formula(n: np.array,
-                           quantiles: np.array,
-                           alpha: float,
-                           beta: float) -> np.array:
+def _compute_virtual_index(n, quantiles, alpha: float, beta: float):
     """
     Compute the floating point indexes of an array for the linear
     interpolation of quantiles.
+    n : array_like
+        The sample sizes.
+    quantiles : array_like
+        The quantiles values.
+    alpha : float
+        A constant used to correct the index computed.
+    beta : float
+        A constant used to correct the index computed.
+
+    alpha and beta values depend on the chosen method
+    (see quantile documentation)
+
     Reference:
     Hyndman&Fan paper "Sample Quantiles in Statistical Packages",
     DOI: 10.1080/00031305.1996.10473566
@@ -4369,14 +4398,24 @@ def _virtual_index_formula(n: np.array,
     ) - 1
 
 
-def _get_gamma(virtual_indexes: np.array,
-               previous_indexes: np.array,
-               interpolation: _QuantileInterpolation) -> np.array:
+def _get_gamma(virtual_indexes,
+               previous_indexes,
+               interpolation: _QuantileInterpolation):
     """
-    Compute the gamma (a.k.a 'm' or weight) for the linear interpolation
+    Compute gamma (a.k.a 'm' or 'weight') for the linear interpolation
     of quantiles.
-    When gamma == 0 the left bound will be picked,
-    When gamma == 1 the right bound will be.
+
+    virtual_indexes : array_like
+        The indexes where the percentile is supposed to be found in the sorted
+        sample.
+    previous_indexes : array_like
+        The floor values of virtual_indexes.
+    interpolation : _QuantileInterpolation
+        The interpolation method chosen, which may have a specific rule
+        modifying gamma.
+
+    gamma is usually the fractional part of virtual_indexes but can be modified
+    by the interpolation method.
     """
     gamma = np.asanyarray(virtual_indexes - previous_indexes)
     gamma = interpolation["fix_gamma"](gamma, virtual_indexes)
@@ -4387,8 +4426,16 @@ def _lerp(a, b, t, out=None):
     """
     Compute the linear interpolation weighted by gamma on each point of
     two same shape array.
+
+    a : array_like
+        Left bound.
+    b : array_like
+        Right bound.
+    t : array_like
+        The interpolation weight.
+    out : array_like
+        Output array.
     """
-    # Equivalent to gamma * right + (1 - gamma) * left, see gh-14685
     diff_b_a = subtract(b, a)
     # asanyarray is a stop-gap until gh-13105
     lerp_interpolation = asanyarray(add(a, diff_b_a * t, out=out))
-- 
cgit v1.2.1


From 2a5422da7cb6759d75477738cc192fee3ca2a19c Mon Sep 17 00:00:00 2001
From: abel <aoun@cerfacs.fr>
Date: Tue, 19 Oct 2021 17:55:06 +0200
Subject: Fix issue with nan scalar

Also added unit test for it.
---
 numpy/lib/function_base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'numpy/lib/function_base.py')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index a5ccc1189..a16781f53 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -3959,7 +3959,7 @@ def percentile(a,
     ``i < j``. If ``g`` is the fractional part of the index surrounded by
     ``i`` and  alpha and beta are correction constants modifying i and j.
 
-    Below, 'q' is the quantile value, 'n' is the samle size and
+    Below, 'q' is the quantile value, 'n' is the sample size and
     alpha and beta are constants.
     The following formula gives an interpolation "i + g" of where the quantile
     would be in the sorted sample.
@@ -4628,7 +4628,7 @@ def _quantile(
     if np.any(slices_having_nans):
         if result.ndim == 0 and out is None:
             # can't write to a scalar
-            result = np.array(np.nan, dtype=arr.dtype)
+            result = arr.dtype.type(np.nan)
         else:
             result[..., slices_having_nans] = np.nan
     return result
-- 
cgit v1.2.1


From 8413b5abf27221fb2bea070871c7cd8f8da5519c Mon Sep 17 00:00:00 2001
From: abel <aoun@cerfacs.fr>
Date: Thu, 21 Oct 2021 09:59:53 +0200
Subject: MAINT: Clean following PR comments

---
 numpy/lib/function_base.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'numpy/lib/function_base.py')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index a16781f53..c33626df4 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -3915,7 +3915,7 @@ def percentile(a,
         * (H&F 8): 'median_unbiased'
         * (H&F 9): 'normal_unbiased'
 
-        .. versionadded;: 1.22.0
+        .. versionchanged:: 1.22.0
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left in
@@ -4439,10 +4439,7 @@ def _lerp(a, b, t, out=None):
     diff_b_a = subtract(b, a)
     # asanyarray is a stop-gap until gh-13105
     lerp_interpolation = asanyarray(add(a, diff_b_a * t, out=out))
-    lerp_interpolation = subtract(b,
-                                  diff_b_a * (1 - t),
-                                  out=lerp_interpolation,
-                                  where=t >= 0.5)
+    subtract(b, diff_b_a * (1 - t), out=lerp_interpolation, where=t >= 0.5)
     if lerp_interpolation.ndim == 0 and out is None:
         lerp_interpolation = lerp_interpolation[()]  # unpack 0d arrays
     return lerp_interpolation
@@ -4580,7 +4577,7 @@ def _quantile(
     except KeyError:
         raise ValueError(
             f"{interpolation!r} is not a valid interpolation. Use one of: "
-            f"{_QuantileInterpolation.keys()}")
+            f"{_QuantileInterpolation.keys()}") from None
     virtual_indexes = interpolation["get_virtual_index"](values_count,
                                                          quantiles)
     virtual_indexes = np.asanyarray(virtual_indexes)
-- 
cgit v1.2.1


From f7911c67176c1d370be27726e87195699e4b581e Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Thu, 4 Nov 2021 13:26:37 -0500
Subject: DOC: Refer to the quantile/percentile notes for nan versions

---
 numpy/lib/function_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'numpy/lib/function_base.py')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index c33626df4..07d8a4269 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -3893,7 +3893,7 @@ def percentile(a,
         `a` after this function completes is undefined.
     interpolation : str, optional
         This parameter specifies the interpolation method to
-        use when the desired quantile lies between two data points
+        use when the desired percentile lies between two data points
         There are many different methods, some unique to NumPy. See the
         notes for explanation. Options
 
-- 
cgit v1.2.1


From f9c2573898899ee3809dfd4ebba113f6de2d528b Mon Sep 17 00:00:00 2001
From: Antony Lee <anntzer.lee@gmail.com>
Date: Fri, 5 Nov 2021 00:05:04 +0100
Subject: PERF: Speedup np.quantile.

Avoiding the unnecessary calls to moveaxis() speed up
`np.quantile(x, .5)` (`x = np.random.rand(1000)`) by ~10% (although
there's a lot of variability) for me.
---
 numpy/lib/function_base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'numpy/lib/function_base.py')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 07d8a4269..4d57acdb2 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -4567,7 +4567,8 @@ def _quantile(
     # The dimensions of `q` are prepended to the output shape, so we need the
     # axis being sampled from `arr` to be last.
     DATA_AXIS = 0
-    arr = np.moveaxis(arr, axis, destination=DATA_AXIS)
+    if axis != DATA_AXIS:  # But moveaxis is slow, so only call it if axis!=0.
+        arr = np.moveaxis(arr, axis, destination=DATA_AXIS)
     # --- Computation of indexes
     # Index where to find the value in the sorted array.
     # Virtual because it is a floating point value, not an valid index.
-- 
cgit v1.2.1


From 035d853e32d6e60a40a6a845699723238a01431b Mon Sep 17 00:00:00 2001
From: abel <aoun@cerfacs.fr>
Date: Mon, 8 Nov 2021 17:35:19 +0100
Subject: DOC: Remove non-existent alias

---
 numpy/lib/function_base.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'numpy/lib/function_base.py')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 4d57acdb2..86125168a 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -3901,7 +3901,7 @@ def percentile(a,
         * (NPY 2): 'higher',
         * (NPY 3): 'midpoint'
         * (NPY 4): 'nearest'
-        * (NPY 5): 'linear', aliased with 'inclusive'  (default)
+        * (NPY 5): 'linear'
 
         New options:
 
@@ -3911,7 +3911,7 @@ def percentile(a,
         * (H&F 4): 'interpolated_inverted_cdf'
         * (H&F 5): 'hazen'
         * (H&F 6): 'weibull'
-        * (H&F 7): 'inclusive',  aliased with 'linear'  (default)
+        * (H&F 7): 'linear'  (default)
         * (H&F 8): 'median_unbiased'
         * (H&F 9): 'normal_unbiased'
 
@@ -4007,8 +4007,7 @@ def percentile(a,
         * alpha = 0
         * beta = 0
 
-    inclusive:
-        Default method, aliased with "linear".
+    linear:
         method 7 of H&F [1]_.
         This method give continuous results using:
         * alpha = 1
@@ -4164,7 +4163,7 @@ def quantile(a,
         * (NPY 2): 'higher',
         * (NPY 3): 'midpoint'
         * (NPY 4): 'nearest'
-        * (NPY 5): 'linear', aliased with 'inclusive'  (default)
+        * (NPY 5): 'linear'
 
         New options:
 
@@ -4174,7 +4173,7 @@ def quantile(a,
         * (H&F 4): 'interpolated_inverted_cdf'
         * (H&F 5): 'hazen'
         * (H&F 6): 'weibull'
-        * (H&F 7): 'inclusive',  aliased with 'linear'  (default)
+        * (H&F 7): 'linear'  (default)
         * (H&F 8): 'median_unbiased'
         * (H&F 9): 'normal_unbiased'
 
@@ -4261,8 +4260,7 @@ def quantile(a,
         * alpha = 0
         * beta = 0
 
-    inclusive:
-        Default method, aliased with "linear".
+    linear:
         method 7 of H&F [1]_.
         This method give continuous results using:
         * alpha = 1
-- 
cgit v1.2.1


From 6cd68755c6fd0686dd57a9daec43d8aa09d15c3e Mon Sep 17 00:00:00 2001
From: abel <aoun@cerfacs.fr>
Date: Tue, 9 Nov 2021 10:35:12 +0100
Subject: MTH: Update quantile default lerp method

For method 7 of H&F, using `(n - 1) * quantiles`
instead of the usual method gives a more accurate
result.
---
 numpy/lib/function_base.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'numpy/lib/function_base.py')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 86125168a..3c9983edf 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -67,7 +67,7 @@ __all__ = [
 # fix_gamma : Callable
 #   A function used for discret methods to force the index to a specific value.
 _QuantileInterpolation = dict(
-    # --- HYNDMAN and FAN methods
+    # --- HYNDMAN AND FAN METHODS
     # Discrete methods
     inverted_cdf=dict(
         get_virtual_index=lambda n, quantiles: _inverted_cdf(n, quantiles),
@@ -102,10 +102,12 @@ _QuantileInterpolation = dict(
         _compute_virtual_index(n, quantiles, 0, 0),
         fix_gamma=lambda gamma, _: gamma,
     ),
-    # Default value
+    # Default method.
+    # To avoid some rounding issues, `(n-1) * quantiles` is preferred to
+    # `_compute_virtual_index(n, quantiles, 1, 1)`.
+    # They are mathematically equivalent.
     linear=dict(
-        get_virtual_index=lambda n, quantiles:
-        _compute_virtual_index(n, quantiles, 1, 1),
+        get_virtual_index=lambda n, quantiles: (n - 1) * quantiles,
         fix_gamma=lambda gamma, _: gamma,
     ),
     median_unbiased=dict(
@@ -118,7 +120,7 @@ _QuantileInterpolation = dict(
         _compute_virtual_index(n, quantiles, 3 / 8.0, 3 / 8.0),
         fix_gamma=lambda gamma, _: gamma,
     ),
-    # --- OTHER METHODS fixme add deprecated ?
+    # --- OTHER METHODS
     lower=dict(
         get_virtual_index=lambda n, quantiles: np.floor(
             (n - 1) * quantiles).astype(np.intp),
-- 
cgit v1.2.1