From cd7a02a4db7e760b881f3feeb832ffd84fa8645a Mon Sep 17 00:00:00 2001 From: abel Date: Thu, 2 Sep 2021 16:34:42 +0200 Subject: MAINT, ENH [#10736] Add interpolation methods to quantile - Added the missing linear interpolation methods. - Updated the existing unit tests. - Added pytest.mark.xfail for boolean arrays See - https://github.com/numpy/numpy/pull/19857#issuecomment-919258693 - https://github.com/numpy/numpy/issues/19154 --- numpy/lib/nanfunctions.py | 212 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 178 insertions(+), 34 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 08d9b42bb..e90c19b4a 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -23,6 +23,7 @@ Functions import functools import warnings import numpy as np +from numpy.lib.function_base import _QuantileInterpolation from numpy.lib import function_base from numpy.core import overrides @@ -1229,8 +1230,15 @@ def _nanpercentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, @array_function_dispatch(_nanpercentile_dispatcher) -def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear', keepdims=np._NoValue): +def nanpercentile( + a, + q, + axis=None, + out=None, + overwrite_input=False, + interpolation="linear", + keepdims=np._NoValue, +): """ Compute the qth percentile of the data along the specified axis, while ignoring nan values. @@ -1259,18 +1267,74 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, If True, then allow the input array `a` to be modified by intermediate calculations, to save memory. In this case, the contents of the input `a` after this function completes is undefined. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + interpolation : str + Possible values: 'linear' (default), + 'inverted_cdf', 'averaged_inverted_cdf', + 'closest_observation', 'interpolated_inverted_cdf', + 'hazen', 'weibull', + 'median_unbiased', 'normal_unbiased', + 'lower', 'higher', + 'midpoint', 'nearest'. This optional parameter specifies the interpolation method to - use when the desired percentile lies between two data points - ``i < j``: - - * 'linear': ``i + (j - i) * fraction``, where ``fraction`` - is the fractional part of the index surrounded by ``i`` - and ``j``. - * 'lower': ``i``. - * 'higher': ``j``. - * 'nearest': ``i`` or ``j``, whichever is nearest. - * 'midpoint': ``(i + j) / 2``. + use when the desired quantile lies between two data points ``i < j``. + g is the fractional part of the index surrounded by ``i``. + alpha and beta are correction constants modifying i and j: + i + g = (q - alpha) / ( n - alpha - beta + 1 ) + * inverted_cdf: + method 1 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 ; then take i + * averaged_inverted_cdf: + method 2 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 ; then average between bounds + * closest_observation: + method 3 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 and index is odd ; then take j + if g = 0 and index is even ; then take i + * interpolated_inverted_cdf: + method 4 of H&F. + This method give continuous results using: + alpha = 0 + beta = 1 + * hazen: + method 5 of H&F. + This method give continuous results using: + alpha = 1/2 + beta = 1/2 + * weibull: + method 6 of H&F. + This method give continuous results using: + alpha = 0 + beta = 0 + * linear: + Default method. + method 7 of H&F. + This method give continuous results using: + alpha = 1 + beta = 1 + * median_unbiased: + method 8 of H&F. + This method is probably the best method if the sample distribution + function is unknown (see reference). + This method give continuous results using: + alpha = 1/3 + beta = 1/3 + * normal_unbiased: + method 9 of H&F. + This method is probably the best method if the sample distribution + function is known to be normal. + This method give continuous results using: + alpha = 3/8 + beta = 3/8 + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the @@ -1342,7 +1406,9 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, """ a = np.asanyarray(a) - q = np.true_divide(q, 100.0) # handles the asarray for us too + q = np.true_divide(q, 100.0) + # undo any decay that the ufunc performed (see gh-13105) + q = np.asanyarray(q) if not function_base._quantile_is_valid(q): raise ValueError("Percentiles must be in the range [0, 100]") return _nanquantile_unchecked( @@ -1355,8 +1421,15 @@ def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, @array_function_dispatch(_nanquantile_dispatcher) -def nanquantile(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear', keepdims=np._NoValue): +def nanquantile( + a, + q, + axis=None, + out=None, + overwrite_input=False, + interpolation="linear", + keepdims=np._NoValue, +): """ Compute the qth quantile of the data along the specified axis, while ignoring nan values. @@ -1384,19 +1457,74 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False, If True, then allow the input array `a` to be modified by intermediate calculations, to save memory. In this case, the contents of the input `a` after this function completes is undefined. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + interpolation : str + Possible values: 'linear' (default), + 'inverted_cdf', 'averaged_inverted_cdf', + 'closest_observation', 'interpolated_inverted_cdf', + 'hazen', 'weibull', + 'median_unbiased', 'normal_unbiased', + 'lower', 'higher', + 'midpoint', 'nearest'. This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - * linear: ``i + (j - i) * fraction``, where ``fraction`` - is the fractional part of the index surrounded by ``i`` - and ``j``. + use when the desired quantile lies between two data points ``i < j``. + g is the fractional part of the index surrounded by ``i``. + alpha and beta are correction constants modifying i and j: + i + g = (q - alpha) / ( n - alpha - beta + 1 ) + * inverted_cdf: + method 1 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 ; then take i + * averaged_inverted_cdf: + method 2 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 ; then average between bounds + * closest_observation: + method 3 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 and index is odd ; then take j + if g = 0 and index is even ; then take i + * interpolated_inverted_cdf: + method 4 of H&F. + This method give continuous results using: + alpha = 0 + beta = 1 + * hazen: + method 5 of H&F. + This method give continuous results using: + alpha = 1/2 + beta = 1/2 + * weibull: + method 6 of H&F. + This method give continuous results using: + alpha = 0 + beta = 0 + * linear: + Default method. + method 7 of H&F. + This method give continuous results using: + alpha = 1 + beta = 1 + * median_unbiased: + method 8 of H&F. + This method is probably the best method if the sample distribution + function is unknown (see reference). + This method give continuous results using: + alpha = 1/3 + beta = 1/3 + * normal_unbiased: + method 9 of H&F. + This method is probably the best method if the sample distribution + function is known to be normal. + This method give continuous results using: + alpha = 3/8 + beta = 3/8 * lower: ``i``. * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. - keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the @@ -1462,26 +1590,39 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False, a, q, axis, out, overwrite_input, interpolation, keepdims) -def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear', keepdims=np._NoValue): +def _nanquantile_unchecked( + a, + q, + axis=None, + out=None, + overwrite_input=False, + interpolation="linear", + keepdims=np._NoValue, +): """Assumes that q is in [0, 1], and is an ndarray""" # apply_along_axis in _nanpercentile doesn't handle empty arrays well, # so deal them upfront if a.size == 0: return np.nanmean(a, axis, out=out, keepdims=keepdims) - - r, k = function_base._ureduce( - a, func=_nanquantile_ureduce_func, q=q, axis=axis, out=out, - overwrite_input=overwrite_input, interpolation=interpolation - ) + r, k = function_base._ureduce(a, + func=_nanquantile_ureduce_func, + q=q, + axis=axis, + out=out, + overwrite_input=overwrite_input, + interpolation=interpolation) if keepdims and keepdims is not np._NoValue: return r.reshape(q.shape + k) else: return r -def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear'): +def _nanquantile_ureduce_func(a, + q, + axis=None, + out=None, + overwrite_input=False, + interpolation= "linear"): """ Private function that doesn't support extended axis or keepdims. These methods are extended to this function using _ureduce @@ -1504,7 +1645,10 @@ def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, return result -def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'): +def _nanquantile_1d(arr1d, + q, + overwrite_input=False, + interpolation= "linear"): """ Private function for rank 1 arrays. Compute quantile ignoring NaNs. See nanpercentile for parameter usage -- cgit v1.2.1 From 303c12cfe7ad1b8b6ed5417c126857b29355b1fb Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Fri, 8 Oct 2021 17:09:51 -0600 Subject: DOC: fix docstrings. Hopefully fix the docstrings of percentile, nanpercentile, quantile, and nanquantile so that CircleCI passes. --- numpy/lib/nanfunctions.py | 424 +++++++++++++++++++++++++++++----------------- 1 file changed, 268 insertions(+), 156 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index e90c19b4a..710242d59 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1253,88 +1253,47 @@ def nanpercentile( Input array or object that can be converted to an array, containing nan values to be ignored. q : array_like of float - Percentile or sequence of percentiles to compute, which must be between - 0 and 100 inclusive. + Percentile or sequence of percentiles to compute, which must be + between 0 and 100 inclusive. axis : {int, tuple of int, None}, optional - Axis or axes along which the percentiles are computed. The - default is to compute the percentile(s) along a flattened - version of the array. + Axis or axes along which the percentiles are computed. The default + is to compute the percentile(s) along a flattened version of the + array. out : ndarray, optional - Alternative output array in which to place the result. It must - have the same shape and buffer length as the expected output, - but the type (of the output) will be cast if necessary. + Alternative output array in which to place the result. It must have + the same shape and buffer length as the expected output, but the + type (of the output) will be cast if necessary. overwrite_input : bool, optional - If True, then allow the input array `a` to be modified by intermediate - calculations, to save memory. In this case, the contents of the input - `a` after this function completes is undefined. - interpolation : str - Possible values: 'linear' (default), - 'inverted_cdf', 'averaged_inverted_cdf', - 'closest_observation', 'interpolated_inverted_cdf', - 'hazen', 'weibull', - 'median_unbiased', 'normal_unbiased', - 'lower', 'higher', - 'midpoint', 'nearest'. - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points ``i < j``. - g is the fractional part of the index surrounded by ``i``. - alpha and beta are correction constants modifying i and j: - i + g = (q - alpha) / ( n - alpha - beta + 1 ) - * inverted_cdf: - method 1 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 ; then take i - * averaged_inverted_cdf: - method 2 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 ; then average between bounds - * closest_observation: - method 3 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 and index is odd ; then take j - if g = 0 and index is even ; then take i - * interpolated_inverted_cdf: - method 4 of H&F. - This method give continuous results using: - alpha = 0 - beta = 1 - * hazen: - method 5 of H&F. - This method give continuous results using: - alpha = 1/2 - beta = 1/2 - * weibull: - method 6 of H&F. - This method give continuous results using: - alpha = 0 - beta = 0 - * linear: - Default method. - method 7 of H&F. - This method give continuous results using: - alpha = 1 - beta = 1 - * median_unbiased: - method 8 of H&F. - This method is probably the best method if the sample distribution - function is unknown (see reference). - This method give continuous results using: - alpha = 1/3 - beta = 1/3 - * normal_unbiased: - method 9 of H&F. - This method is probably the best method if the sample distribution - function is known to be normal. - This method give continuous results using: - alpha = 3/8 - beta = 3/8 - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + If True, then allow the input array `a` to be modified by + intermediate calculations, to save memory. In this case, the + contents of the input `a` after this function completes is + undefined. + interpolation : str, optional + This parameter specifies the interpolation method to use when the + desired quantile lies between two data points There are many + different methods, some unique to NumPy. See the notes for + explanation. Options: + + * (NPY 1): 'lower' + * (NPY 2): 'higher', + * (NPY 3): 'midpoint' + * (NPY 4): 'nearest' + * (NPY 5): 'linear', aliased with 'inclusive' (default) + + New options: + + * (H&F 1): 'inverted_cdf' + * (H&F 2): 'averaged_inverted_cdf' + * (H&F 3): 'closest_observation' + * (H&F 4): 'interpolated_inverted_cdf' + * (H&F 5): 'hazen' + * (H&F 6): 'weibull' + * (H&F 7): 'inclusive', aliased with 'linear' (default) + * (H&F 8): 'median_unbiased' + * (H&F 9): 'normal_unbiased' + + .. versionadded:: 1.22.0 + keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the @@ -1363,18 +1322,104 @@ def nanpercentile( nanmean nanmedian : equivalent to ``nanpercentile(..., 50)`` percentile, median, mean - nanquantile : equivalent to nanpercentile, but with q in the range [0, 1]. + nanquantile : equivalent to nanpercentile, except q in range [0, 1]. Notes ----- - Given a vector ``V`` of length ``N``, the ``q``-th percentile of - ``V`` is the value ``q/100`` of the way from the minimum to the - maximum in a sorted copy of ``V``. The values and distances of - the two nearest neighbors as well as the `interpolation` parameter - will determine the percentile if the normalized ranking does not - match the location of ``q`` exactly. This function is the same as - the median if ``q=50``, the same as the minimum if ``q=0`` and the - same as the maximum if ``q=100``. + Given a vector ``V`` of length ``N``, the ``q``-th percentile of ``V`` + is the value ``q/100`` of the way from the minimum to the maximum in a + sorted copy of ``V``. The values and distances of the two nearest + neighbors as well as the `interpolation` parameter will determine the + percentile if the normalized ranking does not match the location of + ``q`` exactly. This function is the same as the median if ``q=50``, the + same as the minimum if ``q=0`` and the same as the maximum if + ``q=100``. + + This optional `interpolation` parameter specifies the interpolation + method to use when the desired quantile lies between two data points + ``i < j``. If ``g`` is the fractional part of the index surrounded by + ``i`` and alpha and beta are correction constants modifying i and j. + + .. math:: + i + g = (q - alpha) / ( n - alpha - beta + 1 ) + + The different interpolation methods then work as follows + + inverted_cdf: + method 1 of H&F [1]_. + This method gives discontinuous results: + * if g > 0 ; then take j + * if g = 0 ; then take i + + averaged_inverted_cdf: + method 2 of H&F [1]_. + This method give discontinuous results: + * if g > 0 ; then take j + * if g = 0 ; then average between bounds + + closest_observation: + method 3 of H&F [1]_. + This method give discontinuous results: + * if g > 0 ; then take j + * if g = 0 and index is odd ; then take j + * if g = 0 and index is even ; then take i + + interpolated_inverted_cdf: + method 4 of H&F [1]_. + This method give continuous results using: + * alpha = 0 + * beta = 1 + + hazen: + method 5 of H&F [1]_. + This method give continuous results using: + * alpha = 1/2 + * beta = 1/2 + + weibull: + method 6 of H&F [1]_. + This method give continuous results using: + * alpha = 0 + * beta = 0 + + inclusive: + Default method, aliased with "linear". + method 7 of H&F [1]_. + This method give continuous results using: + * alpha = 1 + * beta = 1 + + median_unbiased: + method 8 of H&F [1]_. + This method is probably the best method if the sample + distribution function is unknown (see reference). + This method give continuous results using: + * alpha = 1/3 + * beta = 1/3 + + normal_unbiased: + method 9 of H&F [1]_. + This method is probably the best method if the sample + distribution function is known to be normal. + This method give continuous results using: + * alpha = 3/8 + * beta = 3/8 + + lower: + NumPy method kept for backwards compatibility. + Takes ``i`` as the interpolation point. + + higher: + NumPy method kept for backwards compatibility. + Takes ``j`` as the interpolation point. + + nearest: + NumPy method kept for backwards compatibility. + Takes ``i`` or ``j``, whichever is nearest. + + midpoint: + NumPy method kept for backwards compatibility. + Uses ``(i + j) / 2``. Examples -------- @@ -1404,6 +1449,12 @@ def nanpercentile( array([7., 2.]) >>> assert not np.all(a==b) + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + """ a = np.asanyarray(a) q = np.true_divide(q, 100.0) @@ -1457,74 +1508,32 @@ def nanquantile( If True, then allow the input array `a` to be modified by intermediate calculations, to save memory. In this case, the contents of the input `a` after this function completes is undefined. - interpolation : str - Possible values: 'linear' (default), - 'inverted_cdf', 'averaged_inverted_cdf', - 'closest_observation', 'interpolated_inverted_cdf', - 'hazen', 'weibull', - 'median_unbiased', 'normal_unbiased', - 'lower', 'higher', - 'midpoint', 'nearest'. - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points ``i < j``. - g is the fractional part of the index surrounded by ``i``. - alpha and beta are correction constants modifying i and j: - i + g = (q - alpha) / ( n - alpha - beta + 1 ) - * inverted_cdf: - method 1 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 ; then take i - * averaged_inverted_cdf: - method 2 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 ; then average between bounds - * closest_observation: - method 3 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 and index is odd ; then take j - if g = 0 and index is even ; then take i - * interpolated_inverted_cdf: - method 4 of H&F. - This method give continuous results using: - alpha = 0 - beta = 1 - * hazen: - method 5 of H&F. - This method give continuous results using: - alpha = 1/2 - beta = 1/2 - * weibull: - method 6 of H&F. - This method give continuous results using: - alpha = 0 - beta = 0 - * linear: - Default method. - method 7 of H&F. - This method give continuous results using: - alpha = 1 - beta = 1 - * median_unbiased: - method 8 of H&F. - This method is probably the best method if the sample distribution - function is unknown (see reference). - This method give continuous results using: - alpha = 1/3 - beta = 1/3 - * normal_unbiased: - method 9 of H&F. - This method is probably the best method if the sample distribution - function is known to be normal. - This method give continuous results using: - alpha = 3/8 - beta = 3/8 - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + interpolation : str, optional + This parameter specifies the interpolation method to + use when the desired quantile lies between two data points + There are many different methods, some unique to NumPy. See the + notes for explanation. Options: + + * (NPY 1): 'lower' + * (NPY 2): 'higher', + * (NPY 3): 'midpoint' + * (NPY 4): 'nearest' + * (NPY 5): 'linear', aliased with 'inclusive' (default) + + New options: + + * (H&F 1): 'inverted_cdf' + * (H&F 2): 'averaged_inverted_cdf' + * (H&F 3): 'closest_observation' + * (H&F 4): 'interpolated_inverted_cdf' + * (H&F 5): 'hazen' + * (H&F 6): 'weibull' + * (H&F 7): 'inclusive', aliased with 'linear' (default) + * (H&F 8): 'median_unbiased' + * (H&F 9): 'normal_unbiased' + + .. versionadded;: 1.22.0 + keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the @@ -1555,6 +1564,102 @@ def nanquantile( nanmedian : equivalent to ``nanquantile(..., 0.5)`` nanpercentile : same as nanquantile, but with q in the range [0, 100]. + Notes + ----- + Given a vector ``V`` of length ``N``, the q-th quantile of ``V`` is the + value ``q`` of the way from the minimum to the maximum in a sorted copy of + ``V``. The values and distances of the two nearest neighbors as well as the + `interpolation` parameter will determine the quantile if the normalized + ranking does not match the location of ``q`` exactly. This function is the + same as the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and + the same as the maximum if ``q=1.0``. + + This optional `interpolation` parameter specifies the interpolation method + to use when the desired quantile lies between two data points ``i < j``. If + ``g`` is the fractional part of the index surrounded by ``i`` and alpha + and beta are correction constants modifying i and j. + + .. math:: + i + g = (q - alpha) / ( n - alpha - beta + 1 ) + + The different interpolation methods then work as follows + + inverted_cdf: + method 1 of H&F [1]_. + This method gives discontinuous results: + * if g > 0 ; then take j + * if g = 0 ; then take i + + averaged_inverted_cdf: + method 2 of H&F [1]_. + This method give discontinuous results: + * if g > 0 ; then take j + * if g = 0 ; then average between bounds + + closest_observation: + method 3 of H&F [1]_. + This method give discontinuous results: + * if g > 0 ; then take j + * if g = 0 and index is odd ; then take j + * if g = 0 and index is even ; then take i + + interpolated_inverted_cdf: + method 4 of H&F [1]_. + This method give continuous results using: + * alpha = 0 + * beta = 1 + + hazen: + method 5 of H&F [1]_. + This method give continuous results using: + * alpha = 1/2 + * beta = 1/2 + + weibull: + method 6 of H&F [1]_. + This method give continuous results using: + * alpha = 0 + * beta = 0 + + inclusive: + Default method, aliased with "linear". + method 7 of H&F [1]_. + This method give continuous results using: + * alpha = 1 + * beta = 1 + + median_unbiased: + method 8 of H&F [1]_. + This method is probably the best method if the sample + distribution function is unknown (see reference). + This method give continuous results using: + * alpha = 1/3 + * beta = 1/3 + + normal_unbiased: + method 9 of H&F [1]_. + This method is probably the best method if the sample + distribution function is known to be normal. + This method give continuous results using: + * alpha = 3/8 + * beta = 3/8 + + lower: + NumPy method kept for backwards compatibility. + Takes ``i`` as the interpolation point. + + higher: + NumPy method kept for backwards compatibility. + Takes ``j`` as the interpolation point. + + nearest: + NumPy method kept for backwards compatibility. + Takes ``i`` or ``j``, whichever is nearest. + + midpoint: + NumPy method kept for backwards compatibility. + Uses ``(i + j) / 2``. + Examples -------- >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) @@ -1581,6 +1686,13 @@ def nanquantile( >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True) array([7., 2.]) >>> assert not np.all(a==b) + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + """ a = np.asanyarray(a) q = np.asanyarray(q) -- cgit v1.2.1 From 8413b5abf27221fb2bea070871c7cd8f8da5519c Mon Sep 17 00:00:00 2001 From: abel Date: Thu, 21 Oct 2021 09:59:53 +0200 Subject: MAINT: Clean following PR comments --- numpy/lib/nanfunctions.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 710242d59..c3333a83a 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1532,7 +1532,7 @@ def nanquantile( * (H&F 8): 'median_unbiased' * (H&F 9): 'normal_unbiased' - .. versionadded;: 1.22.0 + .. versionchanged:: 1.22.0 keepdims : bool, optional If this is set to True, the axes which are reduced are left in @@ -1729,12 +1729,8 @@ def _nanquantile_unchecked( return r -def _nanquantile_ureduce_func(a, - q, - axis=None, - out=None, - overwrite_input=False, - interpolation= "linear"): +def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, + interpolation="linear"): """ Private function that doesn't support extended axis or keepdims. These methods are extended to this function using _ureduce @@ -1757,10 +1753,7 @@ def _nanquantile_ureduce_func(a, return result -def _nanquantile_1d(arr1d, - q, - overwrite_input=False, - interpolation= "linear"): +def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation="linear"): """ Private function for rank 1 arrays. Compute quantile ignoring NaNs. See nanpercentile for parameter usage -- cgit v1.2.1 From 1574011425611a0e43715f81c494004c26b95e92 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 1 Nov 2021 18:00:20 -0500 Subject: MAINT: Remove unnecessary import of _QuantileInterpolation --- numpy/lib/nanfunctions.py | 1 - 1 file changed, 1 deletion(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index c3333a83a..3189d2369 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -23,7 +23,6 @@ Functions import functools import warnings import numpy as np -from numpy.lib.function_base import _QuantileInterpolation from numpy.lib import function_base from numpy.core import overrides -- cgit v1.2.1 From f7911c67176c1d370be27726e87195699e4b581e Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 4 Nov 2021 13:26:37 -0500 Subject: DOC: Refer to the quantile/percentile notes for nan versions --- numpy/lib/nanfunctions.py | 203 +--------------------------------------------- 1 file changed, 3 insertions(+), 200 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 3189d2369..9fab77f45 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1269,7 +1269,7 @@ def nanpercentile( undefined. interpolation : str, optional This parameter specifies the interpolation method to use when the - desired quantile lies between two data points There are many + desired percentile lies between two data points There are many different methods, some unique to NumPy. See the notes for explanation. Options: @@ -1325,100 +1325,7 @@ def nanpercentile( Notes ----- - Given a vector ``V`` of length ``N``, the ``q``-th percentile of ``V`` - is the value ``q/100`` of the way from the minimum to the maximum in a - sorted copy of ``V``. The values and distances of the two nearest - neighbors as well as the `interpolation` parameter will determine the - percentile if the normalized ranking does not match the location of - ``q`` exactly. This function is the same as the median if ``q=50``, the - same as the minimum if ``q=0`` and the same as the maximum if - ``q=100``. - - This optional `interpolation` parameter specifies the interpolation - method to use when the desired quantile lies between two data points - ``i < j``. If ``g`` is the fractional part of the index surrounded by - ``i`` and alpha and beta are correction constants modifying i and j. - - .. math:: - i + g = (q - alpha) / ( n - alpha - beta + 1 ) - - The different interpolation methods then work as follows - - inverted_cdf: - method 1 of H&F [1]_. - This method gives discontinuous results: - * if g > 0 ; then take j - * if g = 0 ; then take i - - averaged_inverted_cdf: - method 2 of H&F [1]_. - This method give discontinuous results: - * if g > 0 ; then take j - * if g = 0 ; then average between bounds - - closest_observation: - method 3 of H&F [1]_. - This method give discontinuous results: - * if g > 0 ; then take j - * if g = 0 and index is odd ; then take j - * if g = 0 and index is even ; then take i - - interpolated_inverted_cdf: - method 4 of H&F [1]_. - This method give continuous results using: - * alpha = 0 - * beta = 1 - - hazen: - method 5 of H&F [1]_. - This method give continuous results using: - * alpha = 1/2 - * beta = 1/2 - - weibull: - method 6 of H&F [1]_. - This method give continuous results using: - * alpha = 0 - * beta = 0 - - inclusive: - Default method, aliased with "linear". - method 7 of H&F [1]_. - This method give continuous results using: - * alpha = 1 - * beta = 1 - - median_unbiased: - method 8 of H&F [1]_. - This method is probably the best method if the sample - distribution function is unknown (see reference). - This method give continuous results using: - * alpha = 1/3 - * beta = 1/3 - - normal_unbiased: - method 9 of H&F [1]_. - This method is probably the best method if the sample - distribution function is known to be normal. - This method give continuous results using: - * alpha = 3/8 - * beta = 3/8 - - lower: - NumPy method kept for backwards compatibility. - Takes ``i`` as the interpolation point. - - higher: - NumPy method kept for backwards compatibility. - Takes ``j`` as the interpolation point. - - nearest: - NumPy method kept for backwards compatibility. - Takes ``i`` or ``j``, whichever is nearest. - - midpoint: - NumPy method kept for backwards compatibility. - Uses ``(i + j) / 2``. + For more information please see `numpy.percentile` Examples -------- @@ -1448,12 +1355,6 @@ def nanpercentile( array([7., 2.]) >>> assert not np.all(a==b) - References - ---------- - .. [1] R. J. Hyndman and Y. Fan, - "Sample quantiles in statistical packages," - The American Statistician, 50(4), pp. 361-365, 1996 - """ a = np.asanyarray(a) q = np.true_divide(q, 100.0) @@ -1565,99 +1466,7 @@ def nanquantile( Notes ----- - Given a vector ``V`` of length ``N``, the q-th quantile of ``V`` is the - value ``q`` of the way from the minimum to the maximum in a sorted copy of - ``V``. The values and distances of the two nearest neighbors as well as the - `interpolation` parameter will determine the quantile if the normalized - ranking does not match the location of ``q`` exactly. This function is the - same as the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and - the same as the maximum if ``q=1.0``. - - This optional `interpolation` parameter specifies the interpolation method - to use when the desired quantile lies between two data points ``i < j``. If - ``g`` is the fractional part of the index surrounded by ``i`` and alpha - and beta are correction constants modifying i and j. - - .. math:: - i + g = (q - alpha) / ( n - alpha - beta + 1 ) - - The different interpolation methods then work as follows - - inverted_cdf: - method 1 of H&F [1]_. - This method gives discontinuous results: - * if g > 0 ; then take j - * if g = 0 ; then take i - - averaged_inverted_cdf: - method 2 of H&F [1]_. - This method give discontinuous results: - * if g > 0 ; then take j - * if g = 0 ; then average between bounds - - closest_observation: - method 3 of H&F [1]_. - This method give discontinuous results: - * if g > 0 ; then take j - * if g = 0 and index is odd ; then take j - * if g = 0 and index is even ; then take i - - interpolated_inverted_cdf: - method 4 of H&F [1]_. - This method give continuous results using: - * alpha = 0 - * beta = 1 - - hazen: - method 5 of H&F [1]_. - This method give continuous results using: - * alpha = 1/2 - * beta = 1/2 - - weibull: - method 6 of H&F [1]_. - This method give continuous results using: - * alpha = 0 - * beta = 0 - - inclusive: - Default method, aliased with "linear". - method 7 of H&F [1]_. - This method give continuous results using: - * alpha = 1 - * beta = 1 - - median_unbiased: - method 8 of H&F [1]_. - This method is probably the best method if the sample - distribution function is unknown (see reference). - This method give continuous results using: - * alpha = 1/3 - * beta = 1/3 - - normal_unbiased: - method 9 of H&F [1]_. - This method is probably the best method if the sample - distribution function is known to be normal. - This method give continuous results using: - * alpha = 3/8 - * beta = 3/8 - - lower: - NumPy method kept for backwards compatibility. - Takes ``i`` as the interpolation point. - - higher: - NumPy method kept for backwards compatibility. - Takes ``j`` as the interpolation point. - - nearest: - NumPy method kept for backwards compatibility. - Takes ``i`` or ``j``, whichever is nearest. - - midpoint: - NumPy method kept for backwards compatibility. - Uses ``(i + j) / 2``. + For more information please see `numpy.quantile` Examples -------- @@ -1686,12 +1495,6 @@ def nanquantile( array([7., 2.]) >>> assert not np.all(a==b) - References - ---------- - .. [1] R. J. Hyndman and Y. Fan, - "Sample quantiles in statistical packages," - The American Statistician, 50(4), pp. 361-365, 1996 - """ a = np.asanyarray(a) q = np.asanyarray(q) -- cgit v1.2.1 From 035d853e32d6e60a40a6a845699723238a01431b Mon Sep 17 00:00:00 2001 From: abel Date: Mon, 8 Nov 2021 17:35:19 +0100 Subject: DOC: Remove non-existent alias --- numpy/lib/nanfunctions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 9fab77f45..7e953be03 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1277,7 +1277,7 @@ def nanpercentile( * (NPY 2): 'higher', * (NPY 3): 'midpoint' * (NPY 4): 'nearest' - * (NPY 5): 'linear', aliased with 'inclusive' (default) + * (NPY 5): 'linear' (default) New options: @@ -1287,7 +1287,7 @@ def nanpercentile( * (H&F 4): 'interpolated_inverted_cdf' * (H&F 5): 'hazen' * (H&F 6): 'weibull' - * (H&F 7): 'inclusive', aliased with 'linear' (default) + * (H&F 7): 'linear' (default) * (H&F 8): 'median_unbiased' * (H&F 9): 'normal_unbiased' @@ -1418,7 +1418,7 @@ def nanquantile( * (NPY 2): 'higher', * (NPY 3): 'midpoint' * (NPY 4): 'nearest' - * (NPY 5): 'linear', aliased with 'inclusive' (default) + * (NPY 5): 'linear' (default) New options: @@ -1428,7 +1428,7 @@ def nanquantile( * (H&F 4): 'interpolated_inverted_cdf' * (H&F 5): 'hazen' * (H&F 6): 'weibull' - * (H&F 7): 'inclusive', aliased with 'linear' (default) + * (H&F 7): 'linear' (default) * (H&F 8): 'median_unbiased' * (H&F 9): 'normal_unbiased' -- cgit v1.2.1