From 3993408877ab414cb5e3639ac0e20fdec972933f Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 8 Nov 2021 15:38:30 -0600 Subject: API,DEP: Rename percentile/quantile `interpolation=` to `method=` --- numpy/lib/nanfunctions.py | 109 +++++++++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 41 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 7e953be03..4613c1d26 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1223,8 +1223,9 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu return r -def _nanpercentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, - interpolation=None, keepdims=None): +def _nanpercentile_dispatcher( + a, q, axis=None, out=None, overwrite_input=None, + method=None, keepdims=None, *, interpolation=None): return (a, q, out) @@ -1235,8 +1236,10 @@ def nanpercentile( axis=None, out=None, overwrite_input=False, - interpolation="linear", + method="linear", keepdims=np._NoValue, + *, + interpolation=None, ): """ Compute the qth percentile of the data along the specified axis, @@ -1267,19 +1270,11 @@ def nanpercentile( intermediate calculations, to save memory. In this case, the contents of the input `a` after this function completes is undefined. - interpolation : str, optional - This parameter specifies the interpolation method to use when the - desired percentile lies between two data points There are many - different methods, some unique to NumPy. See the notes for - explanation. Options: - - * (NPY 1): 'lower' - * (NPY 2): 'higher', - * (NPY 3): 'midpoint' - * (NPY 4): 'nearest' - * (NPY 5): 'linear' (default) - - New options: + method : str, optional + This parameter specifies the method to use for estimating the + percentile. There are many different methods, some unique to NumPy. + See the notes for explanation. 
The options aligning with the R types + and the H&F paper [1]_ are: * (H&F 1): 'inverted_cdf' * (H&F 2): 'averaged_inverted_cdf' @@ -1291,7 +1286,17 @@ def nanpercentile( * (H&F 8): 'median_unbiased' * (H&F 9): 'normal_unbiased' - .. versionadded:: 1.22.0 + Mainly for compatibility reasons, NumPy also supports the following + options which appear to be unique to NumPy: + + * 'lower' + * 'higher', + * 'midpoint' + * 'nearest' + + .. versionchanged:: 1.22.0 + This argument was previously called "interpolation" and only + offered the "linear" default and last four options. keepdims : bool, optional If this is set to True, the axes which are reduced are left in @@ -1304,6 +1309,11 @@ def nanpercentile( a sub-class and `mean` does not have the kwarg `keepdims` this will raise a RuntimeError. + interpolation : str, optional + Deprecated name for the method keyword argument. + + .. deprecated:: 1.22.0 + Returns ------- percentile : scalar or ndarray @@ -1356,6 +1366,10 @@ def nanpercentile( >>> assert not np.all(a==b) """ + if interpolation is not None: + method = function_base._check_interpolation_as_method( + method, interpolation, "nanpercentile") + a = np.asanyarray(a) q = np.true_divide(q, 100.0) # undo any decay that the ufunc performed (see gh-13105) @@ -1363,11 +1377,11 @@ def nanpercentile( if not function_base._quantile_is_valid(q): raise ValueError("Percentiles must be in the range [0, 100]") return _nanquantile_unchecked( - a, q, axis, out, overwrite_input, interpolation, keepdims) + a, q, axis, out, overwrite_input, method, keepdims) def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, - interpolation=None, keepdims=None): + method=None, keepdims=None, *, interpolation=None): return (a, q, out) @@ -1378,8 +1392,10 @@ def nanquantile( axis=None, out=None, overwrite_input=False, - interpolation="linear", + method="linear", keepdims=np._NoValue, + *, + interpolation=None, ): """ Compute the qth quantile of the data along the specified 
axis, @@ -1408,19 +1424,11 @@ def nanquantile( If True, then allow the input array `a` to be modified by intermediate calculations, to save memory. In this case, the contents of the input `a` after this function completes is undefined. - interpolation : str, optional - This parameter specifies the interpolation method to - use when the desired quantile lies between two data points - There are many different methods, some unique to NumPy. See the - notes for explanation. Options: - - * (NPY 1): 'lower' - * (NPY 2): 'higher', - * (NPY 3): 'midpoint' - * (NPY 4): 'nearest' - * (NPY 5): 'linear' (default) - - New options: + method : str, optional + This parameter specifies the method to use for estimating the + quantile. There are many different methods, some unique to NumPy. + See the notes for explanation. The options aligning with the R types + and the H&F paper [1]_ are: * (H&F 1): 'inverted_cdf' * (H&F 2): 'averaged_inverted_cdf' @@ -1432,7 +1440,17 @@ def nanquantile( * (H&F 8): 'median_unbiased' * (H&F 9): 'normal_unbiased' + Mainly for compatibility reasons, NumPy also supports the following + options which appear to be unique to NumPy: + + * 'lower' + * 'higher', + * 'midpoint' + * 'nearest' + .. versionchanged:: 1.22.0 + This argument was previously called "interpolation" and only + offered the "linear" default and last four options. keepdims : bool, optional If this is set to True, the axes which are reduced are left in @@ -1445,6 +1463,11 @@ def nanquantile( a sub-class and `mean` does not have the kwarg `keepdims` this will raise a RuntimeError. + interpolation : str, optional + Deprecated name for the method keyword argument. + + .. 
deprecated:: 1.22.0 + Returns ------- quantile : scalar or ndarray @@ -1496,12 +1519,16 @@ def nanquantile( >>> assert not np.all(a==b) """ + if interpolation is not None: + method = function_base._check_interpolation_as_method( + method, interpolation, "nanquantile") + a = np.asanyarray(a) q = np.asanyarray(q) if not function_base._quantile_is_valid(q): raise ValueError("Quantiles must be in the range [0, 1]") return _nanquantile_unchecked( - a, q, axis, out, overwrite_input, interpolation, keepdims) + a, q, axis, out, overwrite_input, method, keepdims) def _nanquantile_unchecked( @@ -1510,7 +1537,7 @@ def _nanquantile_unchecked( axis=None, out=None, overwrite_input=False, - interpolation="linear", + method="linear", keepdims=np._NoValue, ): """Assumes that q is in [0, 1], and is an ndarray""" @@ -1524,7 +1551,7 @@ def _nanquantile_unchecked( axis=axis, out=out, overwrite_input=overwrite_input, - interpolation=interpolation) + method=method) if keepdims and keepdims is not np._NoValue: return r.reshape(q.shape + k) else: @@ -1532,7 +1559,7 @@ def _nanquantile_unchecked( def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, - interpolation="linear"): + method="linear"): """ Private function that doesn't support extended axis or keepdims. These methods are extended to this function using _ureduce @@ -1540,10 +1567,10 @@ def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, """ if axis is None or a.ndim == 1: part = a.ravel() - result = _nanquantile_1d(part, q, overwrite_input, interpolation) + result = _nanquantile_1d(part, q, overwrite_input, method) else: result = np.apply_along_axis(_nanquantile_1d, axis, a, q, - overwrite_input, interpolation) + overwrite_input, method) # apply_along_axis fills in collapsed axis with results. # Move that axis to the beginning to match percentile's # convention. 
@@ -1555,7 +1582,7 @@ def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, return result -def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation="linear"): +def _nanquantile_1d(arr1d, q, overwrite_input=False, method="linear"): """ Private function for rank 1 arrays. Compute quantile ignoring NaNs. See nanpercentile for parameter usage @@ -1567,7 +1594,7 @@ def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation="linear"): return np.full(q.shape, np.nan, dtype=arr1d.dtype)[()] return function_base._quantile_unchecked( - arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation) + arr1d, q, overwrite_input=overwrite_input, method=method) def _nanvar_dispatcher(a, axis=None, dtype=None, out=None, ddof=None, -- cgit v1.2.1 From 0d5fb819bd6ff8f025db1dfdd0e86e109a64d694 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 8 Nov 2021 20:39:50 -0600 Subject: DOC: Remove reference to paper from quantile `method` kwarg Apparently, sphinx does not resolve references to footnotes from parameter descriptions. --- numpy/lib/nanfunctions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 4613c1d26..39e168944 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1274,7 +1274,7 @@ def nanpercentile( This parameter specifies the method to use for estimating the percentile. There are many different methods, some unique to NumPy. See the notes for explanation. The options aligning with the R types - and the H&F paper [1]_ are: + and the H&F paper are: * (H&F 1): 'inverted_cdf' * (H&F 2): 'averaged_inverted_cdf' @@ -1428,7 +1428,7 @@ def nanquantile( This parameter specifies the method to use for estimating the quantile. There are many different methods, some unique to NumPy. See the notes for explanation. 
The options aligning with the R types - and the H&F paper [1]_ are: + and the H&F paper are: * (H&F 1): 'inverted_cdf' * (H&F 2): 'averaged_inverted_cdf' -- cgit v1.2.1 From 7d8a8e746fc841a99f71242f60559b1be2e7340c Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Fri, 12 Nov 2021 11:57:22 -0600 Subject: DOC: Update percentile/quantile docs Mainly fixes the method list slightly, tones down the warning a bit and fixes the link to the paper (I did not realize that the link failed to work only because the reference was missing from nanquantile/nanpercentile). --- numpy/lib/nanfunctions.py | 72 +++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 30 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 39e168944..d7ea1ca65 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1273,21 +1273,21 @@ def nanpercentile( method : str, optional This parameter specifies the method to use for estimating the percentile. There are many different methods, some unique to NumPy. - See the notes for explanation. The options aligning with the R types - and the H&F paper are: - - * (H&F 1): 'inverted_cdf' - * (H&F 2): 'averaged_inverted_cdf' - * (H&F 3): 'closest_observation' - * (H&F 4): 'interpolated_inverted_cdf' - * (H&F 5): 'hazen' - * (H&F 6): 'weibull' - * (H&F 7): 'linear' (default) - * (H&F 8): 'median_unbiased' - * (H&F 9): 'normal_unbiased' - - Mainly for compatibility reasons, NumPy also supports the following - options which appear to be unique to NumPy: + See the notes for explanation. The options sorted by their R type + as summarized in the H&F paper [1]_ are: + + 1. 'inverted_cdf' + 2. 'averaged_inverted_cdf' + 3. 'closest_observation' + 4. 'interpolated_inverted_cdf' + 5. 'hazen' + 6. 'weibull' + 7. 'linear' (default) + 8. 'median_unbiased' + 9. 'normal_unbiased' + + The first three methods are discontinuous. 
NumPy further defines the + following discontinuous variations of the default 'linear' (7.) option: * 'lower' * 'higher', @@ -1365,6 +1365,12 @@ def nanpercentile( array([7., 2.]) >>> assert not np.all(a==b) + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + """ if interpolation is not None: method = function_base._check_interpolation_as_method( @@ -1427,21 +1433,21 @@ def nanquantile( method : str, optional This parameter specifies the method to use for estimating the quantile. There are many different methods, some unique to NumPy. - See the notes for explanation. The options aligning with the R types - and the H&F paper are: - - * (H&F 1): 'inverted_cdf' - * (H&F 2): 'averaged_inverted_cdf' - * (H&F 3): 'closest_observation' - * (H&F 4): 'interpolated_inverted_cdf' - * (H&F 5): 'hazen' - * (H&F 6): 'weibull' - * (H&F 7): 'linear' (default) - * (H&F 8): 'median_unbiased' - * (H&F 9): 'normal_unbiased' - - Mainly for compatibility reasons, NumPy also supports the following - options which appear to be unique to NumPy: + See the notes for explanation. The options sorted by their R type + as summarized in the H&F paper [1]_ are: + + 1. 'inverted_cdf' + 2. 'averaged_inverted_cdf' + 3. 'closest_observation' + 4. 'interpolated_inverted_cdf' + 5. 'hazen' + 6. 'weibull' + 7. 'linear' (default) + 8. 'median_unbiased' + 9. 'normal_unbiased' + + The first three methods are discontinuous. NumPy further defines the + following discontinuous variations of the default 'linear' (7.) option: * 'lower' * 'higher', @@ -1518,6 +1524,12 @@ def nanquantile( array([7., 2.]) >>> assert not np.all(a==b) + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + """ if interpolation is not None: method = function_base._check_interpolation_as_method( -- cgit v1.2.1