From 8599fe0abc88f584ee45b54654833cdccefc3cd0 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 4 Sep 2020 20:23:42 +0200 Subject: Fix docstring cross-referencing --- numpy/lib/nanfunctions.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 003550432..409016adb 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -244,7 +244,7 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): Alternate output array in which to place the result. The default is ``None``; if provided, it must have the same shape as the expected output, but the type will be cast if necessary. See - `ufuncs-output-type` for more details. + :ref:`ufuncs-output-type` for more details. .. versionadded:: 1.8.0 keepdims : bool, optional @@ -359,7 +359,7 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): Alternate output array in which to place the result. The default is ``None``; if provided, it must have the same shape as the expected output, but the type will be cast if necessary. See - `ufuncs-output-type` for more details. + :ref:`ufuncs-output-type` for more details. .. versionadded:: 1.8.0 keepdims : bool, optional @@ -584,7 +584,7 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): Alternate output array in which to place the result. The default is ``None``. If provided, it must have the same shape as the expected output, but the type will be cast if necessary. See - `ufuncs-output-type` for more details. The casting of NaN to integer + :ref:`ufuncs-output-type` for more details. The casting of NaN to integer can yield unexpected results. .. versionadded:: 1.8.0 @@ -681,7 +681,7 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): Alternate output array in which to place the result. The default is ``None``. If provided, it must have the same shape as the expected output, but the type will be cast if necessary. See - `ufuncs-output-type` for more details. The casting of NaN to integer + :ref:`ufuncs-output-type` for more details. The casting of NaN to integer can yield unexpected results. keepdims : bool, optional If True, the axes which are reduced are left in the result as @@ -749,7 +749,7 @@ def nancumsum(a, axis=None, dtype=None, out=None): out : ndarray, optional Alternative output array in which to place the result. It must have the same shape and buffer length as the expected output - but the type will be cast if necessary. See `ufuncs-output-type` for + but the type will be cast if necessary. See :ref:`ufuncs-output-type` for more details. Returns @@ -888,7 +888,7 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): Alternate output array in which to place the result. The default is ``None``; if provided, it must have the same shape as the expected output, but the type will be cast if necessary. See - `ufuncs-output-type` for more details. + :ref:`ufuncs-output-type` for more details. keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, @@ -1256,7 +1256,7 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False, Compute the qth quantile of the data along the specified axis, while ignoring nan values. Returns the qth quantile(s) of the array elements. - + .. versionadded:: 1.15.0 Parameters @@ -1472,7 +1472,7 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): mean : Average var : Variance while not ignoring NaNs nanstd, nanmean - ufuncs-output-type + :ref:`ufuncs-output-type` Notes ----- @@ -1624,7 +1624,7 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): -------- var, mean, std nanvar, nanmean - ufuncs-output-type + :ref:`ufuncs-output-type` Notes ----- -- cgit v1.2.1 From 71ac6b78a14206a8e3b8f5f7e55d0af8ef6b4bf5 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sun, 31 Jan 2021 08:33:32 -0800 Subject: DOC: Nupydoc format space before `:` in Parameters Missing `s` in two spellings. --- numpy/lib/nanfunctions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 409016adb..a02ad779f 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -613,7 +613,7 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): -------- numpy.sum : Sum across array propagating NaNs. isnan : Show which elements are NaN. - isfinite: Show which elements are not NaN or +/-inf. + isfinite : Show which elements are not NaN or +/-inf. Notes ----- -- cgit v1.2.1 From f91f4bcd050299c930092390b54ce9ba51fd70e0 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Sat, 22 May 2021 15:48:23 +0200 Subject: BUG: Fixed an issue wherein `nanmedian` could return an array with the wrong dtype --- numpy/lib/nanfunctions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index a02ad779f..02ad01a98 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -965,7 +965,9 @@ def _nanmedian1d(arr1d, overwrite_input=False): arr1d, overwrite_input = _remove_nan_1d(arr1d, overwrite_input=overwrite_input) if arr1d.size == 0: - return np.nan + # Ensure that a nan-esque scalar of the appropiate type (and unit) + # is returned for `timedelta64` and `complexfloating` + return np.array(np.nan).astype(arr1d.dtype, copy=False)[()] return np.median(arr1d, overwrite_input=overwrite_input) -- cgit v1.2.1 From a8b825c0379972234a86f30b76ae9fc853a88b5e Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Sat, 22 May 2021 16:04:26 +0200 Subject: BUG: Fixed an issue wherein `_nanmedian_small` would fail for `timedelta64`-based dtypes --- numpy/lib/nanfunctions.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 02ad01a98..719c529c1 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1010,10 +1010,12 @@ def _nanmedian_small(a, axis=None, out=None, overwrite_input=False): for i in range(np.count_nonzero(m.mask.ravel())): warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=4) + + fill_value = np.timedelta64("NaT") if m.dtype.kind == "m" else np.nan if out is not None: - out[...] = m.filled(np.nan) + out[...] = m.filled(fill_value) return out - return m.filled(np.nan) + return m.filled(fill_value) def _nanmedian_dispatcher( -- cgit v1.2.1 From ae9314eff5d539122bf87800a1bc50a9f99762a8 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Sat, 22 May 2021 16:18:47 +0200 Subject: MAINT: Directly grab `nan` from the input array Directly grab a nan-esque object from the input array, rather than constructing a new one from scratch --- numpy/lib/nanfunctions.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 719c529c1..2c2c3435b 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -962,14 +962,16 @@ def _nanmedian1d(arr1d, overwrite_input=False): Private function for rank 1 arrays. Compute the median ignoring NaNs. See nanmedian for parameter usage """ - arr1d, overwrite_input = _remove_nan_1d(arr1d, - overwrite_input=overwrite_input) - if arr1d.size == 0: + arr1d_parsed, overwrite_input = _remove_nan_1d( + arr1d, overwrite_input=overwrite_input, + ) + + if arr1d_parsed.size == 0: # Ensure that a nan-esque scalar of the appropiate type (and unit) # is returned for `timedelta64` and `complexfloating` - return np.array(np.nan).astype(arr1d.dtype, copy=False)[()] + return arr1d[-1] - return np.median(arr1d, overwrite_input=overwrite_input) + return np.median(arr1d_parsed, overwrite_input=overwrite_input) def _nanmedian(a, axis=None, out=None, overwrite_input=False): -- cgit v1.2.1 From b6d7c4680e23520fd90387f72d136717ed882bc0 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Fri, 3 Sep 2021 15:41:54 +0200 Subject: BUG: Fixed an issue wherein certain `nan` functions could fail for object arrays --- numpy/lib/nanfunctions.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 2c2c3435b..65e1ca936 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -160,6 +160,8 @@ def _remove_nan_1d(arr1d, overwrite_input=False): True if `res` can be modified in place, given the constraint on the input """ + if arr1d.dtype == object: + return arr1d, True c = np.isnan(arr1d) s = np.nonzero(c)[0] @@ -214,7 +216,11 @@ def _divide_by_count(a, b, out=None): return np.divide(a, b, out=out, casting='unsafe') else: if out is None: - return a.dtype.type(a / b) + # Precaution against reduced object arrays + try: + return a.dtype.type(a / b) + except AttributeError: + return a / b else: # This is questionable, but currently a numpy scalar can # be output to a zero dimensional array. @@ -1551,7 +1557,13 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): # Compute variance. var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) - if var.ndim < cnt.ndim: + + # Precaution against reduced object arrays + try: + var_ndim = var.ndim + except AttributeError: + var_ndim = np.ndim(var) + if var_ndim < cnt.ndim: # Subclasses of ndarray may ignore keepdims, so check here. cnt = cnt.squeeze(axis) dof = cnt - ddof @@ -1672,5 +1684,10 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): if isinstance(var, np.ndarray): std = np.sqrt(var, out=var) else: - std = var.dtype.type(np.sqrt(var)) + # Precaution against reduced object arrays + try: + std = var.dtype.type(np.sqrt(var)) + except AttributeError: + cls = type(var) + std = cls(np.sqrt(var)) return std -- cgit v1.2.1 From a0ea053cd7ee8af1bd05a49b9577eb13fa1e28b7 Mon Sep 17 00:00:00 2001 From: Bas van Beek <43369155+BvB93@users.noreply.github.com> Date: Sat, 4 Sep 2021 15:55:09 +0200 Subject: MAINT: Copy the `_methods._std` code-path for handling `nanstd` object-arrays https://github.com/numpy/numpy/blob/410a89ef04a2d3c50dd2dba2ad403c872c3745ac/numpy/core/_methods.py#L265-L270 --- numpy/lib/nanfunctions.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 65e1ca936..46c71e1f5 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1683,11 +1683,8 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): keepdims=keepdims) if isinstance(var, np.ndarray): std = np.sqrt(var, out=var) + elif hasattr(var, 'dtype'): + std = var.dtype.type(np.sqrt(var)) else: - # Precaution against reduced object arrays - try: - std = var.dtype.type(np.sqrt(var)) - except AttributeError: - cls = type(var) - std = cls(np.sqrt(var)) + std = np.sqrt(var) return std -- cgit v1.2.1 From ecba7133ffef32b817d53bcb2ceeb3113b37bb07 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Sun, 5 Sep 2021 23:46:34 +0200 Subject: MAINT: Let `_remove_nan_1d` attempt to identify nan-containing object arrays Use the same approach as in numpy/numpy#9013 --- numpy/lib/nanfunctions.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 46c71e1f5..766bf3c82 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -161,9 +161,11 @@ def _remove_nan_1d(arr1d, overwrite_input=False): input """ if arr1d.dtype == object: - return arr1d, True + # object arrays do not support `isnan` (gh-9009), so make a guess + c = np.not_equal(arr1d, arr1d, dtype=bool) + else: + c = np.isnan(arr1d) - c = np.isnan(arr1d) s = np.nonzero(c)[0] if s.size == arr1d.size: warnings.warn("All-NaN slice encountered", RuntimeWarning, -- cgit v1.2.1 From 95e5d5abbfa6d1f7e5d0ca5d9e6edc60650cb6a6 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Sat, 11 Sep 2021 13:54:56 +0200 Subject: BUG: Fixed an issue wherein `nanpercentile` and `nanquantile` would ignore the dtype for all-nan arrays --- numpy/lib/nanfunctions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 766bf3c82..658ec5255 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1421,7 +1421,8 @@ def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'): arr1d, overwrite_input = _remove_nan_1d(arr1d, overwrite_input=overwrite_input) if arr1d.size == 0: - return np.full(q.shape, np.nan)[()] # convert to scalar + # convert to scalar + return np.full(q.shape, np.nan, dtype=arr1d.dtype)[()] return function_base._quantile_unchecked( arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation) -- cgit v1.2.1 From 83960267dc097742cb67ef575504afa56f82b102 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Tue, 21 Sep 2021 09:18:37 +0200 Subject: DOC: Typos found by codespell --- numpy/lib/nanfunctions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 658ec5255..4e77f0d92 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -975,7 +975,7 @@ def _nanmedian1d(arr1d, overwrite_input=False): ) if arr1d_parsed.size == 0: - # Ensure that a nan-esque scalar of the appropiate type (and unit) + # Ensure that a nan-esque scalar of the appropriate type (and unit) # is returned for `timedelta64` and `complexfloating` return arr1d[-1] -- cgit v1.2.1 From 0b1825d17b48a761f7efd13b08e327455027f7c3 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Mon, 4 Oct 2021 15:37:28 +0200 Subject: ENH: Bring the `nan` signatures up to parity with their ``-based counterparts * nanmin: initial & where * nanmax: initial & where * nanargmin: keepdims & out * nanargmax: keepdims & out * nansum: initial & where * nanprod: initial & where * nanmean: where * nanvar: where * nanstd: where --- numpy/lib/nanfunctions.py | 174 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 139 insertions(+), 35 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 4e77f0d92..08d9b42bb 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -229,12 +229,14 @@ def _divide_by_count(a, b, out=None): return np.divide(a, b, out=out, casting='unsafe') -def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None): +def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None, + initial=None, where=None): return (a, out) @array_function_dispatch(_nanmin_dispatcher) -def nanmin(a, axis=None, out=None, keepdims=np._NoValue): +def nanmin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue, + where=np._NoValue): """ Return minimum of an array or minimum along an axis, ignoring any NaNs. When all-NaN slices are encountered a ``RuntimeWarning`` is raised and @@ -266,6 +268,16 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): does not implement `keepdims` any exceptions will be raised. .. versionadded:: 1.8.0 + initial : scalar, optional + The maximum value of an output element. Must be present to allow + computation on empty slice. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 + where : array_like of bool, optional + Elements to compare for the minimum. See `~numpy.ufunc.reduce` + for details. + + .. versionadded:: 1.22.0 Returns ------- @@ -321,6 +333,11 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): kwargs = {} if keepdims is not np._NoValue: kwargs['keepdims'] = keepdims + if initial is not np._NoValue: + kwargs['initial'] = initial + if where is not np._NoValue: + kwargs['where'] = where + if type(a) is np.ndarray and a.dtype != np.object_: # Fast, but not safe for subclasses of ndarray, or object arrays, # which do not implement isnan (gh-9009), or fmin correctly (gh-8975) @@ -336,6 +353,7 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): return res # Check for all-NaN axis + kwargs.pop("initial", None) mask = np.all(mask, axis=axis, **kwargs) if np.any(mask): res = _copyto(res, np.nan, mask) @@ -344,12 +362,14 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): return res -def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None): +def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None, + initial=None, where=None): return (a, out) @array_function_dispatch(_nanmax_dispatcher) -def nanmax(a, axis=None, out=None, keepdims=np._NoValue): +def nanmax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue, + where=np._NoValue): """ Return the maximum of an array or maximum along an axis, ignoring any NaNs. When all-NaN slices are encountered a ``RuntimeWarning`` is @@ -381,6 +401,16 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): does not implement `keepdims` any exceptions will be raised. .. versionadded:: 1.8.0 + initial : scalar, optional + The minimum value of an output element. Must be present to allow + computation on empty slice. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 + where : array_like of bool, optional + Elements to compare for the maximum. See `~numpy.ufunc.reduce` + for details. + + .. versionadded:: 1.22.0 Returns ------- @@ -436,6 +466,11 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): kwargs = {} if keepdims is not np._NoValue: kwargs['keepdims'] = keepdims + if initial is not np._NoValue: + kwargs['initial'] = initial + if where is not np._NoValue: + kwargs['where'] = where + if type(a) is np.ndarray and a.dtype != np.object_: # Fast, but not safe for subclasses of ndarray, or object arrays, # which do not implement isnan (gh-9009), or fmax correctly (gh-8975) @@ -451,6 +486,7 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): return res # Check for all-NaN axis + kwargs.pop("initial", None) mask = np.all(mask, axis=axis, **kwargs) if np.any(mask): res = _copyto(res, np.nan, mask) @@ -459,12 +495,12 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): return res -def _nanargmin_dispatcher(a, axis=None): +def _nanargmin_dispatcher(a, axis=None, out=None, *, keepdims=None): return (a,) @array_function_dispatch(_nanargmin_dispatcher) -def nanargmin(a, axis=None): +def nanargmin(a, axis=None, out=None, *, keepdims=np._NoValue): """ Return the indices of the minimum values in the specified axis ignoring NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the results @@ -476,6 +512,17 @@ def nanargmin(a, axis=None): Input data. axis : int, optional Axis along which to operate. By default flattened input is used. + out : array, optional + If provided, the result will be inserted into this array. It should + be of the appropriate shape and dtype. + + .. versionadded:: 1.22.0 + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the array. + + .. versionadded:: 1.22.0 Returns ------- @@ -500,20 +547,20 @@ def nanargmin(a, axis=None): """ a, mask = _replace_nan(a, np.inf) - res = np.argmin(a, axis=axis) if mask is not None: mask = np.all(mask, axis=axis) if np.any(mask): raise ValueError("All-NaN slice encountered") + res = np.argmin(a, axis=axis, out=out, keepdims=keepdims) return res -def _nanargmax_dispatcher(a, axis=None): +def _nanargmax_dispatcher(a, axis=None, out=None, *, keepdims=None): return (a,) @array_function_dispatch(_nanargmax_dispatcher) -def nanargmax(a, axis=None): +def nanargmax(a, axis=None, out=None, *, keepdims=np._NoValue): """ Return the indices of the maximum values in the specified axis ignoring NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the @@ -526,6 +573,17 @@ def nanargmax(a, axis=None): Input data. axis : int, optional Axis along which to operate. By default flattened input is used. + out : array, optional + If provided, the result will be inserted into this array. It should + be of the appropriate shape and dtype. + + .. versionadded:: 1.22.0 + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the array. + + .. versionadded:: 1.22.0 Returns ------- @@ -550,20 +608,22 @@ def nanargmax(a, axis=None): """ a, mask = _replace_nan(a, -np.inf) - res = np.argmax(a, axis=axis) if mask is not None: mask = np.all(mask, axis=axis) if np.any(mask): raise ValueError("All-NaN slice encountered") + res = np.argmax(a, axis=axis, out=out, keepdims=keepdims) return res -def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None): +def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None, + initial=None, where=None): return (a, out) @array_function_dispatch(_nansum_dispatcher) -def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): +def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, + initial=np._NoValue, where=np._NoValue): """ Return the sum of array elements over a given axis treating Not a Numbers (NaNs) as zero. @@ -608,6 +668,14 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): does not implement `keepdims` any exceptions will be raised. .. versionadded:: 1.8.0 + initial : scalar, optional + Starting value for the sum. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 + where : array_like of bool, optional + Elements to include in the sum. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 Returns ------- @@ -653,15 +721,18 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ a, mask = _replace_nan(a, 0) - return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims, + initial=initial, where=where) -def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None): +def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None, + initial=None, where=None): return (a, out) @array_function_dispatch(_nanprod_dispatcher) -def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): +def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, + initial=np._NoValue, where=np._NoValue): """ Return the product of array elements over a given axis treating Not a Numbers (NaNs) as ones. @@ -695,6 +766,16 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): If True, the axes which are reduced are left in the result as dimensions with size one. With this option, the result will broadcast correctly against the original `arr`. + initial : scalar, optional + The starting value for this product. See `~numpy.ufunc.reduce` + for details. + + .. versionadded:: 1.22.0 + where : array_like of bool, optional + Elements to include in the product. See `~numpy.ufunc.reduce` + for details. + + .. versionadded:: 1.22.0 Returns ------- @@ -723,7 +804,8 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ a, mask = _replace_nan(a, 1) - return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims, + initial=initial, where=where) def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None): @@ -863,12 +945,14 @@ def nancumprod(a, axis=None, dtype=None, out=None): return np.cumprod(a, axis=axis, dtype=dtype, out=out) -def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None): +def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None, + *, where=None): return (a, out) @array_function_dispatch(_nanmean_dispatcher) -def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): +def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, + *, where=np._NoValue): """ Compute the arithmetic mean along the specified axis, ignoring NaNs. @@ -906,6 +990,10 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): `keepdims` will be passed through to the `mean` or `sum` methods of sub-classes of `ndarray`. If the sub-classes methods does not implement `keepdims` any exceptions will be raised. + where : array_like of bool, optional + Elements to include in the mean. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 Returns ------- @@ -944,7 +1032,8 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ arr, mask = _replace_nan(a, 0) if mask is None: - return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims, + where=where) if dtype is not None: dtype = np.dtype(dtype) @@ -953,8 +1042,10 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): if out is not None and not issubclass(out.dtype.type, np.inexact): raise TypeError("If a is inexact, then out must be inexact") - cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims) - tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims, + where=where) + tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims, + where=where) avg = _divide_by_count(tot, cnt, out=out) isbad = (cnt == 0) @@ -1428,13 +1519,14 @@ def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'): arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation) -def _nanvar_dispatcher( - a, axis=None, dtype=None, out=None, ddof=None, keepdims=None): +def _nanvar_dispatcher(a, axis=None, dtype=None, out=None, ddof=None, + keepdims=None, *, where=None): return (a, out) @array_function_dispatch(_nanvar_dispatcher) -def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): +def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue, + *, where=np._NoValue): """ Compute the variance along the specified axis, while ignoring NaNs. @@ -1471,7 +1563,11 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the result will broadcast correctly against the original `a`. + where : array_like of bool, optional + Elements to include in the variance. See `~numpy.ufunc.reduce` for + details. + .. versionadded:: 1.22.0 Returns ------- @@ -1527,7 +1623,7 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): arr, mask = _replace_nan(a, 0) if mask is None: return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims) + keepdims=keepdims, where=where) if dtype is not None: dtype = np.dtype(dtype) @@ -1546,20 +1642,22 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): # keepdims=True, however matrix now raises an error in this case, but # the reason that it drops the keepdims kwarg is to force keepdims=True # so this used to work by serendipity. - cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims) - avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=_keepdims) + cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims, + where=where) + avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=_keepdims, where=where) avg = _divide_by_count(avg, cnt) # Compute squared deviation from mean. - np.subtract(arr, avg, out=arr, casting='unsafe') + np.subtract(arr, avg, out=arr, casting='unsafe', where=where) arr = _copyto(arr, 0, mask) if issubclass(arr.dtype.type, np.complexfloating): - sqr = np.multiply(arr, arr.conj(), out=arr).real + sqr = np.multiply(arr, arr.conj(), out=arr, where=where).real else: - sqr = np.multiply(arr, arr, out=arr) + sqr = np.multiply(arr, arr, out=arr, where=where) # Compute variance. - var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims, + where=where) # Precaution against reduced object arrays try: @@ -1582,13 +1680,14 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): return var -def _nanstd_dispatcher( - a, axis=None, dtype=None, out=None, ddof=None, keepdims=None): +def _nanstd_dispatcher(a, axis=None, dtype=None, out=None, ddof=None, + keepdims=None, *, where=None): return (a, out) @array_function_dispatch(_nanstd_dispatcher) -def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): +def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue, + *, where=np._NoValue): """ Compute the standard deviation along the specified axis, while ignoring NaNs. @@ -1632,6 +1731,11 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): as-is to the relevant functions of the sub-classes. If these functions do not have a `keepdims` kwarg, a RuntimeError will be raised. + where : array_like of bool, optional + Elements to include in the standard deviation. + See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.22.0 Returns ------- @@ -1683,7 +1787,7 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): """ var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims) + keepdims=keepdims, where=where) if isinstance(var, np.ndarray): std = np.sqrt(var, out=var) elif hasattr(var, 'dtype'): -- cgit v1.2.1 From cd7a02a4db7e760b881f3feeb832ffd84fa8645a Mon Sep 17 00:00:00 2001 From: abel Date: Thu, 2 Sep 2021 16:34:42 +0200 Subject: MAINT, ENH [#10736] Add interpolation methods to quantile - Added the missing linear interpolation methods. - Updated the existing unit tests. - Added pytest.mark.xfail for boolean arrays See - https://github.com/numpy/numpy/pull/19857#issuecomment-919258693 - https://github.com/numpy/numpy/issues/19154 --- numpy/lib/nanfunctions.py | 212 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 178 insertions(+), 34 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 08d9b42bb..e90c19b4a 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -23,6 +23,7 @@ Functions import functools import warnings import numpy as np +from numpy.lib.function_base import _QuantileInterpolation from numpy.lib import function_base from numpy.core import overrides @@ -1229,8 +1230,15 @@ def _nanpercentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, @array_function_dispatch(_nanpercentile_dispatcher) -def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear', keepdims=np._NoValue): +def nanpercentile( + a, + q, + axis=None, + out=None, + overwrite_input=False, + interpolation="linear", + keepdims=np._NoValue, +): """ Compute the qth percentile of the data along the specified axis, while ignoring nan values. @@ -1259,18 +1267,74 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, If True, then allow the input array `a` to be modified by intermediate calculations, to save memory. In this case, the contents of the input `a` after this function completes is undefined. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + interpolation : str + Possible values: 'linear' (default), + 'inverted_cdf', 'averaged_inverted_cdf', + 'closest_observation', 'interpolated_inverted_cdf', + 'hazen', 'weibull', + 'median_unbiased', 'normal_unbiased', + 'lower', 'higher', + 'midpoint', 'nearest'. This optional parameter specifies the interpolation method to - use when the desired percentile lies between two data points - ``i < j``: - - * 'linear': ``i + (j - i) * fraction``, where ``fraction`` - is the fractional part of the index surrounded by ``i`` - and ``j``. - * 'lower': ``i``. - * 'higher': ``j``. - * 'nearest': ``i`` or ``j``, whichever is nearest. - * 'midpoint': ``(i + j) / 2``. + use when the desired quantile lies between two data points ``i < j``. + g is the fractional part of the index surrounded by ``i``. + alpha and beta are correction constants modifying i and j: + i + g = (q - alpha) / ( n - alpha - beta + 1 ) + * inverted_cdf: + method 1 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 ; then take i + * averaged_inverted_cdf: + method 2 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 ; then average between bounds + * closest_observation: + method 3 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 and index is odd ; then take j + if g = 0 and index is even ; then take i + * interpolated_inverted_cdf: + method 4 of H&F. + This method give continuous results using: + alpha = 0 + beta = 1 + * hazen: + method 5 of H&F. + This method give continuous results using: + alpha = 1/2 + beta = 1/2 + * weibull: + method 6 of H&F. + This method give continuous results using: + alpha = 0 + beta = 0 + * linear: + Default method. + method 7 of H&F. + This method give continuous results using: + alpha = 1 + beta = 1 + * median_unbiased: + method 8 of H&F. + This method is probably the best method if the sample distribution + function is unknown (see reference). + This method give continuous results using: + alpha = 1/3 + beta = 1/3 + * normal_unbiased: + method 9 of H&F. + This method is probably the best method if the sample distribution + function is known to be normal. + This method give continuous results using: + alpha = 3/8 + beta = 3/8 + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the @@ -1342,7 +1406,9 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, """ a = np.asanyarray(a) - q = np.true_divide(q, 100.0) # handles the asarray for us too + q = np.true_divide(q, 100.0) + # undo any decay that the ufunc performed (see gh-13105) + q = np.asanyarray(q) if not function_base._quantile_is_valid(q): raise ValueError("Percentiles must be in the range [0, 100]") return _nanquantile_unchecked( @@ -1355,8 +1421,15 @@ def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, @array_function_dispatch(_nanquantile_dispatcher) -def nanquantile(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear', keepdims=np._NoValue): +def nanquantile( + a, + q, + axis=None, + out=None, + overwrite_input=False, + interpolation="linear", + keepdims=np._NoValue, +): """ Compute the qth quantile of the data along the specified axis, while ignoring nan values. @@ -1384,19 +1457,74 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False, If True, then allow the input array `a` to be modified by intermediate calculations, to save memory. In this case, the contents of the input `a` after this function completes is undefined. - interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + interpolation : str + Possible values: 'linear' (default), + 'inverted_cdf', 'averaged_inverted_cdf', + 'closest_observation', 'interpolated_inverted_cdf', + 'hazen', 'weibull', + 'median_unbiased', 'normal_unbiased', + 'lower', 'higher', + 'midpoint', 'nearest'. This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points - ``i < j``: - - * linear: ``i + (j - i) * fraction``, where ``fraction`` - is the fractional part of the index surrounded by ``i`` - and ``j``. + use when the desired quantile lies between two data points ``i < j``. + g is the fractional part of the index surrounded by ``i``. + alpha and beta are correction constants modifying i and j: + i + g = (q - alpha) / ( n - alpha - beta + 1 ) + * inverted_cdf: + method 1 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 ; then take i + * averaged_inverted_cdf: + method 2 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 ; then average between bounds + * closest_observation: + method 3 of H&F. + This method give discontinuous results: + if g > 0 ; then take j + if g = 0 and index is odd ; then take j + if g = 0 and index is even ; then take i + * interpolated_inverted_cdf: + method 4 of H&F. + This method give continuous results using: + alpha = 0 + beta = 1 + * hazen: + method 5 of H&F. + This method give continuous results using: + alpha = 1/2 + beta = 1/2 + * weibull: + method 6 of H&F. + This method give continuous results using: + alpha = 0 + beta = 0 + * linear: + Default method. + method 7 of H&F. + This method give continuous results using: + alpha = 1 + beta = 1 + * median_unbiased: + method 8 of H&F. + This method is probably the best method if the sample distribution + function is unknown (see reference). + This method give continuous results using: + alpha = 1/3 + beta = 1/3 + * normal_unbiased: + method 9 of H&F. + This method is probably the best method if the sample distribution + function is known to be normal. + This method give continuous results using: + alpha = 3/8 + beta = 3/8 * lower: ``i``. * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. - keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the @@ -1462,26 +1590,39 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False, a, q, axis, out, overwrite_input, interpolation, keepdims) -def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear', keepdims=np._NoValue): +def _nanquantile_unchecked( + a, + q, + axis=None, + out=None, + overwrite_input=False, + interpolation="linear", + keepdims=np._NoValue, +): """Assumes that q is in [0, 1], and is an ndarray""" # apply_along_axis in _nanpercentile doesn't handle empty arrays well, # so deal them upfront if a.size == 0: return np.nanmean(a, axis, out=out, keepdims=keepdims) - - r, k = function_base._ureduce( - a, func=_nanquantile_ureduce_func, q=q, axis=axis, out=out, - overwrite_input=overwrite_input, interpolation=interpolation - ) + r, k = function_base._ureduce(a, + func=_nanquantile_ureduce_func, + q=q, + axis=axis, + out=out, + overwrite_input=overwrite_input, + interpolation=interpolation) if keepdims and keepdims is not np._NoValue: return r.reshape(q.shape + k) else: return r -def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear'): +def _nanquantile_ureduce_func(a, + q, + axis=None, + out=None, + overwrite_input=False, + interpolation= "linear"): """ Private function that doesn't support extended axis or keepdims. These methods are extended to this function using _ureduce @@ -1504,7 +1645,10 @@ def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, return result -def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'): +def _nanquantile_1d(arr1d, + q, + overwrite_input=False, + interpolation= "linear"): """ Private function for rank 1 arrays. Compute quantile ignoring NaNs. See nanpercentile for parameter usage -- cgit v1.2.1 From 303c12cfe7ad1b8b6ed5417c126857b29355b1fb Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Fri, 8 Oct 2021 17:09:51 -0600 Subject: DOC: fix docstrings. Hopefully fix the docstrings of percentile, nanpercentile, quantile, and nanquantile so that CircleCI passes. --- numpy/lib/nanfunctions.py | 424 +++++++++++++++++++++++++++++----------------- 1 file changed, 268 insertions(+), 156 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index e90c19b4a..710242d59 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1253,88 +1253,47 @@ def nanpercentile( Input array or object that can be converted to an array, containing nan values to be ignored. q : array_like of float - Percentile or sequence of percentiles to compute, which must be between - 0 and 100 inclusive. + Percentile or sequence of percentiles to compute, which must be + between 0 and 100 inclusive. axis : {int, tuple of int, None}, optional - Axis or axes along which the percentiles are computed. The - default is to compute the percentile(s) along a flattened - version of the array. + Axis or axes along which the percentiles are computed. The default + is to compute the percentile(s) along a flattened version of the + array. out : ndarray, optional - Alternative output array in which to place the result. It must - have the same shape and buffer length as the expected output, - but the type (of the output) will be cast if necessary. + Alternative output array in which to place the result. It must have + the same shape and buffer length as the expected output, but the + type (of the output) will be cast if necessary. overwrite_input : bool, optional - If True, then allow the input array `a` to be modified by intermediate - calculations, to save memory. In this case, the contents of the input - `a` after this function completes is undefined. - interpolation : str - Possible values: 'linear' (default), - 'inverted_cdf', 'averaged_inverted_cdf', - 'closest_observation', 'interpolated_inverted_cdf', - 'hazen', 'weibull', - 'median_unbiased', 'normal_unbiased', - 'lower', 'higher', - 'midpoint', 'nearest'. - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points ``i < j``. - g is the fractional part of the index surrounded by ``i``. - alpha and beta are correction constants modifying i and j: - i + g = (q - alpha) / ( n - alpha - beta + 1 ) - * inverted_cdf: - method 1 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 ; then take i - * averaged_inverted_cdf: - method 2 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 ; then average between bounds - * closest_observation: - method 3 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 and index is odd ; then take j - if g = 0 and index is even ; then take i - * interpolated_inverted_cdf: - method 4 of H&F. - This method give continuous results using: - alpha = 0 - beta = 1 - * hazen: - method 5 of H&F. - This method give continuous results using: - alpha = 1/2 - beta = 1/2 - * weibull: - method 6 of H&F. - This method give continuous results using: - alpha = 0 - beta = 0 - * linear: - Default method. - method 7 of H&F. - This method give continuous results using: - alpha = 1 - beta = 1 - * median_unbiased: - method 8 of H&F. - This method is probably the best method if the sample distribution - function is unknown (see reference). - This method give continuous results using: - alpha = 1/3 - beta = 1/3 - * normal_unbiased: - method 9 of H&F. - This method is probably the best method if the sample distribution - function is known to be normal. - This method give continuous results using: - alpha = 3/8 - beta = 3/8 - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + If True, then allow the input array `a` to be modified by + intermediate calculations, to save memory. In this case, the + contents of the input `a` after this function completes is + undefined. + interpolation : str, optional + This parameter specifies the interpolation method to use when the + desired quantile lies between two data points There are many + different methods, some unique to NumPy. See the notes for + explanation. Options: + + * (NPY 1): 'lower' + * (NPY 2): 'higher', + * (NPY 3): 'midpoint' + * (NPY 4): 'nearest' + * (NPY 5): 'linear', aliased with 'inclusive' (default) + + New options: + + * (H&F 1): 'inverted_cdf' + * (H&F 2): 'averaged_inverted_cdf' + * (H&F 3): 'closest_observation' + * (H&F 4): 'interpolated_inverted_cdf' + * (H&F 5): 'hazen' + * (H&F 6): 'weibull' + * (H&F 7): 'inclusive', aliased with 'linear' (default) + * (H&F 8): 'median_unbiased' + * (H&F 9): 'normal_unbiased' + + .. versionadded:: 1.22.0 + keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the @@ -1363,18 +1322,104 @@ def nanpercentile( nanmean nanmedian : equivalent to ``nanpercentile(..., 50)`` percentile, median, mean - nanquantile : equivalent to nanpercentile, but with q in the range [0, 1]. + nanquantile : equivalent to nanpercentile, except q in range [0, 1]. Notes ----- - Given a vector ``V`` of length ``N``, the ``q``-th percentile of - ``V`` is the value ``q/100`` of the way from the minimum to the - maximum in a sorted copy of ``V``. The values and distances of - the two nearest neighbors as well as the `interpolation` parameter - will determine the percentile if the normalized ranking does not - match the location of ``q`` exactly. This function is the same as - the median if ``q=50``, the same as the minimum if ``q=0`` and the - same as the maximum if ``q=100``. + Given a vector ``V`` of length ``N``, the ``q``-th percentile of ``V`` + is the value ``q/100`` of the way from the minimum to the maximum in a + sorted copy of ``V``. The values and distances of the two nearest + neighbors as well as the `interpolation` parameter will determine the + percentile if the normalized ranking does not match the location of + ``q`` exactly. This function is the same as the median if ``q=50``, the + same as the minimum if ``q=0`` and the same as the maximum if + ``q=100``. + + This optional `interpolation` parameter specifies the interpolation + method to use when the desired quantile lies between two data points + ``i < j``. If ``g`` is the fractional part of the index surrounded by + ``i`` and alpha and beta are correction constants modifying i and j. + + .. math:: + i + g = (q - alpha) / ( n - alpha - beta + 1 ) + + The different interpolation methods then work as follows + + inverted_cdf: + method 1 of H&F [1]_. + This method gives discontinuous results: + * if g > 0 ; then take j + * if g = 0 ; then take i + + averaged_inverted_cdf: + method 2 of H&F [1]_. + This method give discontinuous results: + * if g > 0 ; then take j + * if g = 0 ; then average between bounds + + closest_observation: + method 3 of H&F [1]_. + This method give discontinuous results: + * if g > 0 ; then take j + * if g = 0 and index is odd ; then take j + * if g = 0 and index is even ; then take i + + interpolated_inverted_cdf: + method 4 of H&F [1]_. + This method give continuous results using: + * alpha = 0 + * beta = 1 + + hazen: + method 5 of H&F [1]_. + This method give continuous results using: + * alpha = 1/2 + * beta = 1/2 + + weibull: + method 6 of H&F [1]_. + This method give continuous results using: + * alpha = 0 + * beta = 0 + + inclusive: + Default method, aliased with "linear". + method 7 of H&F [1]_. + This method give continuous results using: + * alpha = 1 + * beta = 1 + + median_unbiased: + method 8 of H&F [1]_. + This method is probably the best method if the sample + distribution function is unknown (see reference). + This method give continuous results using: + * alpha = 1/3 + * beta = 1/3 + + normal_unbiased: + method 9 of H&F [1]_. + This method is probably the best method if the sample + distribution function is known to be normal. + This method give continuous results using: + * alpha = 3/8 + * beta = 3/8 + + lower: + NumPy method kept for backwards compatibility. + Takes ``i`` as the interpolation point. + + higher: + NumPy method kept for backwards compatibility. + Takes ``j`` as the interpolation point. + + nearest: + NumPy method kept for backwards compatibility. + Takes ``i`` or ``j``, whichever is nearest. + + midpoint: + NumPy method kept for backwards compatibility. + Uses ``(i + j) / 2``. Examples -------- @@ -1404,6 +1449,12 @@ def nanpercentile( array([7., 2.]) >>> assert not np.all(a==b) + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + """ a = np.asanyarray(a) q = np.true_divide(q, 100.0) @@ -1457,74 +1508,32 @@ def nanquantile( If True, then allow the input array `a` to be modified by intermediate calculations, to save memory. In this case, the contents of the input `a` after this function completes is undefined. - interpolation : str - Possible values: 'linear' (default), - 'inverted_cdf', 'averaged_inverted_cdf', - 'closest_observation', 'interpolated_inverted_cdf', - 'hazen', 'weibull', - 'median_unbiased', 'normal_unbiased', - 'lower', 'higher', - 'midpoint', 'nearest'. - This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points ``i < j``. - g is the fractional part of the index surrounded by ``i``. - alpha and beta are correction constants modifying i and j: - i + g = (q - alpha) / ( n - alpha - beta + 1 ) - * inverted_cdf: - method 1 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 ; then take i - * averaged_inverted_cdf: - method 2 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 ; then average between bounds - * closest_observation: - method 3 of H&F. - This method give discontinuous results: - if g > 0 ; then take j - if g = 0 and index is odd ; then take j - if g = 0 and index is even ; then take i - * interpolated_inverted_cdf: - method 4 of H&F. - This method give continuous results using: - alpha = 0 - beta = 1 - * hazen: - method 5 of H&F. - This method give continuous results using: - alpha = 1/2 - beta = 1/2 - * weibull: - method 6 of H&F. - This method give continuous results using: - alpha = 0 - beta = 0 - * linear: - Default method. - method 7 of H&F. - This method give continuous results using: - alpha = 1 - beta = 1 - * median_unbiased: - method 8 of H&F. - This method is probably the best method if the sample distribution - function is unknown (see reference). - This method give continuous results using: - alpha = 1/3 - beta = 1/3 - * normal_unbiased: - method 9 of H&F. - This method is probably the best method if the sample distribution - function is known to be normal. - This method give continuous results using: - alpha = 3/8 - beta = 3/8 - * lower: ``i``. - * higher: ``j``. - * nearest: ``i`` or ``j``, whichever is nearest. - * midpoint: ``(i + j) / 2``. + interpolation : str, optional + This parameter specifies the interpolation method to + use when the desired quantile lies between two data points + There are many different methods, some unique to NumPy. See the + notes for explanation. Options: + + * (NPY 1): 'lower' + * (NPY 2): 'higher', + * (NPY 3): 'midpoint' + * (NPY 4): 'nearest' + * (NPY 5): 'linear', aliased with 'inclusive' (default) + + New options: + + * (H&F 1): 'inverted_cdf' + * (H&F 2): 'averaged_inverted_cdf' + * (H&F 3): 'closest_observation' + * (H&F 4): 'interpolated_inverted_cdf' + * (H&F 5): 'hazen' + * (H&F 6): 'weibull' + * (H&F 7): 'inclusive', aliased with 'linear' (default) + * (H&F 8): 'median_unbiased' + * (H&F 9): 'normal_unbiased' + + .. versionadded;: 1.22.0 + keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, the @@ -1555,6 +1564,102 @@ def nanquantile( nanmedian : equivalent to ``nanquantile(..., 0.5)`` nanpercentile : same as nanquantile, but with q in the range [0, 100]. + Notes + ----- + Given a vector ``V`` of length ``N``, the q-th quantile of ``V`` is the + value ``q`` of the way from the minimum to the maximum in a sorted copy of + ``V``. The values and distances of the two nearest neighbors as well as the + `interpolation` parameter will determine the quantile if the normalized + ranking does not match the location of ``q`` exactly. This function is the + same as the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and + the same as the maximum if ``q=1.0``. + + This optional `interpolation` parameter specifies the interpolation method + to use when the desired quantile lies between two data points ``i < j``. If + ``g`` is the fractional part of the index surrounded by ``i`` and alpha + and beta are correction constants modifying i and j. + + .. math:: + i + g = (q - alpha) / ( n - alpha - beta + 1 ) + + The different interpolation methods then work as follows + + inverted_cdf: + method 1 of H&F [1]_. + This method gives discontinuous results: + * if g > 0 ; then take j + * if g = 0 ; then take i + + averaged_inverted_cdf: + method 2 of H&F [1]_. + This method give discontinuous results: + * if g > 0 ; then take j + * if g = 0 ; then average between bounds + + closest_observation: + method 3 of H&F [1]_. + This method give discontinuous results: + * if g > 0 ; then take j + * if g = 0 and index is odd ; then take j + * if g = 0 and index is even ; then take i + + interpolated_inverted_cdf: + method 4 of H&F [1]_. + This method give continuous results using: + * alpha = 0 + * beta = 1 + + hazen: + method 5 of H&F [1]_. + This method give continuous results using: + * alpha = 1/2 + * beta = 1/2 + + weibull: + method 6 of H&F [1]_. + This method give continuous results using: + * alpha = 0 + * beta = 0 + + inclusive: + Default method, aliased with "linear". + method 7 of H&F [1]_. + This method give continuous results using: + * alpha = 1 + * beta = 1 + + median_unbiased: + method 8 of H&F [1]_. + This method is probably the best method if the sample + distribution function is unknown (see reference). + This method give continuous results using: + * alpha = 1/3 + * beta = 1/3 + + normal_unbiased: + method 9 of H&F [1]_. + This method is probably the best method if the sample + distribution function is known to be normal. + This method give continuous results using: + * alpha = 3/8 + * beta = 3/8 + + lower: + NumPy method kept for backwards compatibility. + Takes ``i`` as the interpolation point. + + higher: + NumPy method kept for backwards compatibility. + Takes ``j`` as the interpolation point. + + nearest: + NumPy method kept for backwards compatibility. + Takes ``i`` or ``j``, whichever is nearest. + + midpoint: + NumPy method kept for backwards compatibility. + Uses ``(i + j) / 2``. + Examples -------- >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) @@ -1581,6 +1686,13 @@ def nanquantile( >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True) array([7., 2.]) >>> assert not np.all(a==b) + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + """ a = np.asanyarray(a) q = np.asanyarray(q) -- cgit v1.2.1 From 8413b5abf27221fb2bea070871c7cd8f8da5519c Mon Sep 17 00:00:00 2001 From: abel Date: Thu, 21 Oct 2021 09:59:53 +0200 Subject: MAINT: Clean following PR comments --- numpy/lib/nanfunctions.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 710242d59..c3333a83a 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1532,7 +1532,7 @@ def nanquantile( * (H&F 8): 'median_unbiased' * (H&F 9): 'normal_unbiased' - .. versionadded;: 1.22.0 + .. versionchanged:: 1.22.0 keepdims : bool, optional If this is set to True, the axes which are reduced are left in @@ -1729,12 +1729,8 @@ def _nanquantile_unchecked( return r -def _nanquantile_ureduce_func(a, - q, - axis=None, - out=None, - overwrite_input=False, - interpolation= "linear"): +def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, + interpolation="linear"): """ Private function that doesn't support extended axis or keepdims. These methods are extended to this function using _ureduce @@ -1757,10 +1753,7 @@ def _nanquantile_ureduce_func(a, return result -def _nanquantile_1d(arr1d, - q, - overwrite_input=False, - interpolation= "linear"): +def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation="linear"): """ Private function for rank 1 arrays. Compute quantile ignoring NaNs. See nanpercentile for parameter usage -- cgit v1.2.1 From 1574011425611a0e43715f81c494004c26b95e92 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 1 Nov 2021 18:00:20 -0500 Subject: MAINT: Remove unnecessary import of _QuantileInterpolation --- numpy/lib/nanfunctions.py | 1 - 1 file changed, 1 deletion(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index c3333a83a..3189d2369 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -23,7 +23,6 @@ Functions import functools import warnings import numpy as np -from numpy.lib.function_base import _QuantileInterpolation from numpy.lib import function_base from numpy.core import overrides -- cgit v1.2.1 From f7911c67176c1d370be27726e87195699e4b581e Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 4 Nov 2021 13:26:37 -0500 Subject: DOC: Refer to the quantile/percentile notes for nan versions --- numpy/lib/nanfunctions.py | 203 +--------------------------------------------- 1 file changed, 3 insertions(+), 200 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 3189d2369..9fab77f45 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1269,7 +1269,7 @@ def nanpercentile( undefined. interpolation : str, optional This parameter specifies the interpolation method to use when the - desired quantile lies between two data points There are many + desired percentile lies between two data points There are many different methods, some unique to NumPy. See the notes for explanation. Options: @@ -1325,100 +1325,7 @@ def nanpercentile( Notes ----- - Given a vector ``V`` of length ``N``, the ``q``-th percentile of ``V`` - is the value ``q/100`` of the way from the minimum to the maximum in a - sorted copy of ``V``. The values and distances of the two nearest - neighbors as well as the `interpolation` parameter will determine the - percentile if the normalized ranking does not match the location of - ``q`` exactly. This function is the same as the median if ``q=50``, the - same as the minimum if ``q=0`` and the same as the maximum if - ``q=100``. - - This optional `interpolation` parameter specifies the interpolation - method to use when the desired quantile lies between two data points - ``i < j``. If ``g`` is the fractional part of the index surrounded by - ``i`` and alpha and beta are correction constants modifying i and j. - - .. math:: - i + g = (q - alpha) / ( n - alpha - beta + 1 ) - - The different interpolation methods then work as follows - - inverted_cdf: - method 1 of H&F [1]_. - This method gives discontinuous results: - * if g > 0 ; then take j - * if g = 0 ; then take i - - averaged_inverted_cdf: - method 2 of H&F [1]_. - This method give discontinuous results: - * if g > 0 ; then take j - * if g = 0 ; then average between bounds - - closest_observation: - method 3 of H&F [1]_. - This method give discontinuous results: - * if g > 0 ; then take j - * if g = 0 and index is odd ; then take j - * if g = 0 and index is even ; then take i - - interpolated_inverted_cdf: - method 4 of H&F [1]_. - This method give continuous results using: - * alpha = 0 - * beta = 1 - - hazen: - method 5 of H&F [1]_. - This method give continuous results using: - * alpha = 1/2 - * beta = 1/2 - - weibull: - method 6 of H&F [1]_. - This method give continuous results using: - * alpha = 0 - * beta = 0 - - inclusive: - Default method, aliased with "linear". - method 7 of H&F [1]_. - This method give continuous results using: - * alpha = 1 - * beta = 1 - - median_unbiased: - method 8 of H&F [1]_. - This method is probably the best method if the sample - distribution function is unknown (see reference). - This method give continuous results using: - * alpha = 1/3 - * beta = 1/3 - - normal_unbiased: - method 9 of H&F [1]_. - This method is probably the best method if the sample - distribution function is known to be normal. - This method give continuous results using: - * alpha = 3/8 - * beta = 3/8 - - lower: - NumPy method kept for backwards compatibility. - Takes ``i`` as the interpolation point. - - higher: - NumPy method kept for backwards compatibility. - Takes ``j`` as the interpolation point. - - nearest: - NumPy method kept for backwards compatibility. - Takes ``i`` or ``j``, whichever is nearest. - - midpoint: - NumPy method kept for backwards compatibility. - Uses ``(i + j) / 2``. + For more information please see `numpy.percentile` Examples -------- @@ -1448,12 +1355,6 @@ def nanpercentile( array([7., 2.]) >>> assert not np.all(a==b) - References - ---------- - .. [1] R. J. Hyndman and Y. Fan, - "Sample quantiles in statistical packages," - The American Statistician, 50(4), pp. 361-365, 1996 - """ a = np.asanyarray(a) q = np.true_divide(q, 100.0) @@ -1565,99 +1466,7 @@ def nanquantile( Notes ----- - Given a vector ``V`` of length ``N``, the q-th quantile of ``V`` is the - value ``q`` of the way from the minimum to the maximum in a sorted copy of - ``V``. The values and distances of the two nearest neighbors as well as the - `interpolation` parameter will determine the quantile if the normalized - ranking does not match the location of ``q`` exactly. This function is the - same as the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and - the same as the maximum if ``q=1.0``. - - This optional `interpolation` parameter specifies the interpolation method - to use when the desired quantile lies between two data points ``i < j``. If - ``g`` is the fractional part of the index surrounded by ``i`` and alpha - and beta are correction constants modifying i and j. - - .. math:: - i + g = (q - alpha) / ( n - alpha - beta + 1 ) - - The different interpolation methods then work as follows - - inverted_cdf: - method 1 of H&F [1]_. - This method gives discontinuous results: - * if g > 0 ; then take j - * if g = 0 ; then take i - - averaged_inverted_cdf: - method 2 of H&F [1]_. - This method give discontinuous results: - * if g > 0 ; then take j - * if g = 0 ; then average between bounds - - closest_observation: - method 3 of H&F [1]_. - This method give discontinuous results: - * if g > 0 ; then take j - * if g = 0 and index is odd ; then take j - * if g = 0 and index is even ; then take i - - interpolated_inverted_cdf: - method 4 of H&F [1]_. - This method give continuous results using: - * alpha = 0 - * beta = 1 - - hazen: - method 5 of H&F [1]_. - This method give continuous results using: - * alpha = 1/2 - * beta = 1/2 - - weibull: - method 6 of H&F [1]_. - This method give continuous results using: - * alpha = 0 - * beta = 0 - - inclusive: - Default method, aliased with "linear". - method 7 of H&F [1]_. - This method give continuous results using: - * alpha = 1 - * beta = 1 - - median_unbiased: - method 8 of H&F [1]_. - This method is probably the best method if the sample - distribution function is unknown (see reference). - This method give continuous results using: - * alpha = 1/3 - * beta = 1/3 - - normal_unbiased: - method 9 of H&F [1]_. - This method is probably the best method if the sample - distribution function is known to be normal. - This method give continuous results using: - * alpha = 3/8 - * beta = 3/8 - - lower: - NumPy method kept for backwards compatibility. - Takes ``i`` as the interpolation point. - - higher: - NumPy method kept for backwards compatibility. - Takes ``j`` as the interpolation point. - - nearest: - NumPy method kept for backwards compatibility. - Takes ``i`` or ``j``, whichever is nearest. - - midpoint: - NumPy method kept for backwards compatibility. - Uses ``(i + j) / 2``. + For more information please see `numpy.quantile` Examples -------- @@ -1686,12 +1495,6 @@ def nanquantile( array([7., 2.]) >>> assert not np.all(a==b) - References - ---------- - .. [1] R. J. Hyndman and Y. Fan, - "Sample quantiles in statistical packages," - The American Statistician, 50(4), pp. 361-365, 1996 - """ a = np.asanyarray(a) q = np.asanyarray(q) -- cgit v1.2.1 From 035d853e32d6e60a40a6a845699723238a01431b Mon Sep 17 00:00:00 2001 From: abel Date: Mon, 8 Nov 2021 17:35:19 +0100 Subject: DOC: Remove non-existent alias --- numpy/lib/nanfunctions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 9fab77f45..7e953be03 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1277,7 +1277,7 @@ def nanpercentile( * (NPY 2): 'higher', * (NPY 3): 'midpoint' * (NPY 4): 'nearest' - * (NPY 5): 'linear', aliased with 'inclusive' (default) + * (NPY 5): 'linear' (default) New options: @@ -1287,7 +1287,7 @@ def nanpercentile( * (H&F 4): 'interpolated_inverted_cdf' * (H&F 5): 'hazen' * (H&F 6): 'weibull' - * (H&F 7): 'inclusive', aliased with 'linear' (default) + * (H&F 7): 'linear' (default) * (H&F 8): 'median_unbiased' * (H&F 9): 'normal_unbiased' @@ -1418,7 +1418,7 @@ def nanquantile( * (NPY 2): 'higher', * (NPY 3): 'midpoint' * (NPY 4): 'nearest' - * (NPY 5): 'linear', aliased with 'inclusive' (default) + * (NPY 5): 'linear' (default) New options: @@ -1428,7 +1428,7 @@ def nanquantile( * (H&F 4): 'interpolated_inverted_cdf' * (H&F 5): 'hazen' * (H&F 6): 'weibull' - * (H&F 7): 'inclusive', aliased with 'linear' (default) + * (H&F 7): 'linear' (default) * (H&F 8): 'median_unbiased' * (H&F 9): 'normal_unbiased' -- cgit v1.2.1 From 3993408877ab414cb5e3639ac0e20fdec972933f Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 8 Nov 2021 15:38:30 -0600 Subject: API,DEP: Rename percentile/quantile `interpolation=` to `method=` --- numpy/lib/nanfunctions.py | 109 +++++++++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 41 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 7e953be03..4613c1d26 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1223,8 +1223,9 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu return r -def _nanpercentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, - interpolation=None, keepdims=None): +def _nanpercentile_dispatcher( + a, q, axis=None, out=None, overwrite_input=None, + method=None, keepdims=None, *, interpolation=None): return (a, q, out) @@ -1235,8 +1236,10 @@ def nanpercentile( axis=None, out=None, overwrite_input=False, - interpolation="linear", + method="linear", keepdims=np._NoValue, + *, + interpolation=None, ): """ Compute the qth percentile of the data along the specified axis, @@ -1267,19 +1270,11 @@ def nanpercentile( intermediate calculations, to save memory. In this case, the contents of the input `a` after this function completes is undefined. - interpolation : str, optional - This parameter specifies the interpolation method to use when the - desired percentile lies between two data points There are many - different methods, some unique to NumPy. See the notes for - explanation. Options: - - * (NPY 1): 'lower' - * (NPY 2): 'higher', - * (NPY 3): 'midpoint' - * (NPY 4): 'nearest' - * (NPY 5): 'linear' (default) - - New options: + method : str, optional + This parameter specifies the method to use for estimating the + percentile. There are many different methods, some unique to NumPy. + See the notes for explanation. The options aligning with the R types + and the H&F paper [1]_ are: * (H&F 1): 'inverted_cdf' * (H&F 2): 'averaged_inverted_cdf' @@ -1291,7 +1286,17 @@ def nanpercentile( * (H&F 8): 'median_unbiased' * (H&F 9): 'normal_unbiased' - .. versionadded:: 1.22.0 + Mainly for compatibility reasons, NumPy also supports the following + options which appear to be unique to NumPy: + + * 'lower' + * 'higher', + * 'midpoint' + * 'nearest' + + .. versionchanged:: 1.22.0 + This argument was previously called "interpolation" and only + offered the "linear" default and last four options. keepdims : bool, optional If this is set to True, the axes which are reduced are left in @@ -1304,6 +1309,11 @@ def nanpercentile( a sub-class and `mean` does not have the kwarg `keepdims` this will raise a RuntimeError. + interpolation : str, optional + Deprecated name for the method keyword argument. + + .. deprecated:: 1.22.0 + Returns ------- percentile : scalar or ndarray @@ -1356,6 +1366,10 @@ def nanpercentile( >>> assert not np.all(a==b) """ + if interpolation is not None: + method = function_base._check_interpolation_as_method( + method, interpolation, "nanpercentile") + a = np.asanyarray(a) q = np.true_divide(q, 100.0) # undo any decay that the ufunc performed (see gh-13105) @@ -1363,11 +1377,11 @@ def nanpercentile( if not function_base._quantile_is_valid(q): raise ValueError("Percentiles must be in the range [0, 100]") return _nanquantile_unchecked( - a, q, axis, out, overwrite_input, interpolation, keepdims) + a, q, axis, out, overwrite_input, method, keepdims) def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, - interpolation=None, keepdims=None): + method=None, keepdims=None, *, interpolation=None): return (a, q, out) @@ -1378,8 +1392,10 @@ def nanquantile( axis=None, out=None, overwrite_input=False, - interpolation="linear", + method="linear", keepdims=np._NoValue, + *, + interpolation=None, ): """ Compute the qth quantile of the data along the specified axis, @@ -1408,19 +1424,11 @@ def nanquantile( If True, then allow the input array `a` to be modified by intermediate calculations, to save memory. In this case, the contents of the input `a` after this function completes is undefined. - interpolation : str, optional - This parameter specifies the interpolation method to - use when the desired quantile lies between two data points - There are many different methods, some unique to NumPy. See the - notes for explanation. Options: - - * (NPY 1): 'lower' - * (NPY 2): 'higher', - * (NPY 3): 'midpoint' - * (NPY 4): 'nearest' - * (NPY 5): 'linear' (default) - - New options: + method : str, optional + This parameter specifies the method to use for estimating the + quantile. There are many different methods, some unique to NumPy. + See the notes for explanation. The options aligning with the R types + and the H&F paper [1]_ are: * (H&F 1): 'inverted_cdf' * (H&F 2): 'averaged_inverted_cdf' @@ -1432,7 +1440,17 @@ def nanquantile( * (H&F 8): 'median_unbiased' * (H&F 9): 'normal_unbiased' + Mainly for compatibility reasons, NumPy also supports the following + options which appear to be unique to NumPy: + + * 'lower' + * 'higher', + * 'midpoint' + * 'nearest' + .. versionchanged:: 1.22.0 + This argument was previously called "interpolation" and only + offered the "linear" default and last four options. keepdims : bool, optional If this is set to True, the axes which are reduced are left in @@ -1445,6 +1463,11 @@ def nanquantile( a sub-class and `mean` does not have the kwarg `keepdims` this will raise a RuntimeError. + interpolation : str, optional + Deprecated name for the method keyword argument. + + .. deprecated:: 1.22.0 + Returns ------- quantile : scalar or ndarray @@ -1496,12 +1519,16 @@ def nanquantile( >>> assert not np.all(a==b) """ + if interpolation is not None: + method = function_base._check_interpolation_as_method( + method, interpolation, "nanquantile") + a = np.asanyarray(a) q = np.asanyarray(q) if not function_base._quantile_is_valid(q): raise ValueError("Quantiles must be in the range [0, 1]") return _nanquantile_unchecked( - a, q, axis, out, overwrite_input, interpolation, keepdims) + a, q, axis, out, overwrite_input, method, keepdims) def _nanquantile_unchecked( @@ -1510,7 +1537,7 @@ def _nanquantile_unchecked( axis=None, out=None, overwrite_input=False, - interpolation="linear", + method="linear", keepdims=np._NoValue, ): """Assumes that q is in [0, 1], and is an ndarray""" @@ -1524,7 +1551,7 @@ def _nanquantile_unchecked( axis=axis, out=out, overwrite_input=overwrite_input, - interpolation=interpolation) + method=method) if keepdims and keepdims is not np._NoValue: return r.reshape(q.shape + k) else: @@ -1532,7 +1559,7 @@ def _nanquantile_unchecked( def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, - interpolation="linear"): + method="linear"): """ Private function that doesn't support extended axis or keepdims. These methods are extended to this function using _ureduce @@ -1540,10 +1567,10 @@ def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, """ if axis is None or a.ndim == 1: part = a.ravel() - result = _nanquantile_1d(part, q, overwrite_input, interpolation) + result = _nanquantile_1d(part, q, overwrite_input, method) else: result = np.apply_along_axis(_nanquantile_1d, axis, a, q, - overwrite_input, interpolation) + overwrite_input, method) # apply_along_axis fills in collapsed axis with results. # Move that axis to the beginning to match percentile's # convention. @@ -1555,7 +1582,7 @@ def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, return result -def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation="linear"): +def _nanquantile_1d(arr1d, q, overwrite_input=False, method="linear"): """ Private function for rank 1 arrays. Compute quantile ignoring NaNs. See nanpercentile for parameter usage @@ -1567,7 +1594,7 @@ def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation="linear"): return np.full(q.shape, np.nan, dtype=arr1d.dtype)[()] return function_base._quantile_unchecked( - arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation) + arr1d, q, overwrite_input=overwrite_input, method=method) def _nanvar_dispatcher(a, axis=None, dtype=None, out=None, ddof=None, -- cgit v1.2.1 From 0d5fb819bd6ff8f025db1dfdd0e86e109a64d694 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 8 Nov 2021 20:39:50 -0600 Subject: DOC: Remove reference to paper from quantile `method` kwarg Apparently, sphinx does not resolve references to footnotes from parameter descriptions. --- numpy/lib/nanfunctions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 4613c1d26..39e168944 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1274,7 +1274,7 @@ def nanpercentile( This parameter specifies the method to use for estimating the percentile. There are many different methods, some unique to NumPy. See the notes for explanation. The options aligning with the R types - and the H&F paper [1]_ are: + and the H&F paper are: * (H&F 1): 'inverted_cdf' * (H&F 2): 'averaged_inverted_cdf' @@ -1428,7 +1428,7 @@ def nanquantile( This parameter specifies the method to use for estimating the quantile. There are many different methods, some unique to NumPy. See the notes for explanation. The options aligning with the R types - and the H&F paper [1]_ are: + and the H&F paper are: * (H&F 1): 'inverted_cdf' * (H&F 2): 'averaged_inverted_cdf' -- cgit v1.2.1 From 7d8a8e746fc841a99f71242f60559b1be2e7340c Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Fri, 12 Nov 2021 11:57:22 -0600 Subject: DOC: Update percentile/quantile docs Mainly fixes the method list slightly, tones down the warning a bit and fixes the link to the paper (I did not realize that the link failed to work due only because the reference was missing from nanquantile/nanpercentile). --- numpy/lib/nanfunctions.py | 72 +++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 30 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 39e168944..d7ea1ca65 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1273,21 +1273,21 @@ def nanpercentile( method : str, optional This parameter specifies the method to use for estimating the percentile. There are many different methods, some unique to NumPy. - See the notes for explanation. The options aligning with the R types - and the H&F paper are: - - * (H&F 1): 'inverted_cdf' - * (H&F 2): 'averaged_inverted_cdf' - * (H&F 3): 'closest_observation' - * (H&F 4): 'interpolated_inverted_cdf' - * (H&F 5): 'hazen' - * (H&F 6): 'weibull' - * (H&F 7): 'linear' (default) - * (H&F 8): 'median_unbiased' - * (H&F 9): 'normal_unbiased' - - Mainly for compatibility reasons, NumPy also supports the following - options which appear to be unique to NumPy: + See the notes for explanation. The options sorted by their R type + as summarized in the H&F paper [1]_ are: + + 1. 'inverted_cdf' + 2. 'averaged_inverted_cdf' + 3. 'closest_observation' + 4. 'interpolated_inverted_cdf' + 5. 'hazen' + 6. 'weibull' + 7. 'linear' (default) + 8. 'median_unbiased' + 9. 'normal_unbiased' + + The first three methods are discontiuous. NumPy further defines the + following discontinuous variations of the default 'linear' (7.) option: * 'lower' * 'higher', @@ -1365,6 +1365,12 @@ def nanpercentile( array([7., 2.]) >>> assert not np.all(a==b) + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + """ if interpolation is not None: method = function_base._check_interpolation_as_method( @@ -1427,21 +1433,21 @@ def nanquantile( method : str, optional This parameter specifies the method to use for estimating the quantile. There are many different methods, some unique to NumPy. - See the notes for explanation. The options aligning with the R types - and the H&F paper are: - - * (H&F 1): 'inverted_cdf' - * (H&F 2): 'averaged_inverted_cdf' - * (H&F 3): 'closest_observation' - * (H&F 4): 'interpolated_inverted_cdf' - * (H&F 5): 'hazen' - * (H&F 6): 'weibull' - * (H&F 7): 'linear' (default) - * (H&F 8): 'median_unbiased' - * (H&F 9): 'normal_unbiased' - - Mainly for compatibility reasons, NumPy also supports the following - options which appear to be unique to NumPy: + See the notes for explanation. The options sorted by their R type + as summarized in the H&F paper [1]_ are: + + 1. 'inverted_cdf' + 2. 'averaged_inverted_cdf' + 3. 'closest_observation' + 4. 'interpolated_inverted_cdf' + 5. 'hazen' + 6. 'weibull' + 7. 'linear' (default) + 8. 'median_unbiased' + 9. 'normal_unbiased' + + The first three methods are discontiuous. NumPy further defines the + following discontinuous variations of the default 'linear' (7.) option: * 'lower' * 'higher', @@ -1518,6 +1524,12 @@ def nanquantile( array([7., 2.]) >>> assert not np.all(a==b) + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + """ if interpolation is not None: method = function_base._check_interpolation_as_method( -- cgit v1.2.1 From 58dbe260a2e41c31f1ab03e1abdb1f01da4c1edc Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 13 Jan 2022 10:56:00 +0100 Subject: MAINT, DOC: discard repeated words --- numpy/lib/nanfunctions.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index d7ea1ca65..cf76e7909 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -188,9 +188,8 @@ def _divide_by_count(a, b, out=None): """ Compute a/b ignoring invalid results. If `a` is an array the division is done in place. If `a` is a scalar, then its type is preserved in the - output. If out is None, then then a is used instead so that the - division is in place. Note that this is only called with `a` an inexact - type. + output. If out is None, then a is used instead so that the division + is in place. Note that this is only called with `a` an inexact type. Parameters ---------- -- cgit v1.2.1 From ea25a212110c8a6fb6d78bf5b6d7c351d7c8fa66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20de=20Menten?= Date: Wed, 28 Sep 2022 16:21:54 +0200 Subject: DOC: fixed two more typos in docstrings nanpercentile and nanquantile had the same typo I just fixed in percentile --- numpy/lib/nanfunctions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index cf76e7909..3814c0727 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1285,7 +1285,7 @@ def nanpercentile( 8. 'median_unbiased' 9. 'normal_unbiased' - The first three methods are discontiuous. NumPy further defines the + The first three methods are discontinuous. NumPy further defines the following discontinuous variations of the default 'linear' (7.) option: * 'lower' @@ -1445,7 +1445,7 @@ def nanquantile( 8. 'median_unbiased' 9. 'normal_unbiased' - The first three methods are discontiuous. NumPy further defines the + The first three methods are discontinuous. NumPy further defines the following discontinuous variations of the default 'linear' (7.) option: * 'lower' -- cgit v1.2.1 From 91432a36a3611c2374ea9e2d45592f0ac5e71adb Mon Sep 17 00:00:00 2001 From: Roy Smart Date: Fri, 2 Dec 2022 16:09:33 -0700 Subject: BUG: `keepdims=True` is ignored if `out` is not `None` in `numpy.median()`, `numpy.percentile()`, and `numpy.quantile()`. Closes #22714, #22544. --- numpy/lib/nanfunctions.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 3814c0727..ae2dfa165 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1214,12 +1214,9 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu if a.size == 0: return np.nanmean(a, axis, out=out, keepdims=keepdims) - r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out, + return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims, + axis=axis, out=out, overwrite_input=overwrite_input) - if keepdims and keepdims is not np._NoValue: - return r.reshape(k) - else: - return r def _nanpercentile_dispatcher( @@ -1556,17 +1553,14 @@ def _nanquantile_unchecked( # so deal them upfront if a.size == 0: return np.nanmean(a, axis, out=out, keepdims=keepdims) - r, k = function_base._ureduce(a, + return function_base._ureduce(a, func=_nanquantile_ureduce_func, q=q, + keepdims=keepdims, axis=axis, out=out, overwrite_input=overwrite_input, method=method) - if keepdims and keepdims is not np._NoValue: - return r.reshape(q.shape + k) - else: - return r def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, -- cgit v1.2.1 From b3c0960a54c81a26bd07912dda96db9e356b34d1 Mon Sep 17 00:00:00 2001 From: Matteo Raso <33975162+MatteoRaso@users.noreply.github.com> Date: Thu, 8 Dec 2022 07:01:59 -0500 Subject: BUG: Quantile function on complex number now throws an error (#22652) (#22703) Since percentile is more or less identical to quantile, I also made it throw an error if it receives a complex input. I also made nanquantile and nanpercentile throw errors as well. * Made the changes recommended by seberg * Fixed a test for PR 22703 * Fixed tests for quantile * Shortened some more lines * Fixup more lines Co-authored-by: Sebastian Berg --- numpy/lib/nanfunctions.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index ae2dfa165..786d2021e 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1373,6 +1373,9 @@ def nanpercentile( method, interpolation, "nanpercentile") a = np.asanyarray(a) + if a.dtype.kind == "c": + raise TypeError("a must be an array of real numbers") + q = np.true_divide(q, 100.0) # undo any decay that the ufunc performed (see gh-13105) q = np.asanyarray(q) @@ -1527,11 +1530,15 @@ def nanquantile( The American Statistician, 50(4), pp. 361-365, 1996 """ + if interpolation is not None: method = function_base._check_interpolation_as_method( method, interpolation, "nanquantile") a = np.asanyarray(a) + if a.dtype.kind == "c": + raise TypeError("a must be an array of real numbers") + q = np.asanyarray(q) if not function_base._quantile_is_valid(q): raise ValueError("Quantiles must be in the range [0, 1]") -- cgit v1.2.1 From 3f00488871ac169b1fd2f40495ad85cb581cc02b Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 16 Jan 2023 14:13:21 +0100 Subject: MAINT: Fix stacklevels for the new C dispatcher not adding one --- numpy/lib/nanfunctions.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 786d2021e..7e5528646 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -169,7 +169,7 @@ def _remove_nan_1d(arr1d, overwrite_input=False): s = np.nonzero(c)[0] if s.size == arr1d.size: warnings.warn("All-NaN slice encountered", RuntimeWarning, - stacklevel=5) + stacklevel=6) return arr1d[:0], True elif s.size == 0: return arr1d, overwrite_input @@ -343,7 +343,7 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue, res = np.fmin.reduce(a, axis=axis, out=out, **kwargs) if np.isnan(res).any(): warnings.warn("All-NaN slice encountered", RuntimeWarning, - stacklevel=3) + stacklevel=2) else: # Slow, but safe for subclasses of ndarray a, mask = _replace_nan(a, +np.inf) @@ -357,7 +357,7 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue, if np.any(mask): res = _copyto(res, np.nan, mask) warnings.warn("All-NaN axis encountered", RuntimeWarning, - stacklevel=3) + stacklevel=2) return res @@ -476,7 +476,7 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue, res = np.fmax.reduce(a, axis=axis, out=out, **kwargs) if np.isnan(res).any(): warnings.warn("All-NaN slice encountered", RuntimeWarning, - stacklevel=3) + stacklevel=2) else: # Slow, but safe for subclasses of ndarray a, mask = _replace_nan(a, -np.inf) @@ -490,7 +490,7 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue, if np.any(mask): res = _copyto(res, np.nan, mask) warnings.warn("All-NaN axis encountered", RuntimeWarning, - stacklevel=3) + stacklevel=2) return res @@ -1049,7 +1049,7 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, isbad = (cnt == 0) if isbad.any(): - warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=3) + warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2) # NaN is the only possible bad value, so no further # action is needed to handle bad results. return avg @@ -1109,7 +1109,7 @@ def _nanmedian_small(a, axis=None, out=None, overwrite_input=False): m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input) for i in range(np.count_nonzero(m.mask.ravel())): warnings.warn("All-NaN slice encountered", RuntimeWarning, - stacklevel=4) + stacklevel=5) fill_value = np.timedelta64("NaT") if m.dtype.kind == "m" else np.nan if out is not None: @@ -1763,7 +1763,7 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue, isbad = (dof <= 0) if np.any(isbad): warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, - stacklevel=3) + stacklevel=2) # NaN, inf, or negative numbers are all possible bad # values, so explicitly replace them with NaN. var = _copyto(var, np.nan, isbad) -- cgit v1.2.1 From 819d92116bd08a3732becd9895d48618588be3ba Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 4 Apr 2023 23:39:14 +0200 Subject: DOC quantile q is a probability --- numpy/lib/nanfunctions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'numpy/lib/nanfunctions.py') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 7e5528646..b3b570860 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -1415,8 +1415,8 @@ def nanquantile( Input array or object that can be converted to an array, containing nan values to be ignored q : array_like of float - Quantile or sequence of quantiles to compute, which must be between - 0 and 1 inclusive. + Probability or sequence of probabilities for the quantiles to compute. + Values must be between 0 and 1 inclusive. axis : {int, tuple of int, None}, optional Axis or axes along which the quantiles are computed. The default is to compute the quantile(s) along a flattened @@ -1476,8 +1476,8 @@ def nanquantile( Returns ------- quantile : scalar or ndarray - If `q` is a single percentile and `axis=None`, then the result - is a scalar. If multiple quantiles are given, first axis of + If `q` is a single probability and `axis=None`, then the result + is a scalar. If multiple probability levels are given, first axis of the result corresponds to the quantiles. The other axes are the axes that remain after the reduction of `a`. If the input contains integers or floats smaller than ``float64``, the output -- cgit v1.2.1