ENH: added functionality nanpercentile to numpy

Implemented a nanpercentile and associated tests as an extension of np.percentile to complement the other nanfunctions.
author: David Freese <dfreese@stanford.edu> 2014-05-21 16:54:07 -0700
committer: David Freese <dfreese@stanford.edu> 2014-05-22 15:04:55 -0700
commit: 19aa50901e96a8428950da062d0d60c760232395 (patch)
tree: 63b3dbe753be7dc6b3cfcc2287fdb669e3354c88 /numpy/lib/nanfunctions.py
parent: 9dd46ee2ed9fc55942d9ec13532d817c3b36d322 (diff)
download: numpy-19aa50901e96a8428950da062d0d60c760232395.tar.gz
1 files changed, 173 insertions, 1 deletions
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index 818e130a8..739dcc3ac 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -24,7 +24,7 @@ from numpy.lib.function_base import _ureduce as _ureduce
 
 __all__ = [
     'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
-    'nanmedian', 'nanvar', 'nanstd'
+    'nanmedian', 'nanpercentile', 'nanvar', 'nanstd'
     ]
 
 
@@ -743,6 +743,178 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
         return r
 
 
+def nanpercentile(a, q, axis=None, out=None,
+               overwrite_input=False, interpolation='linear', keepdims=False):
+    """
+    Compute the qth percentile of the data along the specified axis, while
+    ignoring nan values.
+
+    Returns the qth percentile of the array elements.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array or object that can be converted to an array.
+    q : float in range of [0,100] (or sequence of floats)
+        Percentile to compute which must be between 0 and 100 inclusive.
+    axis : int or sequence of int, optional
+        Axis along which the percentiles are computed. The default (None)
+        is to compute the percentiles along a flattened version of the array.
+        A sequence of axes is supported since version 1.9.0.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must
+        have the same shape and buffer length as the expected output,
+        but the type (of the output) will be cast if necessary.
+    overwrite_input : bool, optional
+        If True, then allow use of memory of input array `a` for
+        calculations. The input array will be modified by the call to
+        percentile. This will save memory when you do not need to preserve
+        the contents of the input array. In this case you should not make
+        any assumptions about the content of the passed in array `a` after
+        this function completes -- treat it as undefined. Default is False.
+        Note that, if the `a` input is not already an array this parameter
+        will have no effect, `a` will be converted to an array internally
+        regardless of the value of this parameter.
+    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+        This optional parameter specifies the interpolation method to use,
+        when the desired quantile lies between two data points `i` and `j`:
+            * linear: `i + (j - i) * fraction`, where `fraction` is the
+              fractional part of the index surrounded by `i` and `j`.
+            * lower: `i`.
+            * higher: `j`.
+            * nearest: `i` or `j` whichever is nearest.
+            * midpoint: (`i` + `j`) / 2.
+
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the original `a`.
+
+
+    Returns
+    -------
+    nanpercentile : scalar or ndarray
+        If a single percentile `q` is given and axis=None a scalar is
+        returned.  If multiple percentiles `q` are given an array holding
+        the result is returned. The results are listed in the first axis.
+        If `out` is specified, that array is returned instead.
+        If the input contains integers, or floats of smaller
+        precision than 64, then the output data-type is float64. Otherwise,
+        the output data-type is the same as that of the input.
+
+    See Also
+    --------
+    nanmean, nanmedian, percentile, median, mean
+
+    Notes
+    -----
+    Given a vector V of length N, the q-th percentile of V is the q-th ranked
+    value in a sorted copy of V.  The values and distances of the two
+    nearest neighbors as well as the `interpolation` parameter will
+    determine the percentile if the normalized ranking does not match q
+    exactly. This function is the same as the median if ``q=50``, the same
+    as the minimum if ``q=0`` and the same as the maximum if ``q=100``.
+
+    Examples
+    --------
+    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
+    >>> a[0][1] = np.nan
+    >>> a
+    array([[ 10.,  nan,   4.],
+           [  3.,   2.,   1.]])
+    >>> np.percentile(a, 50)
+    nan
+    >>> np.nanpercentile(a, 50)
+    3.0
+    >>> np.nanpercentile(a, 50, axis=0)
+    array([ 6.5,  2. ,  2.5])
+    >>> np.nanpercentile(a, 50, axis=1, keepdims=True)
+    array([[ 7.],
+           [ 2.]])
+    >>> m = np.nanpercentile(a, 50, axis=0)
+    >>> out = np.zeros_like(m)
+    >>> np.nanpercentile(a, 50, axis=0, out=out)
+    array([ 6.5,  2. ,  2.5])
+    >>> m
+    array([ 6.5,  2. ,  2.5])
+    >>> b = a.copy()
+    >>> np.nanpercentile(b, 50, axis=1, keepdims=True, overwrite_input=True)
+    array([[ 7.],
+           [ 2.]])
+    >>> assert not np.all(a==b)
+    >>> b = a.copy()
+    >>> np.nanpercentile(b, 50, axis=None, overwrite_input=True)
+    3.0
+
+    """
+
+    a = np.asanyarray(a)
+    q = np.asanyarray(q)
+    # apply_along_axis in _nanpercentile doesn't handle empty arrays well,
+    # so deal with them upfront
+    if 0 in a.shape:
+        return np.nanmean(a, axis, out=out, keepdims=keepdims)
+
+    r, k = _ureduce(a, func=_nanpercentile, q=q, axis=axis, out=out,
+                    overwrite_input=overwrite_input,
+                    interpolation=interpolation)
+    if keepdims:
+        if q.ndim == 0:
+            return r.reshape(k)
+        else:
+            return r.reshape([len(q)] + k)
+    else:
+        return r
+
+
+def _nanpercentile(a, q, axis=None, out=None,
+                overwrite_input=False, interpolation='linear', keepdims=False):
+    """
+    Private function that doesn't support extended axis or keepdims.
+    These methods are extended to this function using _ureduce
+    See nanpercentile for parameter usage
+
+    """
+    if axis is None:
+        part = a.ravel()
+        result = _nanpercentile1d(part, q, overwrite_input, interpolation)
+    else:
+        result = np.apply_along_axis(_nanpercentile1d, axis, a, q, overwrite_input,
+                interpolation)
+
+    if out is not None:
+        out[:] = result
+    return result
+
+
+def _nanpercentile1d(arr1d, q, overwrite_input=False, interpolation='linear'):
+    """
+    Private function for rank 1 arrays. Compute percentile ignoring NaNs.
+    See nanpercentile for parameter usage
+    
+    """
+    c = np.isnan(arr1d)
+    s = np.where(c)[0]
+    if s.size == arr1d.size:
+        warnings.warn("All-NaN slice encountered", RuntimeWarning)
+        return np.nan
+    elif s.size == 0:
+        return np.percentile(arr1d, q, overwrite_input=overwrite_input,
+                interpolation=interpolation)
+    else:
+        if overwrite_input: 
+            x = arr1d
+        else: 
+            x = arr1d.copy() 
+        # select non-nans at end of array
+        enonan = arr1d[-s.size:][~c[-s.size:]]
+        # fill nans in beginning of array with non-nans of end
+        x[s[:enonan.size]] = enonan
+        # slice nans away
+        return np.percentile(x[:-s.size], q, overwrite_input=True,
+                interpolation=interpolation)
+
+
 def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
     """
     Compute the variance along the specified axis, while ignoring NaNs.
author	David Freese <dfreese@stanford.edu>	2014-05-21 16:54:07 -0700
committer	David Freese <dfreese@stanford.edu>	2014-05-22 15:04:55 -0700
commit	19aa50901e96a8428950da062d0d60c760232395 (patch)
tree	63b3dbe753be7dc6b3cfcc2287fdb669e3354c88 /numpy/lib/nanfunctions.py
parent	9dd46ee2ed9fc55942d9ec13532d817c3b36d322 (diff)
download	numpy-19aa50901e96a8428950da062d0d60c760232395.tar.gz