summaryrefslogtreecommitdiff
path: root/numpy/lib/nanfunctions.py
diff options
context:
space:
mode:
authorDavid Freese <dfreese@stanford.edu>2014-02-16 08:51:16 -0800
committerDavid Freese <dfreese@stanford.edu>2014-05-02 08:57:27 -0700
commitbeec75be6f96a5c0fc9496b587e68eb03bb4a6ba (patch)
treea5bfadd37ec0ffdb9249d18d21179d25e8c9ec32 /numpy/lib/nanfunctions.py
parenta0cf18394d5ce33514fdc37093bd2f65ad4b0dde (diff)
downloadnumpy-beec75be6f96a5c0fc9496b587e68eb03bb4a6ba.tar.gz
ENH: added functionality nanmedian to numpy
Implemented a nanmedian and associated tests as an extension of np.median to complement the other nanfunctions Added negative values to the unit tests Cleaned up documentation of nanmedian
Diffstat (limited to 'numpy/lib/nanfunctions.py')
-rw-r--r--numpy/lib/nanfunctions.py146
1 files changed, 144 insertions, 2 deletions
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index badba32da..818e130a8 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -17,12 +17,14 @@ Functions
from __future__ import division, absolute_import, print_function
import warnings
+import operator
import numpy as np
-
+from numpy.core.fromnumeric import partition
+from numpy.lib.function_base import _ureduce as _ureduce
__all__ = [
'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
- 'nanvar', 'nanstd'
+ 'nanmedian', 'nanvar', 'nanstd'
]
@@ -601,6 +603,146 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
return avg
+def _nanmedian1d(arr1d, overwrite_input=False): # This only works on 1d arrays
+ """
+ Private function for rank 1 arrays. Compute the median ignoring NaNs.
+ See nanmedian for parameter usage
+
+ """
+ c = np.isnan(arr1d)
+ s = np.where(c)[0]
+ if s.size == arr1d.size:
+ warnings.warn("All-NaN slice encountered", RuntimeWarning)
+ return np.nan
+ elif s.size == 0:
+ return np.median(arr1d, overwrite_input=overwrite_input)
+ else:
+ if overwrite_input:
+ x = arr1d
+ else:
+ x = arr1d.copy()
+ # select non-nans at end of array
+ enonan = arr1d[-s.size:][~c[-s.size:]]
+ # fill nans in beginning of array with non-nans of end
+ x[s[:enonan.size]] = enonan
+ # slice nans away
+ return np.median(x[:-s.size], overwrite_input=True)
+
+
+def _nanmedian(a, axis=None, out=None, overwrite_input=False):
+ """
+ Private function that doesn't support extended axis or keepdims.
+ These methods are extended to this function using _ureduce
+ See nanmedian for parameter usage
+
+ """
+ if axis is None:
+ part = a.ravel()
+ if out is None:
+ return _nanmedian1d(part, overwrite_input)
+ else:
+ out[:] = _nanmedian1d(part, overwrite_input)
+ return out
+ else:
+ result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
+ if out is not None:
+ out[:] = result
+ return result
+
+
+def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
+ """
+ Compute the median along the specified axis, while ignoring NaNs.
+
+ Returns the median of the array elements.
+
+ .. versionadded:: 1.9.0
+
+ Parameters
+ ----------
+ a : array_like
+ Input array or object that can be converted to an array.
+ axis : int, optional
+ Axis along which the medians are computed. The default (axis=None)
+ is to compute the median along a flattened version of the array.
+ A sequence of axes is supported since version 1.9.0.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must have
+ the same shape and buffer length as the expected output, but the
+ type (of the output) will be cast if necessary.
+ overwrite_input : bool, optional
+ If True, then allow use of memory of input array (a) for
+ calculations. The input array will be modified by the call to
+ median. This will save memory when you do not need to preserve
+ the contents of the input array. Treat the input as undefined,
+ but it will probably be fully or partially sorted. Default is
+ False. Note that, if `overwrite_input` is True and the input
+ is not already an ndarray, an error will be raised.
+ keepdims : bool, optional
+ If this is set to True, the axes which are reduced are left
+ in the result as dimensions with size one. With this option,
+ the result will broadcast correctly against the original `arr`.
+
+
+
+ Returns
+ -------
+ median : ndarray
+ A new array holding the result. If the input contains integers, or
+ floats of smaller precision than 64, then the output data-type is
+ float64. Otherwise, the output data-type is the same as that of the
+ input.
+
+ See Also
+ --------
+ mean, median, percentile
+
+ Notes
+ -----
+ Given a vector V of length N, the median of V is the middle value of
+ a sorted copy of V, ``V_sorted`` - i.e., ``V_sorted[(N-1)/2]``, when N is
+ odd. When N is even, it is the average of the two middle values of
+ ``V_sorted``.
+
+ Examples
+ --------
+ >>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
+ >>> a[0, 1] = np.nan
+ >>> a
+ array([[ 10., nan, 4.],
+ [ 3., 2., 1.]])
+ >>> np.median(a)
+ nan
+ >>> np.nanmedian(a)
+ 3.0
+ >>> np.nanmedian(a, axis=0)
+ array([ 6.5, 2., 2.5])
+ >>> np.median(a, axis=1)
+ array([ 7., 2.])
+ >>> b = a.copy()
+ >>> np.nanmedian(b, axis=1, overwrite_input=True)
+ array([ 7., 2.])
+ >>> assert not np.all(a==b)
+ >>> b = a.copy()
+ >>> np.nanmedian(b, axis=None, overwrite_input=True)
+ 3.0
+ >>> assert not np.all(a==b)
+
+ """
+ a = np.asanyarray(a)
+ # apply_along_axis in _nanmedian doesn't handle empty arrays well,
+ # so deal them upfront
+ if 0 in a.shape:
+ return np.nanmean(a, axis, out=out, keepdims=keepdims)
+
+ r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out,
+ overwrite_input=overwrite_input)
+ if keepdims:
+ return r.reshape(k)
+ else:
+ return r
+
+
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
"""
Compute the variance along the specified axis, while ignoring NaNs.