BUG: handle unmasked NaN in ma.median like normal median

This requires to base masked median on sort(endwith=False) as we need to distinguish Inf and NaN. Using Inf as filler element of the sort does not work as then the mask is not guaranteed to be at the end. Closes gh-8340 Also fixed 1d ma.median not handling np.inf correctly, the nd variant was ok.
author: Julian Taylor <jtaylor.debian@googlemail.com> 2016-12-06 00:37:53 +0100
committer: Julian Taylor <jtaylor.debian@googlemail.com> 2016-12-12 23:19:06 +0100
commit: ff4758ff8432077c06cb580b97c6080f9b312c5a (patch)
tree: a40b77c06a1446f053f405a31bf0bcff91d3d8eb /numpy/ma/extras.py
parent: e00b9587052248486a9bf66c2ce95638c0d9817f (diff)
download: numpy-ff4758ff8432077c06cb580b97c6080f9b312c5a.tar.gz
1 files changed, 42 insertions, 10 deletions
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index e4ff8ef2d..dadf032e0 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -699,15 +699,21 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
         return r
 
 def _median(a, axis=None, out=None, overwrite_input=False):
+    # when an unmasked NaN is present return it, so we need to sort the NaN
+    # values behind the mask
+    if np.issubdtype(a.dtype, np.inexact):
+        fill_value = np.inf
+    else:
+        fill_value = None
     if overwrite_input:
         if axis is None:
             asorted = a.ravel()
-            asorted.sort()
+            asorted.sort(fill_value=fill_value)
         else:
-            a.sort(axis=axis)
+            a.sort(axis=axis, fill_value=fill_value)
             asorted = a
     else:
-        asorted = sort(a, axis=axis)
+        asorted = sort(a, axis=axis, fill_value=fill_value)
 
     if axis is None:
         axis = 0
@@ -715,8 +721,23 @@ def _median(a, axis=None, out=None, overwrite_input=False):
         axis += asorted.ndim
 
     if asorted.ndim == 1:
+        counts = count(asorted)
         idx, odd = divmod(count(asorted), 2)
-        return asorted[idx + odd - 1 : idx + 1].mean(out=out)
+        mid = asorted[idx + odd - 1 : idx + 1]
+        if np.issubdtype(asorted.dtype, np.inexact) and asorted.size > 0:
+            # avoid inf / x = masked
+            s = mid.sum(out=out)
+            np.true_divide(s, 2., casting='unsafe')
+            s = np.lib.utils._median_nancheck(asorted, s, axis, out)
+        else:
+            s = mid.mean(out=out)
+
+        # if result is masked either the input contained enough
+        # minimum_fill_value so that it would be the median or all values
+        # masked
+        if np.ma.is_masked(s) and not np.all(asorted.mask):
+            return np.ma.minimum_fill_value(asorted)
+        return s
 
     counts = count(asorted, axis=axis)
     h = counts // 2
@@ -727,24 +748,35 @@ def _median(a, axis=None, out=None, overwrite_input=False):
     ind = np.meshgrid(*axes_grid, sparse=True, indexing='ij')
 
     # insert indices of low and high median
-    ind.insert(axis, np.maximum(0, h - 1))
+    ind.insert(axis, h - 1)
     low = asorted[tuple(ind)]
-    ind[axis] = h
+    ind[axis] = np.minimum(h, asorted.shape[axis] - 1)
     high = asorted[tuple(ind)]
 
     # duplicate high if odd number of elements so mean does nothing
     odd = counts % 2 == 1
-    if asorted.ndim > 1:
-        np.copyto(low, high, where=odd)
-    elif odd:
-        low = high
+    np.copyto(low, high, where=odd)
+    # not necessary for scalar True/False masks
+    try:
+        np.copyto(low.mask, high.mask, where=odd)
+    except:
+        pass
 
     if np.issubdtype(asorted.dtype, np.inexact):
         # avoid inf / x = masked
         s = np.ma.sum([low, high], axis=0, out=out)
         np.true_divide(s.data, 2., casting='unsafe', out=s.data)
+
+        s = np.lib.utils._median_nancheck(asorted, s, axis, out)
     else:
         s = np.ma.mean([low, high], axis=0, out=out)
+
+    # if result is masked either the input contained enough minimum_fill_value
+    # so that it would be the median or all values masked
+    if np.ma.is_masked(s):
+        rep = (~np.all(asorted.mask, axis=axis)) & s.mask
+        s.data[rep] = np.ma.minimum_fill_value(asorted)
+        s.mask[rep] = False
     return s
author	Julian Taylor <jtaylor.debian@googlemail.com>	2016-12-06 00:37:53 +0100
committer	Julian Taylor <jtaylor.debian@googlemail.com>	2016-12-12 23:19:06 +0100
commit	ff4758ff8432077c06cb580b97c6080f9b312c5a (patch)
tree	a40b77c06a1446f053f405a31bf0bcff91d3d8eb /numpy/ma/extras.py
parent	e00b9587052248486a9bf66c2ce95638c0d9817f (diff)
download	numpy-ff4758ff8432077c06cb580b97c6080f9b312c5a.tar.gz