BUG: Added proper handling of median and percentile when NaNs are present in the array, closing issue #586.

Also added unit tests.
author: empeeu <empeeu@yahoo.com> 2014-03-08 09:32:03 -0500
committer: empeeu <empeeu@users.noreply.github.com> 2015-06-22 19:41:58 -0400
commit: a320fd772468004a53f7c448ae47032eb1b5c5df (patch)
tree: bf37a3da7ec96d2449f3b23642fc972990ed623a /numpy/lib/function_base.py
parent: 81c2c16f3218c879f5bfeacd80f237336e56584d (diff)
download: numpy-a320fd772468004a53f7c448ae47032eb1b5c5df.tar.gz
1 files changed, 92 insertions, 26 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 26d25cd6d..762d338bb 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -3029,41 +3029,37 @@ def _median(a, axis=None, out=None, overwrite_input=False):
     # can't be reasonably be implemented in terms of percentile as we have to
     # call mean to not break astropy
     a = np.asanyarray(a)
-    if axis is not None and axis >= a.ndim:
-        raise IndexError(
-            "axis %d out of bounds (%d)" % (axis, a.ndim))
+
+    # Set the partition indexes
+    if axis is None:
+        sz = a.size
+    else:
+        sz = a.shape[axis]
+    if sz % 2 == 0:
+        szh = sz // 2
+        kth = [szh - 1, szh]
+    else:
+        kth = [(sz - 1) // 2]
+    # Check if the array contains any nan's
+    if np.issubdtype(a.dtype, np.inexact):
+        kth.append(-1)
 
     if overwrite_input:
         if axis is None:
             part = a.ravel()
-            sz = part.size
-            if sz % 2 == 0:
-                szh = sz // 2
-                part.partition((szh - 1, szh))
-            else:
-                part.partition((sz - 1) // 2)
+            part.partition(kth)
         else:
-            sz = a.shape[axis]
-            if sz % 2 == 0:
-                szh = sz // 2
-                a.partition((szh - 1, szh), axis=axis)
-            else:
-                a.partition((sz - 1) // 2, axis=axis)
+            a.partition(kth, axis=axis)
             part = a
     else:
-        if axis is None:
-            sz = a.size
-        else:
-            sz = a.shape[axis]
-        if sz % 2 == 0:
-            part = partition(a, ((sz // 2) - 1, sz // 2), axis=axis)
-        else:
-            part = partition(a, (sz - 1) // 2, axis=axis)
+        part = partition(a, kth, axis=axis)
+
     if part.shape == ():
         # make 0-D arrays work
         return part.item()
     if axis is None:
         axis = 0
+
     indexer = [slice(None)] * part.ndim
     index = part.shape[axis] // 2
     if part.shape[axis] % 2 == 1:
@@ -3071,9 +3067,33 @@ def _median(a, axis=None, out=None, overwrite_input=False):
         indexer[axis] = slice(index, index+1)
     else:
         indexer[axis] = slice(index-1, index+1)
-    # Use mean in odd and even case to coerce data type
-    # and check, use out array.
-    return mean(part[indexer], axis=axis, out=out)
+
+    # Check if the array contains any nan's
+    if np.issubdtype(a.dtype, np.inexact):
+        # warn and return nans like mean would
+        rout = mean(part[indexer], axis=axis, out=out)
+        part = np.rollaxis(part, axis, part.ndim)
+        n = np.isnan(part[..., -1])
+        if rout.ndim == 0:
+            if n == True:
+                warnings.warn("Invalid value encountered in median",
+                              RuntimeWarning)
+                if out is not None:
+                    out[...] = a.dtype.type(np.nan)
+                    rout = out
+                else:
+                    rout = a.dtype.type(np.nan)
+        else:
+            for i in range(np.count_nonzero(n.ravel())):
+                warnings.warn("Invalid value encountered in median",
+                              RuntimeWarning)
+            rout[n] = np.nan
+        return rout
+    else:
+        # if there are no nans
+        # Use mean in odd and even case to coerce data type
+        # and check, use out array.
+        return mean(part[indexer], axis=axis, out=out)
 
 
 def percentile(a, q, axis=None, out=None,
@@ -3249,20 +3269,36 @@ def _percentile(a, q, axis=None, out=None,
             "interpolation can only be 'linear', 'lower' 'higher', "
             "'midpoint', or 'nearest'")
 
+    n = np.array(False, dtype=bool) # check for nan's flag
     if indices.dtype == intp:  # take the points along axis
+        # Check if the array contains any nan's
+        if np.issubdtype(a.dtype, np.inexact):
+            indices = concatenate((indices, [-1]))
+
         ap.partition(indices, axis=axis)
         # ensure axis with qth is first
         ap = np.rollaxis(ap, axis, 0)
         axis = 0
 
+        # Check if the array contains any nan's
+        if np.issubdtype(a.dtype, np.inexact):
+            indices = indices[:-1]
+            n = np.isnan(ap[-1:, ...])
+
         if zerod:
             indices = indices[0]
         r = take(ap, indices, axis=axis, out=out)
+
+
     else:  # weight the points above and below the indices
         indices_below = floor(indices).astype(intp)
         indices_above = indices_below + 1
         indices_above[indices_above > Nx - 1] = Nx - 1
 
+        # Check if the array contains any nan's
+        if np.issubdtype(a.dtype, np.inexact):
+            indices_above = concatenate((indices_above, [-1]))
+
         weights_above = indices - indices_below
         weights_below = 1.0 - weights_above
 
@@ -3272,6 +3308,18 @@ def _percentile(a, q, axis=None, out=None,
         weights_above.shape = weights_shape
 
         ap.partition(concatenate((indices_below, indices_above)), axis=axis)
+
+        # ensure axis with qth is first
+        ap = np.rollaxis(ap, axis, 0)
+        weights_below = np.rollaxis(weights_below, axis, 0)
+        weights_above = np.rollaxis(weights_above, axis, 0)
+        axis = 0
+
+        # Check if the array contains any nan's
+        if np.issubdtype(a.dtype, np.inexact):
+            indices_above = indices_above[:-1]
+            n = np.isnan(ap[-1:, ...])
+
         x1 = take(ap, indices_below, axis=axis) * weights_below
         x2 = take(ap, indices_above, axis=axis) * weights_above
 
@@ -3288,6 +3336,24 @@ def _percentile(a, q, axis=None, out=None,
         else:
             r = add(x1, x2)
 
+    if np.any(n):
+        warnings.warn("Invalid value encountered in median",
+                              RuntimeWarning)
+        if zerod:
+            if ap.ndim == 1:
+                if out is not None:
+                    out[...] = a.dtype.type(np.nan)
+                    r = out
+                else:
+                    r = a.dtype.type(np.nan)
+            else:
+                r[..., n.squeeze(0)] = a.dtype.type(np.nan)
+        else:
+            if r.ndim == 1:
+                r[:] = a.dtype.type(np.nan)
+            else:
+                r[..., n.repeat(q.size, 0)] = a.dtype.type(np.nan)
+
     return r
author	empeeu <empeeu@yahoo.com>	2014-03-08 09:32:03 -0500
committer	empeeu <empeeu@users.noreply.github.com>	2015-06-22 19:41:58 -0400
commit	a320fd772468004a53f7c448ae47032eb1b5c5df (patch)
tree	bf37a3da7ec96d2449f3b23642fc972990ed623a /numpy/lib/function_base.py
parent	81c2c16f3218c879f5bfeacd80f237336e56584d (diff)
download	numpy-a320fd772468004a53f7c448ae47032eb1b5c5df.tar.gz