From 07cbe999f84be4d1b0a35fdb15b53cc17bc4341d Mon Sep 17 00:00:00 2001
From: Jonathan Helmus <jjhelmus@gmail.com>
Date: Wed, 28 Aug 2013 20:40:17 -0500
Subject: ENH: percentile function with additional parameters and vectorization

The percentile function was enhanced by adding limit and interpolation
parameters to give it similar functionality to SciPy's stats.scoreatpercentile
function.  In addition, the function was vectorized along q and rewritten to
use the partition method for better performance.
---
 numpy/lib/function_base.py            | 156 ++++++++++++++++++++--------------
 numpy/lib/tests/test_function_base.py | 137 ++++++++++++++++++++++++-----
 2 files changed, 207 insertions(+), 86 deletions(-)

(limited to 'numpy/lib')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index ada54135e..55d104740 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -16,7 +16,8 @@ import sys
 import numpy.core.numeric as _nx
 from numpy.core import linspace
 from numpy.core.numeric import ones, zeros, arange, concatenate, array, \
-        asarray, asanyarray, empty, empty_like, ndarray, around
+        asarray, asanyarray, empty, empty_like, ndarray, around, floor, \
+        ceil, take
 from numpy.core.numeric import ScalarType, dot, where, newaxis, intp, \
         integer, isscalar
 from numpy.core.umath import pi, multiply, add, arctan2,  \
@@ -2765,7 +2766,9 @@ def median(a, axis=None, out=None, overwrite_input=False):
     # and check, use out array.
     return mean(part[indexer], axis=axis, out=out)
 
-def percentile(a, q, axis=None, out=None, overwrite_input=False):
+
+def percentile(a, q, limit=None, interpolation='linear', axis=None,
+               out=None, overwrite_input=False):
     """
     Compute the qth percentile of the data along the specified axis.
 
@@ -2777,29 +2780,40 @@ def percentile(a, q, axis=None, out=None, overwrite_input=False):
         Input array or object that can be converted to an array.
     q : float in range of [0,100] (or sequence of floats)
         Percentile to compute which must be between 0 and 100 inclusive.
+    limit : tuple, optional
+        Tuple of two scalars, the lower and upper limits within which to
+        compute the percentile. Values outside of this range are ommitted from
+        the percentile calculation. None includes all values in calculation.
+    interpolation : {'linear', 'lower', 'higher', 'midpoint'}, optional
+        This optional parameter specifies the interpolation method to use,
+        when the desired quantile lies between two data points `i` and `j`:
+            * linear: `i + (j - i) * fraction`, where `fraction` is the
+              fractional part of the index surrounded by `i` and `j`.
+            * lower: `i`.
+            * higher: `j`.
     axis : int, optional
         Axis along which the percentiles are computed. The default (None)
-        is to compute the median along a flattened version of the array.
+        is to compute the percentiles along a flattened version of the array.
     out : ndarray, optional
         Alternative output array in which to place the result. It must
         have the same shape and buffer length as the expected output,
         but the type (of the output) will be cast if necessary.
     overwrite_input : bool, optional
-       If True, then allow use of memory of input array `a` for
-       calculations. The input array will be modified by the call to
-       median. This will save memory when you do not need to preserve
-       the contents of the input array. Treat the input as undefined,
-       but it will probably be fully or partially sorted.
-       Default is False. Note that, if `overwrite_input` is True and the
-       input is not already an array, an error will be raised.
+        If True, then allow use of memory of input array `a` for
+        calculations. The input array will be modified by the call to
+        percentile. This will save memory when you do not need to preserve
+        the contents of the input array. Treat the input as undefined,
+        but it will probably be fully or partially sorted.
+        Default is False. Note that, if `overwrite_input` is True and the
+        input is not already an array, an error will be raised.
 
     Returns
     -------
-    pcntile : ndarray
+    percentile : ndarray
         A new array holding the result (unless `out` is specified, in
-        which case that array is returned instead).  If the input contains
+        which case that array is returned instead). If the input contains
         integers, or floats of smaller precision than 64, then the output
-        data-type is float64.  Otherwise, the output data-type is the same
+        data-type is float64. Otherwise, the output data-type is the same
         as that of the input.
 
     See Also
@@ -2809,7 +2823,7 @@ def percentile(a, q, axis=None, out=None, overwrite_input=False):
     Notes
     -----
     Given a vector V of length N, the qth percentile of V is the qth ranked
-    value in a sorted copy of V.  A weighted average of the two nearest
+    value in a sorted copy of V. A weighted average of the two nearest
     neighbors is used if the normalized ranking does not match q exactly.
     The same as the median if ``q=50``, the same as the minimum if ``q=0``
     and the same as the maximum if ``q=100``.
@@ -2818,87 +2832,97 @@ def percentile(a, q, axis=None, out=None, overwrite_input=False):
     --------
     >>> a = np.array([[10, 7, 4], [3, 2, 1]])
     >>> a
-    array([[10,  7,  4],
-           [ 3,  2,  1]])
+    array([[10, 7, 4],
+    [ 3, 2, 1]])
     >>> np.percentile(a, 50)
-    3.5
+    array([3.5])
     >>> np.percentile(a, 50, axis=0)
-    array([ 6.5,  4.5,  2.5])
+    array([ 6.5, 4.5, 2.5])
     >>> np.percentile(a, 50, axis=1)
-    array([ 7.,  2.])
+    array([[ 7.],
+    [2.]])
 
     >>> m = np.percentile(a, 50, axis=0)
     >>> out = np.zeros_like(m)
     >>> np.percentile(a, 50, axis=0, out=m)
-    array([ 6.5,  4.5,  2.5])
+    array([ 6.5, 4.5, 2.5])
     >>> m
-    array([ 6.5,  4.5,  2.5])
+    array([ 6.5, 4.5, 2.5])
 
     >>> b = a.copy()
     >>> np.percentile(b, 50, axis=1, overwrite_input=True)
-    array([ 7.,  2.])
+    array([[ 7.,
+    [2.]])
     >>> assert not np.all(a==b)
     >>> b = a.copy()
     >>> np.percentile(b, 50, axis=None, overwrite_input=True)
-    3.5
+    array([3.5])
 
     """
-    a = np.asarray(a)
+    a = asarray(a)
 
-    if q == 0:
-        return a.min(axis=axis, out=out)
-    elif q == 100:
-        return a.max(axis=axis, out=out)
+    if limit:  # filter a based on limits
+        a = a[(limit[0] <= a) & (a <= limit[1])]
+
+    q = atleast_1d(q)
+    q = q / 100.0
+    if (q < 0).any() or (q > 1).any():
+        raise ValueError("Percentiles must be in the range [0,100]")
 
+    # prepare a for partioning
     if overwrite_input:
         if axis is None:
-            sorted = a.ravel()
-            sorted.sort()
+            ap = a.ravel()
         else:
-            a.sort(axis=axis)
-            sorted = a
+            ap = a
     else:
-        sorted = sort(a, axis=axis)
+        if axis is None:
+            ap = a.flatten()
+        else:
+            ap = a.copy()
+
     if axis is None:
         axis = 0
 
-    return _compute_qth_percentile(sorted, q, axis, out)
+    Nx = ap.shape[axis]
+    indices = q * (Nx - 1)
+
+    # round fractional indices according to interpolation method
+    if interpolation == 'lower':
+        indices = floor(indices).astype(intp)
+    elif interpolation == 'higher':
+        indices = ceil(indices).astype(intp)
+    elif interpolation == 'linear':
+        pass  # keep index as fraction and interpolate
+    else:
+        raise ValueError("interpolation can only be 'linear', 'lower' "
+                         "or 'higher'")
 
-# handle sequence of q's without calling sort multiple times
-def _compute_qth_percentile(sorted, q, axis, out):
-    if not isscalar(q):
-        p = [_compute_qth_percentile(sorted, qi, axis, None)
-             for qi in q]
+    if indices.dtype == intp:  # take the points along axis
+        ap.partition(indices.copy(), axis=axis)
+        return take(ap, indices, axis=axis, out=out)
+    else:  # weight the points above and below the indices
+        indices_below = floor(indices).astype(intp)
+        indices_above = indices_below + 1
+        indices_above[indices_above > Nx - 1] = Nx - 1
 
-        if out is not None:
-            out.flat = p
+        weights_above = indices - indices_below
+        weights_below = 1.0 - weights_above
 
-        return p
+        weights_shape = [1, ] * ap.ndim
+        weights_shape[axis] = len(indices)
+        weights_below.shape = weights_shape
+        weights_above.shape = weights_shape
+
+        ap.partition(concatenate((indices_below, indices_above)), axis=axis)
+        x1 = take(ap, indices_below, axis=axis) * weights_below
+        x2 = take(ap, indices_above, axis=axis) * weights_above
+
+        if out is not None:
+            return add(x1, x2, out=out)
+        else:
+            return add(x1, x2)
 
-    q = q / 100.0
-    if (q < 0) or (q > 1):
-        raise ValueError("percentile must be either in the range [0,100]")
-
-    indexer = [slice(None)] * sorted.ndim
-    Nx = sorted.shape[axis]
-    index = q*(Nx-1)
-    i = int(index)
-    if i == index:
-        indexer[axis] = slice(i, i+1)
-        weights = array(1)
-        sumval = 1.0
-    else:
-        indexer[axis] = slice(i, i+2)
-        j = i + 1
-        weights = array([(j - index), (index - i)], float)
-        wshape = [1]*sorted.ndim
-        wshape[axis] = 2
-        weights.shape = wshape
-        sumval = weights.sum()
-
-    # Use add.reduce in both cases to coerce data type as well as
-    #   check and use out array.
-    return add.reduce(sorted[indexer]*weights, axis=axis, out=out)/sumval
 
 def trapz(y, x=None, dx=1.0, axis=-1):
     """
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index f52eb5fbe..02597c78b 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -1440,27 +1440,124 @@ def compare_results(res, desired):
         assert_array_equal(res[i], desired[i])
 
 
-def test_percentile_list():
-    assert_equal(np.percentile([1, 2, 3], 0), 1)
+class TestScoreatpercentile(TestCase):
 
-
-def test_percentile_out():
-    x = np.array([1, 2, 3])
-    y = np.zeros((3,))
-    p = (1, 2, 3)
-    np.percentile(x, p, out=y)
-    assert_equal(y, np.percentile(x, p))
-
-    x = np.array([[1, 2, 3],
-                  [4, 5, 6]])
-
-    y = np.zeros((3, 3))
-    np.percentile(x, p, axis=0, out=y)
-    assert_equal(y, np.percentile(x, p, axis=0))
-
-    y = np.zeros((3, 2))
-    np.percentile(x, p, axis=1, out=y)
-    assert_equal(y, np.percentile(x, p, axis=1))
+    def test_basic(self):
+        x = np.arange(8) * 0.5
+        assert_equal(np.percentile(x, 0), 0.)
+        assert_equal(np.percentile(x, 100), 3.5)
+        assert_equal(np.percentile(x, 50), 1.75)
+
+    def test_2D(self):
+        x = np.array([[1, 1, 1],
+                     [1, 1, 1],
+                     [4, 4, 3],
+                     [1, 1, 1],
+                     [1, 1, 1]])
+        assert_array_equal(np.percentile(x, 50, axis=0), [[1, 1, 1]])
+
+    def test_limit(self):
+        x = np.arange(10)
+        assert_equal(np.percentile(x, 50, limit=(2, 5)), 3.5)
+        assert_equal(np.percentile([2, 3, 4, 5], 50), 3.5)
+
+        assert_equal(np.percentile(x, 50, limit=(-1, 8)), 4)
+        assert_equal(np.percentile([0, 1, 2, 3, 4, 5, 6, 7, 8], 50), 4)
+
+        assert_equal(np.percentile(x, 50, limit=(4, 11)), 6.5)
+        assert_equal(np.percentile([4, 5, 6, 7, 8, 9], 50, ), 6.5)
+
+    def test_linear(self):
+
+        # Test defaults
+        assert_equal(np.percentile(range(10), 50), 4.5)
+        assert_equal(np.percentile(range(10), 50, (2, 7)), 4.5)
+        assert_equal(np.percentile(range(100), 50, limit=(1, 8)), 4.5)
+        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (10, 100)), 55)
+        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (1, 10)), 5.5)
+
+        # explicitly specify interpolation_method 'fraction' (the default)
+        assert_equal(np.percentile(range(10), 50,
+                                   interpolation='linear'), 4.5)
+        assert_equal(np.percentile(range(10), 50, limit=(2, 7),
+                                   interpolation='linear'), 4.5)
+        assert_equal(np.percentile(range(100), 50, limit=(1, 8),
+                                   interpolation='linear'), 4.5)
+        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (10, 100),
+                                   interpolation='linear'), 55)
+        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (1, 10),
+                                   interpolation='linear'), 5.5)
+
+    def test_lower_higher(self):
+
+        # interpolation_method 'lower'/'higher'
+        assert_equal(np.percentile(range(10), 50,
+                                   interpolation='lower'), 4)
+        assert_equal(np.percentile(range(10), 50,
+                                   interpolation='higher'), 5)
+        assert_equal(np.percentile(range(10), 50, (2, 7),
+                                   interpolation='lower'), 4)
+        assert_equal(np.percentile(range(10), 50, limit=(2, 7),
+                                   interpolation='higher'), 5)
+        assert_equal(np.percentile(range(100), 50, (1, 8),
+                                   interpolation='lower'), 4)
+        assert_equal(np.percentile(range(100), 50, (1, 8),
+                                   interpolation='higher'), 5)
+        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (10, 100),
+                                   interpolation='lower'), 10)
+        assert_equal(np.percentile(np.array([1, 10, 100]), 50, limit=(10, 100),
+                                   interpolation='higher'), 100)
+        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (1, 10),
+                                   interpolation='lower'), 1)
+        assert_equal(np.percentile(np.array([1, 10, 100]), 50, limit=(1, 10),
+                                   interpolation='higher'), 10)
+
+    def test_sequence(self):
+        x = np.arange(8) * 0.5
+        assert_equal(np.percentile(x, [0, 100, 50]), [0, 3.5, 1.75])
+
+    def test_axis(self):
+        x = np.arange(12).reshape(3, 4)
+
+        assert_equal(np.percentile(x, (25, 50, 100)), [2.75, 5.5, 11.0])
+
+        r0 = [[2, 3, 4, 5], [4, 5, 6, 7], [8, 9, 10, 11]]
+        assert_equal(np.percentile(x, (25, 50, 100), axis=0), r0)
+
+        r1 = [[0.75, 1.5, 3], [4.75, 5.5, 7], [8.75, 9.5, 11]]
+        assert_equal(np.percentile(x, (25, 50, 100), axis=1), r1)
+
+    def test_exception(self):
+        assert_raises(ValueError, np.percentile, [1, 2], 56,
+                      interpolation='foobar')
+        assert_raises(ValueError, np.percentile, [1], 101)
+        assert_raises(ValueError, np.percentile, [1], -1)
+
+    def test_percentile_list(self):
+        assert_equal(np.percentile([1, 2, 3], 0), 1)
+
+    def test_percentile_out(self):
+        x = np.array([1, 2, 3])
+        y = np.zeros((3,))
+        p = (1, 2, 3)
+        np.percentile(x, p, out=y)
+        assert_equal(y, np.percentile(x, p))
+
+        x = np.array([[1, 2, 3],
+                      [4, 5, 6]])
+
+        y = np.zeros((3, 3))
+        np.percentile(x, p, axis=0, out=y)
+        assert_equal(y, np.percentile(x, p, axis=0))
+
+        y = np.zeros((2, 3))
+        np.percentile(x, p, axis=1, out=y)
+        assert_equal(y, np.percentile(x, p, axis=1))
+
+    def test_percentile_no_overwrite(self):
+        a = np.array([2, 3, 4, 1])
+        np.percentile(a, [50], overwrite_input=False)
+        assert_equal(a, np.array([2, 3, 4, 1]))
 
 
 class TestMedian(TestCase):
-- 
cgit v1.2.1


From 4a084a0d77bbb7ade065e75d3602fd8b47369d76 Mon Sep 17 00:00:00 2001
From: Jonathan Helmus <jjhelmus@gmail.com>
Date: Thu, 5 Sep 2013 11:20:56 -0500
Subject: MAINT: cleaning up percentile function.

---
 numpy/lib/function_base.py            | 24 ++++++++---------
 numpy/lib/tests/test_function_base.py | 49 +++++++----------------------------
 2 files changed, 21 insertions(+), 52 deletions(-)

(limited to 'numpy/lib')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 55d104740..48a86dff0 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -2767,8 +2767,8 @@ def median(a, axis=None, out=None, overwrite_input=False):
     return mean(part[indexer], axis=axis, out=out)
 
 
-def percentile(a, q, limit=None, interpolation='linear', axis=None,
-               out=None, overwrite_input=False):
+def percentile(a, q, interpolation='linear', axis=None, out=None,
+               overwrite_input=False):
     """
     Compute the qth percentile of the data along the specified axis.
 
@@ -2780,17 +2780,15 @@ def percentile(a, q, limit=None, interpolation='linear', axis=None,
         Input array or object that can be converted to an array.
     q : float in range of [0,100] (or sequence of floats)
         Percentile to compute which must be between 0 and 100 inclusive.
-    limit : tuple, optional
-        Tuple of two scalars, the lower and upper limits within which to
-        compute the percentile. Values outside of this range are ommitted from
-        the percentile calculation. None includes all values in calculation.
-    interpolation : {'linear', 'lower', 'higher', 'midpoint'}, optional
+    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'closest'}
         This optional parameter specifies the interpolation method to use,
         when the desired quantile lies between two data points `i` and `j`:
             * linear: `i + (j - i) * fraction`, where `fraction` is the
               fractional part of the index surrounded by `i` and `j`.
             * lower: `i`.
             * higher: `j`.
+            * closest: `i` or `j` whichever is closest.
+            * midpoint: (`i` + `j`) / 2.
     axis : int, optional
         Axis along which the percentiles are computed. The default (None)
         is to compute the percentiles along a flattened version of the array.
@@ -2860,10 +2858,6 @@ def percentile(a, q, limit=None, interpolation='linear', axis=None,
 
     """
     a = asarray(a)
-
-    if limit:  # filter a based on limits
-        a = a[(limit[0] <= a) & (a <= limit[1])]
-
     q = atleast_1d(q)
     q = q / 100.0
     if (q < 0).any() or (q > 1).any():
@@ -2892,14 +2886,18 @@ def percentile(a, q, limit=None, interpolation='linear', axis=None,
         indices = floor(indices).astype(intp)
     elif interpolation == 'higher':
         indices = ceil(indices).astype(intp)
+    elif interpolation == 'midpoint':
+        indices = floor(indices) + 0.5
+    elif interpolation == 'closest':
+        indices = around(indices).astype(intp)
     elif interpolation == 'linear':
         pass  # keep index as fraction and interpolate
     else:
         raise ValueError("interpolation can only be 'linear', 'lower' "
-                         "or 'higher'")
+                         "'higher', 'midpoint', or 'closest'")
 
     if indices.dtype == intp:  # take the points along axis
-        ap.partition(indices.copy(), axis=axis)
+        ap.partition(indices, axis=axis)
         return take(ap, indices, axis=axis, out=out)
     else:  # weight the points above and below the indices
         indices_below = floor(indices).astype(intp)
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 02597c78b..f0512420e 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -1456,37 +1456,14 @@ class TestScoreatpercentile(TestCase):
                      [1, 1, 1]])
         assert_array_equal(np.percentile(x, 50, axis=0), [[1, 1, 1]])
 
-    def test_limit(self):
-        x = np.arange(10)
-        assert_equal(np.percentile(x, 50, limit=(2, 5)), 3.5)
-        assert_equal(np.percentile([2, 3, 4, 5], 50), 3.5)
-
-        assert_equal(np.percentile(x, 50, limit=(-1, 8)), 4)
-        assert_equal(np.percentile([0, 1, 2, 3, 4, 5, 6, 7, 8], 50), 4)
-
-        assert_equal(np.percentile(x, 50, limit=(4, 11)), 6.5)
-        assert_equal(np.percentile([4, 5, 6, 7, 8, 9], 50, ), 6.5)
-
     def test_linear(self):
 
         # Test defaults
         assert_equal(np.percentile(range(10), 50), 4.5)
-        assert_equal(np.percentile(range(10), 50, (2, 7)), 4.5)
-        assert_equal(np.percentile(range(100), 50, limit=(1, 8)), 4.5)
-        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (10, 100)), 55)
-        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (1, 10)), 5.5)
 
         # explicitly specify interpolation_method 'fraction' (the default)
         assert_equal(np.percentile(range(10), 50,
                                    interpolation='linear'), 4.5)
-        assert_equal(np.percentile(range(10), 50, limit=(2, 7),
-                                   interpolation='linear'), 4.5)
-        assert_equal(np.percentile(range(100), 50, limit=(1, 8),
-                                   interpolation='linear'), 4.5)
-        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (10, 100),
-                                   interpolation='linear'), 55)
-        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (1, 10),
-                                   interpolation='linear'), 5.5)
 
     def test_lower_higher(self):
 
@@ -1495,22 +1472,16 @@ class TestScoreatpercentile(TestCase):
                                    interpolation='lower'), 4)
         assert_equal(np.percentile(range(10), 50,
                                    interpolation='higher'), 5)
-        assert_equal(np.percentile(range(10), 50, (2, 7),
-                                   interpolation='lower'), 4)
-        assert_equal(np.percentile(range(10), 50, limit=(2, 7),
-                                   interpolation='higher'), 5)
-        assert_equal(np.percentile(range(100), 50, (1, 8),
-                                   interpolation='lower'), 4)
-        assert_equal(np.percentile(range(100), 50, (1, 8),
-                                   interpolation='higher'), 5)
-        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (10, 100),
-                                   interpolation='lower'), 10)
-        assert_equal(np.percentile(np.array([1, 10, 100]), 50, limit=(10, 100),
-                                   interpolation='higher'), 100)
-        assert_equal(np.percentile(np.array([1, 10, 100]), 50, (1, 10),
-                                   interpolation='lower'), 1)
-        assert_equal(np.percentile(np.array([1, 10, 100]), 50, limit=(1, 10),
-                                   interpolation='higher'), 10)
+
+    def test_midpoint(self):
+        assert_equal(np.percentile(range(10), 51,
+                                   interpolation='midpoint'), 4.5)
+
+    def test_closest(self):
+        assert_equal(np.percentile(range(10), 51,
+                                   interpolation='closest'), 5)
+        assert_equal(np.percentile(range(10), 49,
+                                   interpolation='closest'), 4)
 
     def test_sequence(self):
         x = np.arange(8) * 0.5
-- 
cgit v1.2.1


From a7fc781d286ed8c7650e3a153f8762ce8a536da0 Mon Sep 17 00:00:00 2001
From: Jonathan Helmus <jjhelmus@gmail.com>
Date: Fri, 13 Sep 2013 16:57:17 -0500
Subject: DOC: changes to scoreatpercentile docstring, doc test now passes

---
 numpy/lib/function_base.py | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

(limited to 'numpy/lib')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 48a86dff0..1f6484959 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -2800,10 +2800,11 @@ def percentile(a, q, interpolation='linear', axis=None, out=None,
         If True, then allow use of memory of input array `a` for
         calculations. The input array will be modified by the call to
         percentile. This will save memory when you do not need to preserve
-        the contents of the input array. Treat the input as undefined,
-        but it will probably be fully or partially sorted.
-        Default is False. Note that, if `overwrite_input` is True and the
-        input is not already an array, an error will be raised.
+        the contents of the input array. In this case you should not make
+        any assumptions about the content of the passed in array `a` after
+        this function completes -- treat it as undefined. Default is False.
+        Note that, if `overwrite_input` is True and the input is not
+        already an array, an error will be raised.
 
     Returns
     -------
@@ -2821,40 +2822,41 @@ def percentile(a, q, interpolation='linear', axis=None, out=None,
     Notes
     -----
     Given a vector V of length N, the qth percentile of V is the qth ranked
-    value in a sorted copy of V. A weighted average of the two nearest
-    neighbors is used if the normalized ranking does not match q exactly.
-    The same as the median if ``q=50``, the same as the minimum if ``q=0``
-    and the same as the maximum if ``q=100``.
+    value in a sorted copy of V.  The values and distances of the two nearest
+    neighbors as well as the `interpolation` parameter will determine the
+    percentile if the normalized ranking does not match q exactly. This
+    function is the same as the median if ``q=50``, the same as the minimum
+    if ``q=0``and the same as the maximum if ``q=100``.
 
     Examples
     --------
     >>> a = np.array([[10, 7, 4], [3, 2, 1]])
     >>> a
-    array([[10, 7, 4],
-    [ 3, 2, 1]])
+    array([[10,  7,  4],
+           [ 3,  2,  1]])
     >>> np.percentile(a, 50)
-    array([3.5])
+    array([ 3.5])
     >>> np.percentile(a, 50, axis=0)
-    array([ 6.5, 4.5, 2.5])
+    array([[ 6.5,  4.5,  2.5]])
     >>> np.percentile(a, 50, axis=1)
     array([[ 7.],
-    [2.]])
+           [ 2.]])
 
     >>> m = np.percentile(a, 50, axis=0)
     >>> out = np.zeros_like(m)
     >>> np.percentile(a, 50, axis=0, out=m)
-    array([ 6.5, 4.5, 2.5])
+    array([[ 6.5,  4.5,  2.5]])
     >>> m
-    array([ 6.5, 4.5, 2.5])
+    array([[ 6.5,  4.5,  2.5]])
 
     >>> b = a.copy()
     >>> np.percentile(b, 50, axis=1, overwrite_input=True)
-    array([[ 7.,
-    [2.]])
+    array([[ 7.],
+           [ 2.]])
     >>> assert not np.all(a==b)
     >>> b = a.copy()
     >>> np.percentile(b, 50, axis=None, overwrite_input=True)
-    array([3.5])
+    array([ 3.5])
 
     """
     a = asarray(a)
-- 
cgit v1.2.1


From 9aed31a8ba1607241947bfe886821e9eb09f6ebb Mon Sep 17 00:00:00 2001
From: Jonathan Helmus <jjhelmus@gmail.com>
Date: Fri, 13 Sep 2013 17:00:27 -0500
Subject: TST: additional test for percentile

---
 numpy/lib/tests/test_function_base.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'numpy/lib')

diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index f0512420e..6923f0004 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -1530,6 +1530,9 @@ class TestScoreatpercentile(TestCase):
         np.percentile(a, [50], overwrite_input=False)
         assert_equal(a, np.array([2, 3, 4, 1]))
 
+        np.percentile(a, [50])
+        assert_equal(a, np.array([2, 3, 4, 1]))
+
 
 class TestMedian(TestCase):
     def test_basic(self):
-- 
cgit v1.2.1


From 9dd212cee1c9ccab6013d52e776bcf6ef712a5e0 Mon Sep 17 00:00:00 2001
From: Jonathan Helmus <jjhelmus@gmail.com>
Date: Mon, 16 Sep 2013 10:32:25 -0600
Subject: MAINT: changed 'closest' interpolation to 'nearest'

---
 numpy/lib/function_base.py            | 8 ++++----
 numpy/lib/tests/test_function_base.py | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'numpy/lib')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 1f6484959..9475c2edf 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -2780,14 +2780,14 @@ def percentile(a, q, interpolation='linear', axis=None, out=None,
         Input array or object that can be converted to an array.
     q : float in range of [0,100] (or sequence of floats)
         Percentile to compute which must be between 0 and 100 inclusive.
-    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'closest'}
+    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
         This optional parameter specifies the interpolation method to use,
         when the desired quantile lies between two data points `i` and `j`:
             * linear: `i + (j - i) * fraction`, where `fraction` is the
               fractional part of the index surrounded by `i` and `j`.
             * lower: `i`.
             * higher: `j`.
-            * closest: `i` or `j` whichever is closest.
+            * nearest: `i` or `j` whichever is nearest.
             * midpoint: (`i` + `j`) / 2.
     axis : int, optional
         Axis along which the percentiles are computed. The default (None)
@@ -2890,13 +2890,13 @@ def percentile(a, q, interpolation='linear', axis=None, out=None,
         indices = ceil(indices).astype(intp)
     elif interpolation == 'midpoint':
         indices = floor(indices) + 0.5
-    elif interpolation == 'closest':
+    elif interpolation == 'nearest':
         indices = around(indices).astype(intp)
     elif interpolation == 'linear':
         pass  # keep index as fraction and interpolate
     else:
         raise ValueError("interpolation can only be 'linear', 'lower' "
-                         "'higher', 'midpoint', or 'closest'")
+                         "'higher', 'midpoint', or 'nearest'")
 
     if indices.dtype == intp:  # take the points along axis
         ap.partition(indices, axis=axis)
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 6923f0004..a69c82e18 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -1477,11 +1477,11 @@ class TestScoreatpercentile(TestCase):
         assert_equal(np.percentile(range(10), 51,
                                    interpolation='midpoint'), 4.5)
 
-    def test_closest(self):
+    def test_nearest(self):
         assert_equal(np.percentile(range(10), 51,
-                                   interpolation='closest'), 5)
+                                   interpolation='nearest'), 5)
         assert_equal(np.percentile(range(10), 49,
-                                   interpolation='closest'), 4)
+                                   interpolation='nearest'), 4)
 
     def test_sequence(self):
         x = np.arange(8) * 0.5
-- 
cgit v1.2.1


From 9316110a42c370616cbb80ae3e1769534d04de10 Mon Sep 17 00:00:00 2001
From: Jonathan Helmus <jjhelmus@gmail.com>
Date: Mon, 16 Sep 2013 10:44:26 -0600
Subject: TST: note on overwrite_input parameter in percentile

* added note that `overwrite_input` has no effect when `a` is not
  an array in the percentile function.
* added unit test to verify that no error is raised when `a` is not
  an array and `overwrite_input` is True.
---
 numpy/lib/function_base.py            |  5 +++--
 numpy/lib/tests/test_function_base.py | 10 ++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'numpy/lib')

diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 9475c2edf..472d7eecc 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -2803,8 +2803,9 @@ def percentile(a, q, interpolation='linear', axis=None, out=None,
         the contents of the input array. In this case you should not make
         any assumptions about the content of the passed in array `a` after
         this function completes -- treat it as undefined. Default is False.
-        Note that, if `overwrite_input` is True and the input is not
-        already an array, an error will be raised.
+        Note that, if the `a` input is not already an array this parameter
+        will have no effect, `a` will be converted to an array internally
+        regardless of the value of this parameter.
 
     Returns
     -------
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index a69c82e18..dd0b6e0ee 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -1530,9 +1530,19 @@ class TestScoreatpercentile(TestCase):
         np.percentile(a, [50], overwrite_input=False)
         assert_equal(a, np.array([2, 3, 4, 1]))
 
+        a = np.array([2, 3, 4, 1])
         np.percentile(a, [50])
         assert_equal(a, np.array([2, 3, 4, 1]))
 
+    def test_percentile_overwrite(self):
+        a = np.array([2, 3, 4, 1])
+        b = np.percentile(a, [50], overwrite_input=True)
+        assert_equal(b, np.array([2.5]))
+
+        b = np.percentile([2, 3, 4, 1], [50], overwrite_input=True)
+        assert_equal(b, np.array([2.5]))
+
+
 
 class TestMedian(TestCase):
     def test_basic(self):
-- 
cgit v1.2.1