summaryrefslogtreecommitdiff
path: root/numpy/lib/function_base.py
diff options
context:
space:
mode:
authorTravis Oliphant <oliphant@enthought.com>2010-05-15 22:11:10 +0000
committerTravis Oliphant <oliphant@enthought.com>2010-05-15 22:11:10 +0000
commit44b42db55290aedbf3e30ef2de81d0bccc547a04 (patch)
tree4e67a9d264683370da14076170c72e47880f09da /numpy/lib/function_base.py
parentc8a06aa54139120e3ecaae462b2d02cb8e92041c (diff)
downloadnumpy-44b42db55290aedbf3e30ef2de81d0bccc547a04.tar.gz
Add percentile function.
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r--numpy/lib/function_base.py130
1 files changed, 128 insertions, 2 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 680103924..b42452b4a 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1,6 +1,6 @@
__docformat__ = "restructuredtext en"
__all__ = ['select', 'piecewise', 'trim_zeros',
- 'copy', 'iterable',
+ 'copy', 'iterable', 'percentile',
'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp',
'extract', 'place', 'nansum', 'nanmax', 'nanargmax',
'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average',
@@ -2804,7 +2804,7 @@ def median(a, axis=None, out=None, overwrite_input=False):
See Also
--------
- mean
+ mean, percentile
Notes
-----
@@ -2863,6 +2863,132 @@ def median(a, axis=None, out=None, overwrite_input=False):
# and check, use out array.
return mean(sorted[indexer], axis=axis, out=out)
+def percentile(a, q, axis=None, out=None, overwrite_input=False):
+ """
+ Compute the qth percentile of the data along the specified axis.
+
+ Returns the qth percentile of the array elements.
+
+ Parameters
+ ----------
+ a : array_like
+ Input array or object that can be converted to an array.
+ q : float in range of [0,100] (or sequence of floats)
+ percentile to compute which must be between 0 and 100 inclusive
+ axis : {None, int}, optional
+ Axis along which the percentiles are computed. The default (axis=None)
+ is to compute the median along a flattened version of the array.
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must
+ have the same shape and buffer length as the expected output,
+ but the type (of the output) will be cast if necessary.
+ overwrite_input : {False, True}, optional
+ If True, then allow use of memory of input array (a) for
+ calculations. The input array will be modified by the call to
+ median. This will save memory when you do not need to preserve
+ the contents of the input array. Treat the input as undefined,
+ but it will probably be fully or partially sorted. Default is
+ False. Note that, if `overwrite_input` is True and the input
+ is not already an ndarray, an error will be raised.
+
+ Returns
+ -------
+ pcntile : ndarray
+ A new array holding the result (unless `out` is specified, in
+ which case that array is returned instead). If the input contains
+ integers, or floats of smaller precision than 64, then the output
+ data-type is float64. Otherwise, the output data-type is the same
+ as that of the input.
+
+ See Also
+ --------
+ mean, median
+
+ Notes
+ -----
+ Given a vector V of length N, the qth percentile of V is the qth ranked
+ value in a sorted copy of V. A weighted average of the two nearest neighbors
+ is used if the normalized ranking does not match q exactly.
+ The same as the median if q is 0.5; the same as the min if q is 0;
+ and the same as the max if q is 1
+
+ Examples
+ --------
+ >>> a = np.array([[10, 7, 4], [3, 2, 1]])
+ >>> a
+ array([[10, 7, 4],
+ [ 3, 2, 1]])
+ >>> np.percentile(a, 0.5)
+ 3.5
+ >>> np.percentile(a, 0.5, axis=0)
+ array([ 6.5, 4.5, 2.5])
+ >>> np.percentile(a, 0.5, axis=1)
+ array([ 7., 2.])
+ >>> m = np.percentile(a, 0.5, axis=0)
+ >>> out = np.zeros_like(m)
+ >>> np.percentile(a, 0.5, axis=0, out=m)
+ array([ 6.5, 4.5, 2.5])
+ >>> m
+ array([ 6.5, 4.5, 2.5])
+ >>> b = a.copy()
+ >>> np.percentile(b, 0.5, axis=1, overwrite_input=True)
+ array([ 7., 2.])
+ >>> assert not np.all(a==b)
+ >>> b = a.copy()
+ >>> np.percentile(b, 0.5, axis=None, overwrite_input=True)
+ 3.5
+ >>> assert not np.all(a==b)
+
+ """
+ if q == 0:
+ return a.min(axis=axis, out=out)
+ elif q == 100:
+ return a.max(axis=axis, out=out)
+
+ if overwrite_input:
+ if axis is None:
+ sorted = a.ravel()
+ sorted.sort()
+ else:
+ a.sort(axis=axis)
+ sorted = a
+ else:
+ sorted = sort(a, axis=axis)
+ if axis is None:
+ axis = 0
+
+ return _compute_qth_percentile(sorted, q, axis, out)
+
+# handle sequence of q's without calling sort multiple times
+def _compute_qth_percentile(sorted, q, axis, out):
+ if not isscalar(q):
+ return [_compute_qth_percentile(sorted, qi, axis, out)
+ for qi in q]
+ q = q / 100.0
+ if (q < 0) or (q > 1):
+ raise ValueError, "percentile must be either in the range [0,100]"
+
+ indexer = [slice(None)] * sorted.ndim
+ Nx = sorted.shape[axis]
+ index = q*(Nx-1)
+ i = int(index)
+ if i == index:
+ indexer[axis] = slice(i, i+1)
+ weights = array(1)
+ sumval = 1.0
+ else:
+ indexer[axis] = slice(i, i+2)
+ j = i + 1
+ weights = array([(j - index), (index - i)],float)
+ wshape = [1]*sorted.ndim
+ wshape[axis] = 2
+ weights.shape = wshape
+ sumval = weights.sum()
+
+ # Use add.reduce in both cases to coerce data type as well as
+ # check and use out array.
+ return add.reduce(sorted[indexer]*weights, axis=axis, out=out)/sumval
+
def trapz(y, x=None, dx=1.0, axis=-1):
"""
Integrate along the given axis using the composite trapezoidal rule.