diff options
author | Travis Oliphant <oliphant@enthought.com> | 2010-05-15 22:11:10 +0000 |
---|---|---|
committer | Travis Oliphant <oliphant@enthought.com> | 2010-05-15 22:11:10 +0000 |
commit | 44b42db55290aedbf3e30ef2de81d0bccc547a04 (patch) | |
tree | 4e67a9d264683370da14076170c72e47880f09da /numpy/lib/function_base.py | |
parent | c8a06aa54139120e3ecaae462b2d02cb8e92041c (diff) | |
download | numpy-44b42db55290aedbf3e30ef2de81d0bccc547a04.tar.gz |
Add percentile function.
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r-- | numpy/lib/function_base.py | 130 |
1 files changed, 128 insertions, 2 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 680103924..b42452b4a 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -1,6 +1,6 @@ __docformat__ = "restructuredtext en" __all__ = ['select', 'piecewise', 'trim_zeros', - 'copy', 'iterable', + 'copy', 'iterable', 'percentile', 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp', 'extract', 'place', 'nansum', 'nanmax', 'nanargmax', 'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average', @@ -2804,7 +2804,7 @@ def median(a, axis=None, out=None, overwrite_input=False): See Also -------- - mean + mean, percentile Notes ----- @@ -2863,6 +2863,132 @@ def median(a, axis=None, out=None, overwrite_input=False): # and check, use out array. return mean(sorted[indexer], axis=axis, out=out) +def percentile(a, q, axis=None, out=None, overwrite_input=False): + """ + Compute the qth percentile of the data along the specified axis. + + Returns the qth percentile of the array elements. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + q : float in range of [0,100] (or sequence of floats) + percentile to compute which must be between 0 and 100 inclusive + axis : {None, int}, optional + Axis along which the percentiles are computed. The default (axis=None) + is to compute the median along a flattened version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : {False, True}, optional + If True, then allow use of memory of input array (a) for + calculations. The input array will be modified by the call to + median. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. Note that, if `overwrite_input` is True and the input + is not already an ndarray, an error will be raised. + + Returns + ------- + pcntile : ndarray + A new array holding the result (unless `out` is specified, in + which case that array is returned instead). If the input contains + integers, or floats of smaller precision than 64, then the output + data-type is float64. Otherwise, the output data-type is the same + as that of the input. + + See Also + -------- + mean, median + + Notes + ----- + Given a vector V of length N, the qth percentile of V is the qth ranked + value in a sorted copy of V. A weighted average of the two nearest neighbors + is used if the normalized ranking does not match q exactly. + The same as the median if q is 0.5; the same as the min if q is 0; + and the same as the max if q is 1 + + Examples + -------- + >>> a = np.array([[10, 7, 4], [3, 2, 1]]) + >>> a + array([[10, 7, 4], + [ 3, 2, 1]]) + >>> np.percentile(a, 0.5) + 3.5 + >>> np.percentile(a, 0.5, axis=0) + array([ 6.5, 4.5, 2.5]) + >>> np.percentile(a, 0.5, axis=1) + array([ 7., 2.]) + >>> m = np.percentile(a, 0.5, axis=0) + >>> out = np.zeros_like(m) + >>> np.percentile(a, 0.5, axis=0, out=m) + array([ 6.5, 4.5, 2.5]) + >>> m + array([ 6.5, 4.5, 2.5]) + >>> b = a.copy() + >>> np.percentile(b, 0.5, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a==b) + >>> b = a.copy() + >>> np.percentile(b, 0.5, axis=None, overwrite_input=True) + 3.5 + >>> assert not np.all(a==b) + + """ + if q == 0: + return a.min(axis=axis, out=out) + elif q == 100: + return a.max(axis=axis, out=out) + + if overwrite_input: + if axis is None: + sorted = a.ravel() + sorted.sort() + else: + a.sort(axis=axis) + sorted = a + else: + sorted = sort(a, axis=axis) + if axis is None: + axis = 0 + + return _compute_qth_percentile(sorted, q, axis, out) + +# handle sequence of q's without calling sort multiple times +def _compute_qth_percentile(sorted, q, axis, out): + if not isscalar(q): + return [_compute_qth_percentile(sorted, qi, axis, out) + for qi in q] + q = q / 100.0 + if (q < 0) or (q > 1): + raise ValueError, "percentile must be either in the range [0,100]" + + indexer = [slice(None)] * sorted.ndim + Nx = sorted.shape[axis] + index = q*(Nx-1) + i = int(index) + if i == index: + indexer[axis] = slice(i, i+1) + weights = array(1) + sumval = 1.0 + else: + indexer[axis] = slice(i, i+2) + j = i + 1 + weights = array([(j - index), (index - i)],float) + wshape = [1]*sorted.ndim + wshape[axis] = 2 + weights.shape = wshape + sumval = weights.sum() + + # Use add.reduce in both cases to coerce data type as well as + # check and use out array. + return add.reduce(sorted[indexer]*weights, axis=axis, out=out)/sumval + def trapz(y, x=None, dx=1.0, axis=-1): """ Integrate along the given axis using the composite trapezoidal rule. |