summaryrefslogtreecommitdiff
path: root/numpy/lib/function_base.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r--numpy/lib/function_base.py143
1 files changed, 133 insertions, 10 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 29308d443..4701cb03e 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -4,23 +4,23 @@ __all__ = ['logspace', 'linspace',
'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp',
'unique', 'extract', 'place', 'nansum', 'nanmax', 'nanargmax',
'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average',
- 'histogram', 'bincount', 'digitize', 'cov', 'corrcoef', 'msort',
- 'median', 'sinc', 'hamming', 'hanning', 'bartlett', 'blackman',
- 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring', 'meshgrid',
- 'delete', 'insert', 'append'
+ 'histogram', 'histogramnd', 'bincount', 'digitize', 'cov',
+ 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning',
+ 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc',
+ 'add_docstring', 'meshgrid', 'delete', 'insert', 'append'
]
import types
import numpy.core.numeric as _nx
from numpy.core.numeric import ones, zeros, arange, concatenate, array, \
- asarray, asanyarray, empty, empty_like, asanyarray, ndarray
+ asarray, asanyarray, empty, empty_like, asanyarray, ndarray, around
from numpy.core.numeric import ScalarType, dot, where, newaxis, intp, \
- integer
+ integer, isscalar
from numpy.core.umath import pi, multiply, add, arctan2, \
- frompyfunc, isnan, cos, less_equal, sqrt, sin, mod, exp
+ frompyfunc, isnan, cos, less_equal, sqrt, sin, mod, exp, log10
from numpy.core.fromnumeric import ravel, nonzero, choose, sort
from numpy.core.numerictypes import typecodes
-from numpy.lib.shape_base import atleast_1d
+from numpy.lib.shape_base import atleast_1d, atleast_2d
from numpy.lib.twodim_base import diag
from _compiled_base import _insert, add_docstring
from _compiled_base import digitize, bincount
@@ -75,8 +75,7 @@ def histogram(a, bins=10, range=None, normed=False):
----------
bins: Number of bins
range: Lower and upper bin edges (default: [sample.min(), sample.max()]).
- Does not really work, all values greater than range are stored in
- the last bin.
+ All values greater than range are stored in the last bin.
normed: If False (default), return the number of samples in each bin.
If True, return a frequency distribution.
@@ -104,6 +103,130 @@ def histogram(a, bins=10, range=None, normed=False):
else:
return n, bins
+def histogramnd(sample, bins=10, range=None, normed=False):
+ """histogramnd(sample, bins = 10, range = None, normed = False) -> H, edges
+
+ Return the N-dimensional histogram computed from sample.
+
+ Parameters
+ ----------
+ sample: A sequence of N arrays, or an KxN array.
+ bins: A sequence of edge arrays, or a sequence of the number of bins.
+ If a scalar is given, it is assumed to be the number of bins
+ for all dimensions.
+ range: A sequence of lower and upper bin edges (default: [min, max]).
+ normed: If False, returns the number of samples in each bin.
+ If True, returns the frequency distribution.
+
+
+ Output
+ ------
+ H: Histogram array.
+ edges: List of arrays defining the bin edges.
+
+ Example:
+ x = random.randn(100,3)
+ H, edges = histogramnd(x, bins = (5, 6, 7))
+
+ See also: histogram
+ """
+
+ try:
+ N, D = sample.shape
+ except (AttributeError, ValueError):
+ ss = atleast_2d(sample)
+ sample = ss.transpose()
+ N, D = sample.shape
+
+ nbin = empty(D, int)
+ edges = D*[None]
+ dedges = D*[None]
+
+ try:
+ M = len(bins)
+ if M != D:
+ raise AttributeError, 'The dimension of bins must be a equal to the dimension of the sample x.'
+ except TypeError:
+ bins = D*[bins]
+
+ if range is None:
+ smin = atleast_1d(sample.min(0))
+ smax = atleast_1d(sample.max(0))
+ else:
+ smin = zeros(D)
+ smax = zeros(D)
+ for i in arange(D):
+ smin[i], smax[i] = range[i]
+
+ for i in arange(D):
+ if isscalar(bins[i]):
+ nbin[i] = bins[i]
+ edges[i] = linspace(smin[i], smax[i], nbin[i]+1)
+ else:
+ edges[i] = asarray(bins[i], float)
+ nbin[i] = len(edges[i])-1
+
+
+
+ Ncount = {}
+ nbin = asarray(nbin)
+
+ for i in arange(D):
+ Ncount[i] = digitize(sample[:,i], edges[i])
+ dedges[i] = diff(edges[i])
+ # Remove values falling outside of bins
+ # Values that fall on an edge are put in the right bin.
+ # For the rightmost bin, we want values equal to the right
+ # edge to be counted in the last bin, and not as an outlier.
+ outliers = zeros(N, int)
+ for i in arange(D):
+ decimal = int(-log10(dedges[i].min())) +6
+ on_edge = where(around(sample[:,i], decimal) == around(edges[i][-1], decimal))[0]
+ Ncount[i][on_edge] -= 1
+ outliers += (Ncount[i] == 0) | (Ncount[i] == nbin[i]+1)
+ indices = where(outliers == 0)[0]
+ for i in arange(D):
+ Ncount[i] = Ncount[i][indices] - 1
+ N = len(indices)
+
+ # Flattened histogram matrix (1D)
+ hist = zeros(nbin.prod(), int)
+
+ # Compute the sample indices in the flattened histogram matrix.
+ ni = nbin.argsort()
+ shape = []
+ xy = zeros(N, int)
+ for i in arange(0, D-1):
+ xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod()
+
+ xy += Ncount[ni[-1]]
+
+ # Compute the number of repetitions in xy and assign it to the flattened histmat.
+ if len(xy) == 0:
+ return zeros(nbin, int)
+
+ flatcount = bincount(xy)
+ a = arange(len(flatcount))
+ hist[a] = flatcount
+
+ # Shape into a proper matrix
+ hist = hist.reshape(sort(nbin))
+ for i,j in enumerate(ni):
+ hist = hist.swapaxes(i,j)
+ if (hist.shape == nbin).all():
+ break
+
+ if normed:
+ s = hist.sum()
+ for i in arange(D):
+ shape = ones(D, int)
+ shape[i] = nbin[i]
+ hist = hist / dedges[i].reshape(shape)
+ hist /= s
+
+ return hist, edges
+
+
def average(a, axis=None, weights=None, returned=False):
"""average(a, axis=None weights=None, returned=False)