summaryrefslogtreecommitdiff
path: root/numpy/lib/function_base.py
diff options
context:
space:
mode:
authorTravis Oliphant <oliphant@enthought.com>2007-04-02 18:25:50 +0000
committerTravis Oliphant <oliphant@enthought.com>2007-04-02 18:25:50 +0000
commita1f65e154f15f1c5b35c2befaeb74f3952b2c3e2 (patch)
tree0dbde984312d3ffbdf7fd940d5090e6848c8c530 /numpy/lib/function_base.py
parent471ee74d8a32267d438f20074c077efb3b057ee7 (diff)
downloadnumpy-a1f65e154f15f1c5b35c2befaeb74f3952b2c3e2.tar.gz
Add patch in Ticket #189 for histogramdd. Fixes bug reported by Ben Granett
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r--numpy/lib/function_base.py108
1 files changed, 58 insertions, 50 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 163a129f7..69c9359f1 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1,3 +1,4 @@
+__docformat__ = "restructuredtext en"
__all__ = ['logspace', 'linspace',
'select', 'piecewise', 'trim_zeros',
'copy', 'iterable', #'base_repr', 'binary_repr',
@@ -117,46 +118,49 @@ def histogram(a, bins=10, range=None, normed=False):
else:
return n, bins
-def histogramdd(sample, bins=10, range=None, normed=False):
- """histogramdd(sample, bins = 10, range = None, normed = False) -> H, edges
+def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
+ """histogramdd(sample, bins=10, range=None, normed=False, weights=None)
- Return the D-dimensional histogram computed from sample.
+ Return the D-dimensional histogram of the sample.
- Parameters
- ----------
- sample: A sequence of D arrays, or an NxD array.
- bins: A sequence of edge arrays, or a sequence of the number of bins.
- If a scalar is given, it is assumed to be the number of bins
- for all dimensions.
- range: A sequence of lower and upper bin edges (default: [min, max]).
- normed: If False, returns the number of samples in each bin.
- If True, returns the frequency distribution.
-
-
- Output
- ------
- H: Histogram array.
- edges: List of arrays defining the bin edges.
+ :Parameters:
+ - `sample` : A sequence of D arrays, or an NxD array.
+ - `bins` : A sequence of edge arrays, a sequence of bin number,
+ or a scalar (the number of bins for all dimensions.)
+ - `range` : A sequence of lower and upper bin edges (default: [min, max]).
+ - `normed` : Boolean, if False, return the number of samples in each bin,
+ if True, returns the density.
+ - `weights` : An array of weights. The weights are normed only if normed is True.
+ Should weights.sum() not equal N, the total bin count will
+ not be equal to the number of samples.
+
+ :Return:
+ - `hist` : Histogram array.
+ - `edges` : List of arrays defining the bin edges.
+
Example:
- x = random.randn(100,3)
- H, edges = histogramdd(x, bins = (5, 6, 7))
+ >>> x = random.randn(100,3)
+ >>> hist3d, edges = histogramdd(x, bins = (5, 6, 7))
- See also: histogram
+ :SeeAlso: histogram
"""
- try:
+ try:
+ # Sample is an ND-array.
N, D = sample.shape
- except (AttributeError, ValueError):
- ss = atleast_2d(sample)
- sample = ss.transpose()
+ except (AttributeError, ValueError):
+ # Sample is a sequence of 1D arrays.
+ sample = atleast_2d(sample).T
N, D = sample.shape
nbin = empty(D, int)
edges = D*[None]
dedges = D*[None]
-
+ if weights is not None:
+ weights = asarray(weights)
+
try:
M = len(bins)
if M != D:
@@ -164,6 +168,8 @@ def histogramdd(sample, bins=10, range=None, normed=False):
except TypeError:
bins = D*[bins]
+ # Select range for each dimension
+ # Used only if number of bins is given.
if range is None:
smin = atleast_1d(sample.min(0))
smax = atleast_1d(sample.max(0))
@@ -173,39 +179,37 @@ def histogramdd(sample, bins=10, range=None, normed=False):
for i in arange(D):
smin[i], smax[i] = range[i]
+ # Create edge arrays
for i in arange(D):
if isscalar(bins[i]):
- nbin[i] = bins[i]
- edges[i] = linspace(smin[i], smax[i], nbin[i]+1)
+ nbin[i] = bins[i] + 2 # +2 for outlier bins
+ edges[i] = linspace(smin[i], smax[i], nbin[i]-1)
else:
edges[i] = asarray(bins[i], float)
- nbin[i] = len(edges[i])-1
-
-
-
- Ncount = {}
+ nbin[i] = len(edges[i])+1 # +1 for outlier bins
+ dedges[i] = diff(edges[i])
+
nbin = asarray(nbin)
-
+
+ # Compute the bin number each sample falls into.
+ Ncount = {}
for i in arange(D):
Ncount[i] = digitize(sample[:,i], edges[i])
- dedges[i] = diff(edges[i])
- # Remove values falling outside of bins
- # Values that fall on an edge are put in the right bin.
+
+ # Using digitize, values that fall on an edge are put in the right bin.
# For the rightmost bin, we want values equal to the right
# edge to be counted in the last bin, and not as an outlier.
outliers = zeros(N, int)
for i in arange(D):
+ # Rounding precision
decimal = int(-log10(dedges[i].min())) +6
+ # Find which points are on the rightmost edge.
on_edge = where(around(sample[:,i], decimal) == around(edges[i][-1], decimal))[0]
+ # Shift these points one bin to the left.
Ncount[i][on_edge] -= 1
- outliers += (Ncount[i] == 0) | (Ncount[i] == nbin[i]+1)
- indices = where(outliers == 0)[0]
- for i in arange(D):
- Ncount[i] = Ncount[i][indices] - 1
- N = len(indices)
# Flattened histogram matrix (1D)
- hist = zeros(nbin.prod(), int)
+ hist = zeros(nbin.prod(), float)
# Compute the sample indices in the flattened histogram matrix.
ni = nbin.argsort()
@@ -213,29 +217,33 @@ def histogramdd(sample, bins=10, range=None, normed=False):
xy = zeros(N, int)
for i in arange(0, D-1):
xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod()
-
xy += Ncount[ni[-1]]
# Compute the number of repetitions in xy and assign it to the flattened histmat.
if len(xy) == 0:
- return zeros(nbin, int)
+ return zeros(nbin-2, int), edges
- flatcount = bincount(xy)
+ flatcount = bincount(xy, weights)
a = arange(len(flatcount))
hist[a] = flatcount
# Shape into a proper matrix
hist = hist.reshape(sort(nbin))
- for i,j in enumerate(ni):
+ for i in arange(nbin.size):
+ j = ni[i]
hist = hist.swapaxes(i,j)
- if (hist.shape == nbin).all():
- break
+ ni[i],ni[j] = ni[j],ni[i]
+ # Remove outliers (indices 0 and -1 for each dimension).
+ core = D*[slice(1,-1)]
+ hist = hist[core]
+
+ # Normalize if normed is True
if normed:
s = hist.sum()
for i in arange(D):
shape = ones(D, int)
- shape[i] = nbin[i]
+ shape[i] = nbin[i]-2
hist = hist / dedges[i].reshape(shape)
hist /= s