summaryrefslogtreecommitdiff
path: root/numpy/lib/twodim_base.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/twodim_base.py')
-rw-r--r--numpy/lib/twodim_base.py83
1 files changed, 54 insertions, 29 deletions
diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py
index 458378593..512b6bf53 100644
--- a/numpy/lib/twodim_base.py
+++ b/numpy/lib/twodim_base.py
@@ -2,8 +2,8 @@
"""
-__all__ = ['diag','diagflat','eye','fliplr','flipud','rot90','tri','triu','tril',
- 'vander','histogram2d']
+__all__ = ['diag','diagflat','eye','fliplr','flipud','rot90','tri','triu',
+ 'tril','vander','histogram2d']
from numpy.core.numeric import asanyarray, int_, equal, subtract, arange, \
zeros, arange, greater_equal, multiply, ones, asarray
@@ -143,14 +143,21 @@ def vander(x, N=None):
X[:,i] = x**(N-i-1)
return X
-def histogram2d(x,y, bins, normed = False):
- """Compute the 2D histogram for a dataset (x,y) given the edges or
- the number of bins.
-
- Returns histogram, xedges, yedges.
- The histogram array is a count of the number of samples in each bin.
- The array is oriented such that H[i,j] is the number of samples falling
- into binx[j] and biny[i].
+def histogram2d(x,y, bins=10, range=None, normed=False):
+ """histogram2d(x,y, bins=10, range=None, normed=False) -> H, xedges, yedges
+
+ Compute the 2D histogram from samples x,y.
+
+ Parameters
+ ----------
+ x,y: 1D data series. Both arrays must have the same length.
+ bins: Number of bins -or- [nbin x, nbin y] -or-
+ [bin edges] -or- [x bin edges, y bin edges].
+ range: A sequence of lower and upper bin edges (default: [min, max]).
+ normed: True or False.
+
+ The histogram array is a count of the number of samples in each
+ two dimensional bin.
Setting normed to True returns a density rather than a bin count.
Data falling outside of the edges are not counted.
"""
@@ -160,33 +167,40 @@ def histogram2d(x,y, bins, normed = False):
except TypeError:
N = 1
bins = [bins]
+ x = asarray(x)
+ y = asarray(y)
+ if range is None:
+ xmin, xmax = x.min(), x.max()
+ ymin, ymax = y.min(), y.max()
+ else:
+ xmin, xmax = range[0]
+ ymin, ymax = range[1]
if N == 2:
if np.isscalar(bins[0]):
xnbin = bins[0]
- xedges = np.linspace(x.min(), x.max(), xnbin+1)
-
+ xedges = np.linspace(xmin, xmax, xnbin+1)
else:
xedges = asarray(bins[0], float)
xnbin = len(xedges)-1
-
if np.isscalar(bins[1]):
ynbin = bins[1]
- yedges = np.linspace(y.min(), y.max(), ynbin+1)
+ yedges = np.linspace(ymin, ymax, ynbin+1)
else:
yedges = asarray(bins[1], float)
ynbin = len(yedges)-1
elif N == 1:
ynbin = xnbin = bins[0]
- xedges = np.linspace(x.min(), x.max(), xnbin+1)
- yedges = np.linspace(y.max(), y.min(), ynbin+1)
- xedges[-1] *= 1.0001
- yedges[-1] *= 1.0001
+ xedges = np.linspace(xmin, xmax, xnbin+1)
+ yedges = np.linspace(ymin, ymax, ynbin+1)
else:
yedges = asarray(bins, float)
xedges = yedges.copy()
ynbin = len(yedges)-1
xnbin = len(xedges)-1
+ dxedges = np.diff(xedges)
+ dyedges = np.diff(yedges)
+
# Flattened histogram matrix (1D)
hist = np.zeros((xnbin)*(ynbin), int)
@@ -194,30 +208,41 @@ def histogram2d(x,y, bins, normed = False):
xbin = np.digitize(x,xedges)
ybin = np.digitize(y,yedges)
- # Remove the outliers
+ # Values that fall on an edge are put in the right bin.
+ # For the rightmost bin, we want values equal to the right
+ # edge to be counted in the last bin, and not as an outlier.
+ xdecimal = int(-np.log10(dxedges.min()))+6
+ ydecimal = int(-np.log10(dyedges.min()))+6
+ on_edge_x = np.where(np.around(x,xdecimal) == np.around(xedges[-1], xdecimal))[0]
+ on_edge_y = np.where(np.around(y,ydecimal) == np.around(yedges[-1], ydecimal))[0]
+ xbin[on_edge_x] -= 1
+ ybin[on_edge_y] -= 1
+ # Remove the true outliers
outliers = (xbin==0) | (xbin==xnbin+1) | (ybin==0) | (ybin == ynbin+1)
-
- xbin = xbin[outliers==False]
- ybin = ybin[outliers==False]
+ xbin = xbin[outliers==False] - 1
+ ybin = ybin[outliers==False] - 1
# Compute the sample indices in the flattened histogram matrix.
if xnbin >= ynbin:
xy = ybin*(xnbin) + xbin
- shift = xnbin + 1
+
else:
xy = xbin*(ynbin) + ybin
- shift = ynbin + 1
+
# Compute the number of repetitions in xy and assign it to the flattened
# histogram matrix.
+
flatcount = np.bincount(xy)
- indices = np.arange(len(flatcount)-shift)
- hist[indices] = flatcount[shift:]
+ indices = np.arange(len(flatcount))
+ hist[indices] = flatcount
+ shape = np.sort([xnbin, ynbin])
# Shape into a proper matrix
- histmat = hist.reshape(xnbin, ynbin)
-
+ histmat = hist.reshape(shape)
+ if (shape == (ynbin, xnbin)).all():
+ histmat = histmat.T
if normed:
- diff2 = np.outer(np.diff(yedges), np.diff(xedges))
+ diff2 = np.outer(dxedges, dyedges)
histmat = histmat / diff2 / histmat.sum()
return histmat, xedges, yedges