diff options
Diffstat (limited to 'numpy/lib/twodim_base.py')
-rw-r--r-- | numpy/lib/twodim_base.py | 83 |
1 files changed, 54 insertions, 29 deletions
diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py index 458378593..512b6bf53 100644 --- a/numpy/lib/twodim_base.py +++ b/numpy/lib/twodim_base.py @@ -2,8 +2,8 @@ """ -__all__ = ['diag','diagflat','eye','fliplr','flipud','rot90','tri','triu','tril', - 'vander','histogram2d'] +__all__ = ['diag','diagflat','eye','fliplr','flipud','rot90','tri','triu', + 'tril','vander','histogram2d'] from numpy.core.numeric import asanyarray, int_, equal, subtract, arange, \ zeros, arange, greater_equal, multiply, ones, asarray @@ -143,14 +143,21 @@ def vander(x, N=None): X[:,i] = x**(N-i-1) return X -def histogram2d(x,y, bins, normed = False): - """Compute the 2D histogram for a dataset (x,y) given the edges or - the number of bins. - - Returns histogram, xedges, yedges. - The histogram array is a count of the number of samples in each bin. - The array is oriented such that H[i,j] is the number of samples falling - into binx[j] and biny[i]. +def histogram2d(x,y, bins=10, range=None, normed=False): + """histogram2d(x,y, bins=10, range=None, normed=False) -> H, xedges, yedges + + Compute the 2D histogram from samples x,y. + + Parameters + ---------- + x,y: 1D data series. Both arrays must have the same length. + bins: Number of bins -or- [nbin x, nbin y] -or- + [bin edges] -or- [x bin edges, y bin edges]. + range: A sequence of lower and upper bin edges (default: [min, max]). + normed: True or False. + + The histogram array is a count of the number of samples in each + two dimensional bin. Setting normed to True returns a density rather than a bin count. Data falling outside of the edges are not counted. """ @@ -160,33 +167,40 @@ def histogram2d(x,y, bins, normed = False): except TypeError: N = 1 bins = [bins] + x = asarray(x) + y = asarray(y) + if range is None: + xmin, xmax = x.min(), x.max() + ymin, ymax = y.min(), y.max() + else: + xmin, xmax = range[0] + ymin, ymax = range[1] if N == 2: if np.isscalar(bins[0]): xnbin = bins[0] - xedges = np.linspace(x.min(), x.max(), xnbin+1) - + xedges = np.linspace(xmin, xmax, xnbin+1) else: xedges = asarray(bins[0], float) xnbin = len(xedges)-1 - if np.isscalar(bins[1]): ynbin = bins[1] - yedges = np.linspace(y.min(), y.max(), ynbin+1) + yedges = np.linspace(ymin, ymax, ynbin+1) else: yedges = asarray(bins[1], float) ynbin = len(yedges)-1 elif N == 1: ynbin = xnbin = bins[0] - xedges = np.linspace(x.min(), x.max(), xnbin+1) - yedges = np.linspace(y.max(), y.min(), ynbin+1) - xedges[-1] *= 1.0001 - yedges[-1] *= 1.0001 + xedges = np.linspace(xmin, xmax, xnbin+1) + yedges = np.linspace(ymin, ymax, ynbin+1) else: yedges = asarray(bins, float) xedges = yedges.copy() ynbin = len(yedges)-1 xnbin = len(xedges)-1 + dxedges = np.diff(xedges) + dyedges = np.diff(yedges) + # Flattened histogram matrix (1D) hist = np.zeros((xnbin)*(ynbin), int) @@ -194,30 +208,41 @@ def histogram2d(x,y, bins, normed = False): xbin = np.digitize(x,xedges) ybin = np.digitize(y,yedges) - # Remove the outliers + # Values that fall on an edge are put in the right bin. + # For the rightmost bin, we want values equal to the right + # edge to be counted in the last bin, and not as an outlier. + xdecimal = int(-np.log10(dxedges.min()))+6 + ydecimal = int(-np.log10(dyedges.min()))+6 + on_edge_x = np.where(np.around(x,xdecimal) == np.around(xedges[-1], xdecimal))[0] + on_edge_y = np.where(np.around(y,ydecimal) == np.around(yedges[-1], ydecimal))[0] + xbin[on_edge_x] -= 1 + ybin[on_edge_y] -= 1 + # Remove the true outliers outliers = (xbin==0) | (xbin==xnbin+1) | (ybin==0) | (ybin == ynbin+1) - - xbin = xbin[outliers==False] - ybin = ybin[outliers==False] + xbin = xbin[outliers==False] - 1 + ybin = ybin[outliers==False] - 1 # Compute the sample indices in the flattened histogram matrix. if xnbin >= ynbin: xy = ybin*(xnbin) + xbin - shift = xnbin + 1 + else: xy = xbin*(ynbin) + ybin - shift = ynbin + 1 + # Compute the number of repetitions in xy and assign it to the flattened # histogram matrix. + flatcount = np.bincount(xy) - indices = np.arange(len(flatcount)-shift) - hist[indices] = flatcount[shift:] + indices = np.arange(len(flatcount)) + hist[indices] = flatcount + shape = np.sort([xnbin, ynbin]) # Shape into a proper matrix - histmat = hist.reshape(xnbin, ynbin) - + histmat = hist.reshape(shape) + if (shape == (ynbin, xnbin)).all(): + histmat = histmat.T if normed: - diff2 = np.outer(np.diff(yedges), np.diff(xedges)) + diff2 = np.outer(dxedges, dyedges) histmat = histmat / diff2 / histmat.sum() return histmat, xedges, yedges |