1 files changed, 54 insertions, 29 deletions
diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py
index 458378593..512b6bf53 100644
--- a/numpy/lib/twodim_base.py
+++ b/numpy/lib/twodim_base.py
@@ -2,8 +2,8 @@
 
 """
 
-__all__ = ['diag','diagflat','eye','fliplr','flipud','rot90','tri','triu','tril',
-           'vander','histogram2d']
+__all__ = ['diag','diagflat','eye','fliplr','flipud','rot90','tri','triu',
+           'tril','vander','histogram2d']
 
 from numpy.core.numeric import asanyarray, int_, equal, subtract, arange, \
      zeros, arange, greater_equal, multiply, ones, asarray
@@ -143,14 +143,21 @@ def vander(x, N=None):
         X[:,i] = x**(N-i-1)
     return X
 
-def  histogram2d(x,y, bins, normed = False):
-    """Compute the 2D histogram for a dataset (x,y) given the edges or
-         the number of bins. 
-
-    Returns histogram, xedges, yedges.
-    The histogram array is a count of the number of samples in each bin. 
-    The array is oriented such that H[i,j] is the number of samples falling
-        into binx[j] and biny[i]. 
+def  histogram2d(x,y, bins=10, range=None, normed=False):
+    """histogram2d(x,y, bins=10, range=None, normed=False) -> H, xedges, yedges
+    
+    Compute the 2D histogram from samples x,y. 
+
+    Parameters
+    ----------
+    x,y: 1D data series. Both arrays must have the same length.
+    bins: Number of bins -or- [nbin x, nbin y] -or- 
+         [bin edges] -or- [x bin edges, y bin edges].
+    range:  A sequence of lower and upper bin edges (default: [min, max]).
+    normed: True or False. 
+    
+    The histogram array is a count of the number of samples in each 
+    two dimensional bin. 
     Setting normed to True returns a density rather than a bin count. 
     Data falling outside of the edges are not counted.
     """
@@ -160,33 +167,40 @@ def  histogram2d(x,y, bins, normed = False):
     except TypeError:
         N = 1
         bins = [bins]
+    x = asarray(x)
+    y = asarray(y)
+    if range is None:
+        xmin, xmax = x.min(), x.max()
+        ymin, ymax = y.min(), y.max()
+    else:
+        xmin, xmax = range[0]
+        ymin, ymax = range[1]
     if N == 2:
         if np.isscalar(bins[0]):
             xnbin = bins[0]
-            xedges = np.linspace(x.min(), x.max(), xnbin+1)
-            
+            xedges = np.linspace(xmin, xmax, xnbin+1)
         else:
             xedges = asarray(bins[0], float)
             xnbin = len(xedges)-1
-        
         if np.isscalar(bins[1]):
             ynbin = bins[1]
-            yedges = np.linspace(y.min(), y.max(), ynbin+1)
+            yedges = np.linspace(ymin, ymax, ynbin+1)
         else:
             yedges = asarray(bins[1], float)
             ynbin = len(yedges)-1
     elif N == 1:
         ynbin = xnbin = bins[0]
-        xedges = np.linspace(x.min(), x.max(), xnbin+1)
-        yedges = np.linspace(y.max(), y.min(), ynbin+1)
-        xedges[-1] *= 1.0001
-        yedges[-1] *= 1.0001         
+        xedges = np.linspace(xmin, xmax, xnbin+1)
+        yedges = np.linspace(ymin, ymax, ynbin+1)
     else:
         yedges = asarray(bins, float)
         xedges = yedges.copy()
         ynbin = len(yedges)-1
         xnbin = len(xedges)-1
     
+    dxedges = np.diff(xedges)
+    dyedges = np.diff(yedges)
+    
     # Flattened histogram matrix (1D)
     hist = np.zeros((xnbin)*(ynbin), int)
 
@@ -194,30 +208,41 @@ def  histogram2d(x,y, bins, normed = False):
     xbin = np.digitize(x,xedges) 
     ybin = np.digitize(y,yedges) 
  
-    # Remove the outliers
+    # Values that fall on an edge are put in the right bin.
+    # For the rightmost bin, we want values equal to the right 
+    # edge to be counted in the last bin, and not as an outlier.
+    xdecimal = int(-np.log10(dxedges.min()))+6
+    ydecimal = int(-np.log10(dyedges.min()))+6
+    on_edge_x = np.where(np.around(x,xdecimal) == np.around(xedges[-1], xdecimal))[0]
+    on_edge_y = np.where(np.around(y,ydecimal) == np.around(yedges[-1], ydecimal))[0]
+    xbin[on_edge_x] -= 1
+    ybin[on_edge_y] -= 1
+    # Remove the true outliers
     outliers = (xbin==0) | (xbin==xnbin+1) | (ybin==0) | (ybin == ynbin+1)
-
-    xbin = xbin[outliers==False]
-    ybin = ybin[outliers==False]
+    xbin = xbin[outliers==False] - 1
+    ybin = ybin[outliers==False] - 1
     
     # Compute the sample indices in the flattened histogram matrix.
     if xnbin >= ynbin:
         xy = ybin*(xnbin) + xbin
-        shift = xnbin + 1
+        
     else:
         xy = xbin*(ynbin) + ybin
-        shift = ynbin + 1
+        
        
     # Compute the number of repetitions in xy and assign it to the flattened
     #  histogram matrix.
+
     flatcount = np.bincount(xy)
-    indices = np.arange(len(flatcount)-shift)
-    hist[indices] = flatcount[shift:]
+    indices = np.arange(len(flatcount))
+    hist[indices] = flatcount
 
+    shape = np.sort([xnbin, ynbin])
     # Shape into a proper matrix
-    histmat = hist.reshape(xnbin, ynbin)
-    
+    histmat = hist.reshape(shape)
+    if (shape == (ynbin, xnbin)).all():
+        histmat = histmat.T
     if normed:
-        diff2 = np.outer(np.diff(yedges), np.diff(xedges))
+        diff2 = np.outer(dxedges, dyedges)
         histmat = histmat / diff2 / histmat.sum()
     return histmat, xedges, yedges