diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2007-05-13 23:22:17 +0000 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2007-05-13 23:22:17 +0000 |
commit | 358475343019d2f08d0db5c79b1a59f2f7311a60 (patch) | |
tree | ac33a84821686165c2f7198c5bb18eea9228c674 /numpy/lib/function_base.py | |
parent | 6ec42819ccd70a406f6be8c6f45ae41fd4851f5f (diff) | |
download | numpy-358475343019d2f08d0db5c79b1a59f2f7311a60.tar.gz |
Add patch from dhuard to histogramdd. Fixes ticket #509.
Restructure the reStructuredText docstrings; avoid consolidated lists, which are too ugly
to contemplate and get moved around to where they aren't wanted. They can be fixed later
if epydoc fixes things up.
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r-- | numpy/lib/function_base.py | 148 |
1 files changed, 91 insertions, 57 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 150523ca4..e038a4803 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -71,29 +71,41 @@ def iterable(y): def histogram(a, bins=10, range=None, normed=False): """Compute the histogram from a set of data. - :Parameters: - - `a` : array - The data to histogram. n-D arrays will be flattened. - - `bins` : int or sequence of floats, optional - If an int, then the number of equal-width bins in the given range. - Otherwise, a sequence of the lower bound of each bin. - - `range` : (float, float), optional - The lower and upper range of the bins. If not provided, then (a.min(), - a.max()) is used. Values outside of this range are allocated to the - closest bin. - - `normed` : bool, optional - If False, the result array will contain the number of samples in each bin. - If True, the result array is the value of the probability *density* - function at the bin normalized such that the *integral* over the range - is 1. Note that the sum of all of the histogram values will not usually - be 1; it is not a probability *mass* function. - - :Returns: - - `hist` : array (n,) - The values of the histogram. See `normed` for a description of the - possible semantics. - - `lower_edges` : float array (n,) - The lower edges of each bin. + Parameters: + + a : array + The data to histogram. n-D arrays will be flattened. + + bins : int or sequence of floats + If an int, then the number of equal-width bins in the given range. + Otherwise, a sequence of the lower bound of each bin. + + range : (float, float) + The lower and upper range of the bins. If not provided, then + (a.min(), a.max()) is used. Values outside of this range are + allocated to the closest bin. + + normed : bool + If False, the result array will contain the number of samples in + each bin. 
If True, the result array is the value of the + probability *density* function at the bin normalized such that the + *integral* over the range is 1. Note that the sum of all of the + histogram values will not usually be 1; it is not a probability + *mass* function. + + Returns: + + hist : array + The values of the histogram. See `normed` for a description of the + possible semantics. + + lower_edges : float array + The lower edges of each bin. + + SeeAlso: + + histogramdd + """ a = asarray(a).ravel() if not iterable(bins): @@ -120,38 +132,54 @@ def histogram(a, bins=10, range=None, normed=False): return n, bins def histogramdd(sample, bins=10, range=None, normed=False, weights=None): - """histogramdd(sample, bins=10, range=None, normed=False, weights=None) - - Return the D-dimensional histogram of the sample. - - :Parameters: - - `sample` : A sequence of D arrays, or an NxD array. - - `bins` : A sequence of edge arrays, a sequence of bin number, - or a scalar (the number of bins for all dimensions.) - - `range` : A sequence of lower and upper bin edges (default: [min, max]). - - `normed` : Boolean, if False, return the number of samples in each bin, - if True, returns the density. - - `weights` : An array of weights. The weights are normed only if normed is True. - Should weights.sum() not equal N, the total bin count will - not be equal to the number of samples. - - :Return: - - `hist` : Histogram array. - - `edges` : List of arrays defining the bin edges. - + """histogramdd(sample, bins=10, range=None, normed=False, weights=None) - Example: - >>> x = random.randn(100,3) - >>> hist3d, edges = histogramdd(x, bins = (5, 6, 7)) + Return the N-dimensional histogram of the sample. + + Parameters: + + sample : sequence or array + A sequence containing N arrays or an NxM array. Input data. + + bins : sequence or scalar + A sequence of edge arrays, a sequence of bin counts, or a scalar + which is the bin count for all dimensions. Default is 10. 
+ + range : sequence + A sequence of lower and upper bin edges. Default is [min, max]. - :SeeAlso: histogram + normed : boolean + If False, return the number of samples in each bin, if True, + returns the density. + + weights : array + Array of weights. The weights are normed only if normed is True. + Should the sum of the weights not equal N, the total bin count will + not be equal to the number of samples. + + Returns: + + hist : array + Histogram array. + + edges : list + List of arrays defining the lower bin edges. + + SeeAlso: + + histogram + + Example + + >>> x = random.randn(100,3) + >>> hist3d, edges = histogramdd(x, bins = (5, 6, 7)) """ - try: + try: # Sample is an ND-array. N, D = sample.shape - except (AttributeError, ValueError): + except (AttributeError, ValueError): # Sample is a sequence of 1D arrays. sample = atleast_2d(sample).T N, D = sample.shape @@ -161,7 +189,7 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): dedges = D*[None] if weights is not None: weights = asarray(weights) - + try: M = len(bins) if M != D: @@ -172,14 +200,20 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): # Select range for each dimension # Used only if number of bins is given. if range is None: - smin = atleast_1d(sample.min(0)) - smax = atleast_1d(sample.max(0)) + smin = atleast_1d(array(sample.min(0), float)) + smax = atleast_1d(array(sample.max(0), float)) else: smin = zeros(D) smax = zeros(D) for i in arange(D): smin[i], smax[i] = range[i] + # Make sure the bins have a finite width. 
+ for i in arange(len(smin)): + if smin[i] == smax[i]: + smin[i] = smin[i] - .5 + smax[i] = smax[i] + .5 + # Create edge arrays for i in arange(D): if isscalar(bins[i]): @@ -189,14 +223,14 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): edges[i] = asarray(bins[i], float) nbin[i] = len(edges[i])+1 # +1 for outlier bins dedges[i] = diff(edges[i]) - + nbin = asarray(nbin) - - # Compute the bin number each sample falls into. + + # Compute the bin number each sample falls into. Ncount = {} for i in arange(D): Ncount[i] = digitize(sample[:,i], edges[i]) - + # Using digitize, values that fall on an edge are put in the right bin. # For the rightmost bin, we want values equal to the right # edge to be counted in the last bin, and not as an outlier. @@ -206,7 +240,7 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): decimal = int(-log10(dedges[i].min())) +6 # Find which points are on the rightmost edge. on_edge = where(around(sample[:,i], decimal) == around(edges[i][-1], decimal))[0] - # Shift these points one bin to the left. + # Shift these points one bin to the left. Ncount[i][on_edge] -= 1 # Flattened histogram matrix (1D) @@ -238,7 +272,7 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): # Remove outliers (indices 0 and -1 for each dimension). core = D*[slice(1,-1)] hist = hist[core] - + # Normalize if normed is True if normed: s = hist.sum() |