summary | refs | log | tree | commit | diff
path: root/numpy/lib/function_base.py
diff options
context:
space:
mode:
author Charles Harris <charlesr.harris@gmail.com> 2007-05-13 23:22:17 +0000
committer Charles Harris <charlesr.harris@gmail.com> 2007-05-13 23:22:17 +0000
commit 358475343019d2f08d0db5c79b1a59f2f7311a60 (patch)
tree ac33a84821686165c2f7198c5bb18eea9228c674 /numpy/lib/function_base.py
parent 6ec42819ccd70a406f6be8c6f45ae41fd4851f5f (diff)
download numpy-358475343019d2f08d0db5c79b1a59f2f7311a60.tar.gz
Add patch from dhuard to histogramdd. Fixes ticket #509.
Restructure restructured comments; avoid consolidated lists, they are too ugly to contemplate and move around where they aren't wanted. They can be fixed later if epydoc fixes things up.
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r-- numpy/lib/function_base.py 148
1 files changed, 91 insertions, 57 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 150523ca4..e038a4803 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -71,29 +71,41 @@ def iterable(y):
def histogram(a, bins=10, range=None, normed=False):
"""Compute the histogram from a set of data.
- :Parameters:
- - `a` : array
- The data to histogram. n-D arrays will be flattened.
- - `bins` : int or sequence of floats, optional
- If an int, then the number of equal-width bins in the given range.
- Otherwise, a sequence of the lower bound of each bin.
- - `range` : (float, float), optional
- The lower and upper range of the bins. If not provided, then (a.min(),
- a.max()) is used. Values outside of this range are allocated to the
- closest bin.
- - `normed` : bool, optional
- If False, the result array will contain the number of samples in each bin.
- If True, the result array is the value of the probability *density*
- function at the bin normalized such that the *integral* over the range
- is 1. Note that the sum of all of the histogram values will not usually
- be 1; it is not a probability *mass* function.
-
- :Returns:
- - `hist` : array (n,)
- The values of the histogram. See `normed` for a description of the
- possible semantics.
- - `lower_edges` : float array (n,)
- The lower edges of each bin.
+ Parameters:
+
+ a : array
+ The data to histogram. n-D arrays will be flattened.
+
+ bins : int or sequence of floats
+ If an int, then the number of equal-width bins in the given range.
+ Otherwise, a sequence of the lower bound of each bin.
+
+ range : (float, float)
+ The lower and upper range of the bins. If not provided, then
+ (a.min(), a.max()) is used. Values outside of this range are
+ allocated to the closest bin.
+
+ normed : bool
+ If False, the result array will contain the number of samples in
+ each bin. If True, the result array is the value of the
+ probability *density* function at the bin normalized such that the
+ *integral* over the range is 1. Note that the sum of all of the
+ histogram values will not usually be 1; it is not a probability
+ *mass* function.
+
+ Returns:
+
+ hist : array
+ The values of the histogram. See `normed` for a description of the
+ possible semantics.
+
+ lower_edges : float array
+ The lower edges of each bin.
+
+ SeeAlso:
+
+ histogramdd
+
"""
a = asarray(a).ravel()
if not iterable(bins):
@@ -120,38 +132,54 @@ def histogram(a, bins=10, range=None, normed=False):
return n, bins
def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
- """histogramdd(sample, bins=10, range=None, normed=False, weights=None)
-
- Return the D-dimensional histogram of the sample.
-
- :Parameters:
- - `sample` : A sequence of D arrays, or an NxD array.
- - `bins` : A sequence of edge arrays, a sequence of bin number,
- or a scalar (the number of bins for all dimensions.)
- - `range` : A sequence of lower and upper bin edges (default: [min, max]).
- - `normed` : Boolean, if False, return the number of samples in each bin,
- if True, returns the density.
- - `weights` : An array of weights. The weights are normed only if normed is True.
- Should weights.sum() not equal N, the total bin count will
- not be equal to the number of samples.
-
- :Return:
- - `hist` : Histogram array.
- - `edges` : List of arrays defining the bin edges.
-
+ """histogramdd(sample, bins=10, range=None, normed=False, weights=None)
- Example:
- >>> x = random.randn(100,3)
- >>> hist3d, edges = histogramdd(x, bins = (5, 6, 7))
+ Return the D-dimensional histogram of the sample.
+
+ Parameters:
+
+ sample : sequence or array
+ A sequence containing D arrays or an NxD array. Input data.
+
+ bins : sequence or scalar
+ A sequence of edge arrays, a sequence of bin counts, or a scalar
+ which is the bin count for all dimensions. Default is 10.
+
+ range : sequence
+ A sequence of lower and upper bin edges. Default is [min, max].
- :SeeAlso: histogram
+ normed : boolean
+ If False, return the number of samples in each bin, if True,
+ returns the density.
+
+ weights : array
+ Array of weights. The weights are normed only if normed is True.
+ Should the sum of the weights not equal N, the total bin count will
+ not be equal to the number of samples.
+
+ Returns:
+
+ hist : array
+ Histogram array.
+
+ edges : list
+ List of arrays defining the bin edges.
+
+ SeeAlso:
+
+ histogram
+
+ Example:
+
+ >>> x = random.randn(100,3)
+ >>> hist3d, edges = histogramdd(x, bins = (5, 6, 7))
"""
- try:
+ try:
# Sample is an ND-array.
N, D = sample.shape
- except (AttributeError, ValueError):
+ except (AttributeError, ValueError):
# Sample is a sequence of 1D arrays.
sample = atleast_2d(sample).T
N, D = sample.shape
@@ -161,7 +189,7 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
dedges = D*[None]
if weights is not None:
weights = asarray(weights)
-
+
try:
M = len(bins)
if M != D:
@@ -172,14 +200,20 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
# Select range for each dimension
# Used only if number of bins is given.
if range is None:
- smin = atleast_1d(sample.min(0))
- smax = atleast_1d(sample.max(0))
+ smin = atleast_1d(array(sample.min(0), float))
+ smax = atleast_1d(array(sample.max(0), float))
else:
smin = zeros(D)
smax = zeros(D)
for i in arange(D):
smin[i], smax[i] = range[i]
+ # Make sure the bins have a finite width.
+ for i in arange(len(smin)):
+ if smin[i] == smax[i]:
+ smin[i] = smin[i] - .5
+ smax[i] = smax[i] + .5
+
# Create edge arrays
for i in arange(D):
if isscalar(bins[i]):
@@ -189,14 +223,14 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
edges[i] = asarray(bins[i], float)
nbin[i] = len(edges[i])+1 # +1 for outlier bins
dedges[i] = diff(edges[i])
-
+
nbin = asarray(nbin)
-
- # Compute the bin number each sample falls into.
+
+ # Compute the bin number each sample falls into.
Ncount = {}
for i in arange(D):
Ncount[i] = digitize(sample[:,i], edges[i])
-
+
# Using digitize, values that fall on an edge are put in the right bin.
# For the rightmost bin, we want values equal to the right
# edge to be counted in the last bin, and not as an outlier.
@@ -206,7 +240,7 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
decimal = int(-log10(dedges[i].min())) +6
# Find which points are on the rightmost edge.
on_edge = where(around(sample[:,i], decimal) == around(edges[i][-1], decimal))[0]
- # Shift these points one bin to the left.
+ # Shift these points one bin to the left.
Ncount[i][on_edge] -= 1
# Flattened histogram matrix (1D)
@@ -238,7 +272,7 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
# Remove outliers (indices 0 and -1 for each dimension).
core = D*[slice(1,-1)]
hist = hist[core]
-
+
# Normalize if normed is True
if normed:
s = hist.sum()