summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- doc/release/1.6.0-notes.rst 7
-rw-r--r-- numpy/lib/function_base.py 47
-rw-r--r-- numpy/lib/tests/test_function_base.py 27
3 files changed, 54 insertions, 27 deletions
diff --git a/doc/release/1.6.0-notes.rst b/doc/release/1.6.0-notes.rst
index ef28d2a3a..065577e26 100644
--- a/doc/release/1.6.0-notes.rst
+++ b/doc/release/1.6.0-notes.rst
@@ -132,6 +132,13 @@ conversion of arbitrary python objects into arrays is exposed by
``PyArray_GetArrayParamsFromObject``.
+Deprecated features
+===================
+
+The "normed" keyword in ``numpy.histogram`` is deprecated. Its functionality
+will be replaced by the new "density" keyword.
+
+
Removed features
================
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 96c3f2a35..7c5f0c5af 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -58,7 +58,7 @@ def iterable(y):
except: return 0
return 1
-def histogram(a, bins=10, range=None, normed=False, weights=None):
+def histogram(a, bins=10, range=None, normed=False, weights=None, density=None):
"""
Compute the histogram of a set of data.
@@ -76,17 +76,27 @@ def histogram(a, bins=10, range=None, normed=False, weights=None):
is simply ``(a.min(), a.max())``. Values outside the range are
ignored.
normed : bool, optional
+ This keyword is deprecated in Numpy 1.6 due to confusing/buggy
+ behavior. It will be removed in Numpy 2.0. Use the density keyword
+ instead.
If False, the result will contain the number of samples
in each bin. If True, the result is the value of the
probability *density* function at the bin, normalized such that
- the *integral* over the range is 1. Note that the sum of the
- histogram values will not be equal to 1 unless bins of unity
- width are chosen; it is not a probability *mass* function.
+ the *integral* over the range is 1. Note that this latter behavior is
+ known to be buggy with unequal bin widths; use `density` instead.
weights : array_like, optional
An array of weights, of the same shape as `a`. Each value in `a`
only contributes its associated weight towards the bin count
(instead of 1). If `normed` is True, the weights are normalized,
so that the integral of the density over the range remains 1
+ density : bool, optional
+ If False, the result will contain the number of samples
+ in each bin. If True, the result is the value of the
+ probability *density* function at the bin, normalized such that
+ the *integral* over the range is 1. Note that the sum of the
+ histogram values will not be equal to 1 unless bins of unity
+ width are chosen; it is not a probability *mass* function.
+ Overrides the `normed` keyword if given.
Returns
-------
@@ -116,13 +126,13 @@ def histogram(a, bins=10, range=None, normed=False, weights=None):
--------
>>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3])
(array([0, 2, 1]), array([0, 1, 2, 3]))
- >>> np.histogram(np.arange(4), bins=np.arange(5), normed=True)
+ >>> np.histogram(np.arange(4), bins=np.arange(5), density=True)
(array([ 0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4]))
>>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3])
(array([1, 4, 1]), array([0, 1, 2, 3]))
>>> a = np.arange(5)
- >>> hist, bin_edges = np.histogram(a, normed=True)
+ >>> hist, bin_edges = np.histogram(a, density=True)
>>> hist
array([ 0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5])
>>> hist.sum()
@@ -155,10 +165,8 @@ def histogram(a, bins=10, range=None, normed=False, weights=None):
mn -= 0.5
mx += 0.5
bins = linspace(mn, mx, bins+1, endpoint=True)
- uniform = True
else:
bins = asarray(bins)
- uniform = False
if (np.diff(bins) < 0).any():
raise AttributeError(
'bins must increase monotonically.')
@@ -191,18 +199,19 @@ def histogram(a, bins=10, range=None, normed=False, weights=None):
n = np.diff(n)
- if normed:
- db = array(np.diff(bins), float)
- if not uniform:
- warnings.warn("""
- This release of NumPy (1.6) fixes a normalization bug in histogram
- function occuring with non-uniform bin widths. The returned value
- is now a density: n / (N * bin width), where n is the bin count and
- N the total number of points.
- """)
- return n/db/n.sum(), bins
+ if density is not None:
+ if density:
+ db = array(np.diff(bins), float)
+ return n/db/n.sum(), bins
+ else:
+ return n, bins
else:
- return n, bins
+ # deprecated, buggy behavior. Remove for Numpy 2.0
+ if normed:
+ db = array(np.diff(bins), float)
+ return n/(n*db).sum(), bins
+ else:
+ return n, bins
def histogramdd(sample, bins=10, range=None, normed=False, weights=None):
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 6e80b0438..ea3ca4000 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -565,10 +565,25 @@ class TestHistogram(TestCase):
area = sum(a * diff(b))
assert_almost_equal(area, 1)
+ # Check with non-constant bin widths (buggy but backwards compatible)
+ v = np.arange(10)
+ bins = [0, 1, 5, 9, 10]
+ a, b = histogram(v, bins, normed=True)
+ area = sum(a * diff(b))
+ assert_almost_equal(area, 1)
+
+ def test_density(self):
+ # Check that the integral of the density equals 1.
+ n = 100
+ v = rand(n)
+ a, b = histogram(v, density=True)
+ area = sum(a * diff(b))
+ assert_almost_equal(area, 1)
+
# Check with non-constant bin widths
v = np.arange(10)
bins = [0,1,3,6,10]
- a, b = histogram(v, bins, normed=True)
+ a, b = histogram(v, bins, density=True)
assert_array_equal(a, .1)
assert_equal(sum(a*diff(b)), 1)
@@ -576,14 +591,13 @@ class TestHistogram(TestCase):
# infinities.
v = np.arange(10)
bins = [0,1,3,6,np.inf]
- a, b = histogram(v, bins, normed=True)
+ a, b = histogram(v, bins, density=True)
assert_array_equal(a, [.1,.1,.1,0.])
# Taken from a bug report from N. Becker on the numpy-discussion
# mailing list Aug. 6, 2010.
- counts, dmy = np.histogram([1,2,3,4], [0.5,1.5,np.inf], normed=True)
+ counts, dmy = np.histogram([1,2,3,4], [0.5,1.5,np.inf], density=True)
assert_equal(counts, [.25, 0])
- warnings.filters.pop(0)
def test_outliers(self):
# Check that outliers are not tallied
@@ -646,13 +660,10 @@ class TestHistogram(TestCase):
wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], normed=True)
assert_array_almost_equal(wa, array([4, 5, 0, 1]) / 10. / 3. * 4)
- warnings.filterwarnings('ignore', \
- message="\s*This release of NumPy fixes a normalization bug")
# Check weights with non-uniform bin widths
a,b = histogram(np.arange(9), [0,1,3,6,10], \
- weights=[2,1,1,1,1,1,1,1,1], normed=True)
+ weights=[2,1,1,1,1,1,1,1,1], density=True)
assert_almost_equal(a, [.2, .1, .1, .075])
- warnings.filters.pop(0)
def test_empty(self):
a, b = histogram([], bins=([0,1]))