diff options
-rw-r--r-- | doc/release/1.6.0-notes.rst | 7 | ||||
-rw-r--r-- | numpy/lib/function_base.py | 47 | ||||
-rw-r--r-- | numpy/lib/tests/test_function_base.py | 27 |
3 files changed, 54 insertions, 27 deletions
diff --git a/doc/release/1.6.0-notes.rst b/doc/release/1.6.0-notes.rst index ef28d2a3a..065577e26 100644 --- a/doc/release/1.6.0-notes.rst +++ b/doc/release/1.6.0-notes.rst @@ -132,6 +132,13 @@ conversion of arbitrary python objects into arrays is exposed by ``PyArray_GetArrayParamsFromObject``. +Deprecated features +=================== + +The "normed" keyword in ``numpy.histogram`` is deprecated. Its functionality +will be replaced by the new "density" keyword. + + Removed features ================ diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 96c3f2a35..7c5f0c5af 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -58,7 +58,7 @@ def iterable(y): except: return 0 return 1 -def histogram(a, bins=10, range=None, normed=False, weights=None): +def histogram(a, bins=10, range=None, normed=False, weights=None, density=None): """ Compute the histogram of a set of data. @@ -76,17 +76,27 @@ def histogram(a, bins=10, range=None, normed=False, weights=None): is simply ``(a.min(), a.max())``. Values outside the range are ignored. normed : bool, optional + This keyword is deprecated in Numpy 1.6 due to confusing/buggy + behavior. It will be removed in Numpy 2.0. Use the density keyword + instead. If False, the result will contain the number of samples in each bin. If True, the result is the value of the probability *density* function at the bin, normalized such that - the *integral* over the range is 1. Note that the sum of the - histogram values will not be equal to 1 unless bins of unity - width are chosen; it is not a probability *mass* function. + the *integral* over the range is 1. Note that this latter behavior is + known to be buggy with unequal bin widths; use `density` instead. weights : array_like, optional An array of weights, of the same shape as `a`. Each value in `a` only contributes its associated weight towards the bin count (instead of 1). If `normed` is True, the weights are normalized, so that the integral of the density over the range remains 1 + density : bool, optional + If False, the result will contain the number of samples + in each bin. If True, the result is the value of the + probability *density* function at the bin, normalized such that + the *integral* over the range is 1. Note that the sum of the + histogram values will not be equal to 1 unless bins of unity + width are chosen; it is not a probability *mass* function. + Overrides the `normed` keyword if given. Returns ------- @@ -116,13 +126,13 @@ def histogram(a, bins=10, range=None, normed=False, weights=None): -------- >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3]) (array([0, 2, 1]), array([0, 1, 2, 3])) - >>> np.histogram(np.arange(4), bins=np.arange(5), normed=True) + >>> np.histogram(np.arange(4), bins=np.arange(5), density=True) (array([ 0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3]) (array([1, 4, 1]), array([0, 1, 2, 3])) >>> a = np.arange(5) - >>> hist, bin_edges = np.histogram(a, normed=True) + >>> hist, bin_edges = np.histogram(a, density=True) >>> hist array([ 0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) >>> hist.sum() @@ -155,10 +165,8 @@ def histogram(a, bins=10, range=None, normed=False, weights=None): mn -= 0.5 mx += 0.5 bins = linspace(mn, mx, bins+1, endpoint=True) - uniform = True else: bins = asarray(bins) - uniform = False if (np.diff(bins) < 0).any(): raise AttributeError( 'bins must increase monotonically.') @@ -191,18 +199,19 @@ def histogram(a, bins=10, range=None, normed=False, weights=None): n = np.diff(n) - if normed: - db = array(np.diff(bins), float) - if not uniform: - warnings.warn(""" - This release of NumPy (1.6) fixes a normalization bug in histogram - function occuring with non-uniform bin widths. The returned value - is now a density: n / (N * bin width), where n is the bin count and - N the total number of points. - """) - return n/db/n.sum(), bins + if density is not None: + if density: + db = array(np.diff(bins), float) + return n/db/n.sum(), bins + else: + return n, bins else: - return n, bins + # deprecated, buggy behavior. Remove for Numpy 2.0 + if normed: + db = array(np.diff(bins), float) + return n/(n*db).sum(), bins + else: + return n, bins def histogramdd(sample, bins=10, range=None, normed=False, weights=None): diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 6e80b0438..ea3ca4000 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -565,10 +565,25 @@ class TestHistogram(TestCase): area = sum(a * diff(b)) assert_almost_equal(area, 1) + # Check with non-constant bin widths (buggy but backwards compatible) + v = np.arange(10) + bins = [0, 1, 5, 9, 10] + a, b = histogram(v, bins, normed=True) + area = sum(a * diff(b)) + assert_almost_equal(area, 1) + + def test_density(self): + # Check that the integral of the density equals 1. + n = 100 + v = rand(n) + a, b = histogram(v, density=True) + area = sum(a * diff(b)) + assert_almost_equal(area, 1) + # Check with non-constant bin widths v = np.arange(10) bins = [0,1,3,6,10] - a, b = histogram(v, bins, normed=True) + a, b = histogram(v, bins, density=True) assert_array_equal(a, .1) assert_equal(sum(a*diff(b)), 1) @@ -576,14 +591,13 @@ class TestHistogram(TestCase): # infinities. v = np.arange(10) bins = [0,1,3,6,np.inf] - a, b = histogram(v, bins, normed=True) + a, b = histogram(v, bins, density=True) assert_array_equal(a, [.1,.1,.1,0.]) # Taken from a bug report from N. Becker on the numpy-discussion # mailing list Aug. 6, 2010. - counts, dmy = np.histogram([1,2,3,4], [0.5,1.5,np.inf], normed=True) + counts, dmy = np.histogram([1,2,3,4], [0.5,1.5,np.inf], density=True) assert_equal(counts, [.25, 0]) - warnings.filters.pop(0) def test_outliers(self): # Check that outliers are not tallied @@ -646,13 +660,10 @@ class TestHistogram(TestCase): wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], normed=True) assert_array_almost_equal(wa, array([4, 5, 0, 1]) / 10. / 3. * 4) - warnings.filterwarnings('ignore', \ - message="\s*This release of NumPy fixes a normalization bug") # Check weights with non-uniform bin widths a,b = histogram(np.arange(9), [0,1,3,6,10], \ - weights=[2,1,1,1,1,1,1,1,1], normed=True) + weights=[2,1,1,1,1,1,1,1,1], density=True) assert_almost_equal(a, [.2, .1, .1, .075]) - warnings.filters.pop(0) def test_empty(self): a, b = histogram([], bins=([0,1])) |