diff options
author | dhuard <dhuard@localhost> | 2008-08-05 15:43:45 +0000 |
---|---|---|
committer | dhuard <dhuard@localhost> | 2008-08-05 15:43:45 +0000 |
commit | df9e25e2845f8a280b8808a33b1429ac6a4841f5 (patch) | |
tree | 6b9c7e41e0a052d592ede888f71ff6450823a1b9 /numpy/lib | |
parent | 6647bf7eaeb915e2d09db8b5c7584ee286962d3b (diff) | |
download | numpy-df9e25e2845f8a280b8808a33b1429ac6a4841f5.tar.gz |
Follow-up on changes to histogram semantics.
`new` is now set to None by default, which triggers the new behaviour and prints a warning.
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/function_base.py | 78 | ||||
-rw-r--r-- | numpy/lib/tests/test_function_base.py | 66 |
2 files changed, 85 insertions, 59 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 3a0212a0e..3090f60dd 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -191,7 +191,7 @@ def iterable(y): except: return 0 return 1 -def histogram(a, bins=10, range=None, normed=False, weights=None, new=False): +def histogram(a, bins=10, range=None, normed=False, weights=None, new=None): """ Compute the histogram of a set of data. @@ -200,14 +200,15 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, new=False): a : array_like Input data. bins : int or sequence of scalars, optional - If `bins` is an int, it gives the number of equal-width bins in the - given range (10, by default). If `new` is True, bins can also be - the bin edges, allowing for non-uniform bin widths. + If `bins` is an int, it defines the number of equal-width + bins in the given range (10, by default). If `bins` is a sequence, + it defines the bin edges, including the rightmost edge, allowing + for non-uniform bin widths. range : (float, float), optional The lower and upper range of the bins. If not provided, range - is simply ``(a.min(), a.max())``. With `new` set to True, values - outside the range are ignored. With `new` set to False, values - below the range are ignored, and those above the range are tallied + is simply ``(a.min(), a.max())``. Values outside the range are + ignored. Note that with `new` set to False, values below + the range are ignored, while those above the range are tallied in the rightmost bin. normed : bool, optional If False, the result will contain the number of samples @@ -222,20 +223,22 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, new=False): (instead of 1). If `normed` is True, the weights are normalized, so that the integral of the density over the range remains 1. The `weights` keyword is only available with `new` set to True. - new : bool, optional - Compatibility argument to aid in the transition between the old - (v1.1) and the new (v1.2) implementations. In version 1.2, - `new` will be True by default. + new : {None, True, False}, optional + Whether to use the new semantics for histogram: + * None : the new behaviour is used, and a warning is printed, + * True : the new behaviour is used and no warning is printed, + * False : the old behaviour is used and a message is printed + warning about future deprecation. Returns ------- hist : array The values of the histogram. See `normed` and `weights` for a description of the possible semantics. - bin_edges : array of dtype float - With ``new = False``, return the left bin edges (``length(hist)``). - With ``new = True``, return the bin edges ``(length(hist)+1)``. + Return the bin edges ``(length(hist)+1)``. + With ``new=False``, return the left bin edges (``length(hist)``). + See Also -------- @@ -259,13 +262,20 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, new=False): """ # Old behavior - if new is False: + if new == False: warnings.warn(""" - The semantics of histogram will be modified in - release 1.2 to improve outlier handling. The new behavior can be - obtained using new=True. Note that the new version accepts/returns - the bin edges instead of the left bin edges. - Please read the docstring for more information.""", FutureWarning) + The original semantics of histogram is scheduled to be + deprecated in NumPy 1.3. The new semantics fixes + long-standing issues with outliers handling. The main + changes concern + 1. the definition of the bin edges, + now including the rightmost edge, and + 2. the handling of upper outliers, + now ignored rather than tallied in the rightmost bin. + + Please read the docstring for more information. + """, Warning) + a = asarray(a).ravel() if (range is not None): @@ -277,10 +287,6 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, new=False): if not iterable(bins): if range is None: range = (a.min(), a.max()) - else: - warnings.warn(""" - Outliers handling will change in version 1.2. - Please read the docstring for details.""", FutureWarning) mn, mx = [mi+0.0 for mi in range] if mn == mx: mn -= 0.5 @@ -289,10 +295,7 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, new=False): else: if normed: raise ValueError, 'Use new=True to pass bin edges explicitly.' - warnings.warn(""" - The semantic for bins will change in version 1.2. - The bins will become the bin edges, instead of the left bin edges. - """, FutureWarning) + raise ValueError, 'Use new=True to pass bin edges explicitly.' bins = asarray(bins) if (np.diff(bins) < 0).any(): raise AttributeError, 'bins must increase monotonically.' @@ -318,7 +321,24 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, new=False): # New behavior - elif new is True: + elif new in [True, None]: + if new is None: + warnings.warn(""" + The semantics of histogram has been modified in + the current release to fix long-standing issues with + outliers handling. The main changes concern + 1. the definition of the bin edges, + now including the rightmost edge, and + 2. the handling of upper outliers, now ignored rather + than tallied in the rightmost bin. + The previous behaviour is still accessible using + `new=False`, but is scheduled to be deprecated in the + next release (1.3). + + *This warning will not printed in the 1.3 release.* + + Please read the docstring for more information. + """, Warning) a = asarray(a) if weights is not None: weights = asarray(weights) diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 3cc607f93..6e1c3583d 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -440,110 +440,116 @@ class TestSinc(TestCase): class TestHistogram(TestCase): def setUp(self): - warnings.simplefilter('ignore', FutureWarning) - + warnings.simplefilter('ignore', Warning) + def tearDown(self): warnings.resetwarnings() - def test_simple(self): + def test_simple_old(self): n=100 v=rand(n) - (a,b)=histogram(v) + (a,b)=histogram(v, new=False) #check if the sum of the bins equals the number of samples assert_equal(sum(a,axis=0), n) #check that the bin counts are evenly spaced when the data is from a # linear function - (a,b)=histogram(linspace(0,10,100)) + (a,b)=histogram(linspace(0,10,100), new=False) assert_array_equal(a, 10) - def test_simple_new(self): + def test_simple(self): n=100 v=rand(n) - (a,b)=histogram(v, new=True) + (a,b)=histogram(v) #check if the sum of the bins equals the number of samples assert_equal(sum(a,axis=0), n) #check that the bin counts are evenly spaced when the data is from a # linear function - (a,b)=histogram(linspace(0,10,100), new=True) + (a,b)=histogram(linspace(0,10,100)) assert_array_equal(a, 10) - def test_normed_new(self): + def test_one_bin(self): + # Ticket 632 + hist,edges = histogram([1,2,3,4],[1,2]) + assert_array_equal(hist,[2, ]) + assert_array_equal(edges,[1,2]) + + def test_normed(self): # Check that the integral of the density equals 1. n = 100 v = rand(n) - a,b = histogram(v, normed=True, new=True) + a,b = histogram(v, normed=True) area = sum(a*diff(b)) assert_almost_equal(area, 1) # Check with non constant bin width v = rand(n)*10 bins = [0,1,5, 9, 10] - a,b = histogram(v, bins, normed=True, new=True) + a,b = histogram(v, bins, normed=True) area = sum(a*diff(b)) assert_almost_equal(area, 1) - def test_outliers_new(self): + def test_outliers(self): # Check that outliers are not tallied a = arange(10)+.5 # Lower outliers - h,b = histogram(a, range=[0,9], new=True) + h,b = histogram(a, range=[0,9]) assert_equal(h.sum(),9) # Upper outliers - h,b = histogram(a, range=[1,10], new=True) + h,b = histogram(a, range=[1,10]) assert_equal(h.sum(),9) # Normalization - h,b = histogram(a, range=[1,9], normed=True, new=True) + h,b = histogram(a, range=[1,9], normed=True) assert_equal((h*diff(b)).sum(),1) # Weights w = arange(10)+.5 - h,b = histogram(a, range=[1,9], weights=w, normed=True, new=True) + h,b = histogram(a, range=[1,9], weights=w, normed=True) assert_equal((h*diff(b)).sum(),1) - h,b = histogram(a, bins=8, range=[1,9], weights=w, new=True) + h,b = histogram(a, bins=8, range=[1,9], weights=w) assert_equal(h, w[1:-1]) - def test_type_new(self): + def test_type(self): # Check the type of the returned histogram a = arange(10)+.5 - h,b = histogram(a, new=True) + h,b = histogram(a) assert(issubdtype(h.dtype, int)) - h,b = histogram(a, normed=True, new=True) + h,b = histogram(a, normed=True) assert(issubdtype(h.dtype, float)) - h,b = histogram(a, weights=ones(10, int), new=True) + h,b = histogram(a, weights=ones(10, int)) assert(issubdtype(h.dtype, int)) - h,b = histogram(a, weights=ones(10, float), new=True) + h,b = histogram(a, weights=ones(10, float)) assert(issubdtype(h.dtype, float)) - def test_weights_new(self): + def test_weights(self): v = rand(100) w = ones(100)*5 - a,b = histogram(v,new=True) - na,nb = histogram(v, normed=True, new=True) - wa,wb = histogram(v, weights=w, new=True) - nwa,nwb = histogram(v, weights=w, normed=True, new=True) + a,b = histogram(v) + na,nb = histogram(v, normed=True) + wa,wb = histogram(v, weights=w) + nwa,nwb = histogram(v, weights=w, normed=True) assert_array_almost_equal(a*5, wa) assert_array_almost_equal(na, nwa) # Check weights are properly applied. v = linspace(0,10,10) w = concatenate((zeros(5), ones(5))) - wa,wb = histogram(v, bins=arange(11),weights=w, new=True) + wa,wb = histogram(v, bins=arange(11),weights=w) assert_array_almost_equal(wa, w) # Check with integer weights - wa, wb = histogram([1,2,2,4], bins=4, weights=[4,3,2,1], new=True) + wa, wb = histogram([1,2,2,4], bins=4, weights=[4,3,2,1]) assert_array_equal(wa, [4,5,0,1]) - wa, wb = histogram([1,2,2,4], bins=4, weights=[4,3,2,1], normed=True, new=True) + wa, wb = histogram([1,2,2,4], bins=4, weights=[4,3,2,1], normed=True) assert_array_equal(wa, array([4,5,0,1])/10./3.*4) class TestHistogramdd(TestCase): |