1 files changed, 54 insertions, 17 deletions
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
index 1b5a71d0e..c96b01d42 100644
--- a/numpy/lib/tests/test_histograms.py
+++ b/numpy/lib/tests/test_histograms.py
@@ -6,7 +6,7 @@ from numpy.lib.histograms import histogram, histogramdd, histogram_bin_edges
 from numpy.testing import (
     assert_, assert_equal, assert_array_equal, assert_almost_equal,
     assert_array_almost_equal, assert_raises, assert_allclose,
-    assert_array_max_ulp, assert_warns, assert_raises_regex, suppress_warnings,
+    assert_array_max_ulp, assert_raises_regex, suppress_warnings,
     )
 
 
@@ -289,13 +289,13 @@ class TestHistogram(object):
     def test_object_array_of_0d(self):
         # gh-7864
         assert_raises(ValueError,
-            histogram, [np.array([0.4]) for i in range(10)] + [-np.inf])
+            histogram, [np.array(0.4) for i in range(10)] + [-np.inf])
         assert_raises(ValueError,
-            histogram, [np.array([0.4]) for i in range(10)] + [np.inf])
+            histogram, [np.array(0.4) for i in range(10)] + [np.inf])
 
         # these should not crash
-        np.histogram([np.array([0.5]) for i in range(10)] + [.500000000000001])
-        np.histogram([np.array([0.5]) for i in range(10)] + [.5])
+        np.histogram([np.array(0.5) for i in range(10)] + [.500000000000001])
+        np.histogram([np.array(0.5) for i in range(10)] + [.5])
 
     def test_some_nan_values(self):
         # gh-7503
@@ -431,7 +431,7 @@ class TestHistogramOptimBinNums(object):
 
     def test_empty(self):
         estimator_list = ['fd', 'scott', 'rice', 'sturges',
-                          'doane', 'sqrt', 'auto']
+                          'doane', 'sqrt', 'auto', 'stone']
         # check it can deal with empty data
         for estimator in estimator_list:
             a, b = histogram([], bins=estimator)
@@ -447,11 +447,11 @@ class TestHistogramOptimBinNums(object):
         # Some basic sanity checking, with some fixed data.
         # Checking for the correct number of bins
         basic_test = {50:   {'fd': 4,  'scott': 4,  'rice': 8,  'sturges': 7,
-                             'doane': 8, 'sqrt': 8, 'auto': 7},
+                             'doane': 8, 'sqrt': 8, 'auto': 7, 'stone': 2},
                       500:  {'fd': 8,  'scott': 8,  'rice': 16, 'sturges': 10,
-                             'doane': 12, 'sqrt': 23, 'auto': 10},
+                             'doane': 12, 'sqrt': 23, 'auto': 10, 'stone': 9},
                       5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14,
-                             'doane': 17, 'sqrt': 71, 'auto': 17}}
+                             'doane': 17, 'sqrt': 71, 'auto': 17, 'stone': 20}}
 
         for testlen, expectedResults in basic_test.items():
             # Create some sort of non uniform data to test with
@@ -471,11 +471,11 @@ class TestHistogramOptimBinNums(object):
         precalculated.
         """
         small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
-                         'doane': 1, 'sqrt': 1},
+                         'doane': 1, 'sqrt': 1, 'stone': 1},
                      2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2,
-                         'doane': 1, 'sqrt': 2},
+                         'doane': 1, 'sqrt': 2, 'stone': 1},
                      3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3,
-                         'doane': 3, 'sqrt': 2}}
+                         'doane': 3, 'sqrt': 2, 'stone': 1}}
 
         for testlen, expectedResults in small_dat.items():
             testdat = np.arange(testlen)
@@ -499,7 +499,7 @@ class TestHistogramOptimBinNums(object):
         """
         novar_dataset = np.ones(100)
         novar_resultdict = {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
-                            'doane': 1, 'sqrt': 1, 'auto': 1}
+                            'doane': 1, 'sqrt': 1, 'auto': 1, 'stone': 1}
 
         for estimator, numbins in novar_resultdict.items():
             a, b = np.histogram(novar_dataset, estimator)
@@ -538,12 +538,32 @@ class TestHistogramOptimBinNums(object):
         xcenter = np.linspace(-10, 10, 50)
         outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter))
 
-        outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11}
+        outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11, 'stone': 6}
 
         for estimator, numbins in outlier_resultdict.items():
             a, b = np.histogram(outlier_dataset, estimator)
             assert_equal(len(a), numbins)
 
+    def test_scott_vs_stone(self):
+        """Verify that Scott's rule and Stone's rule converge for normally distributed data"""
+
+        def nbins_ratio(seed, size):
+            rng = np.random.RandomState(seed)
+            x = rng.normal(loc=0, scale=2, size=size)
+            a, b = len(np.histogram(x, 'stone')[0]), len(np.histogram(x, 'scott')[0])
+            return a / (a + b)
+
+        ll = [[nbins_ratio(seed, size) for size in np.geomspace(start=10, stop=100, num=4).round().astype(int)]
+              for seed in range(256)]
+
+        # the average difference between the two methods decreases as the dataset size increases.
+        assert_almost_equal(abs(np.mean(ll, axis=0) - 0.5),
+                            [0.1065248,
+                             0.0968844,
+                             0.0331818,
+                             0.0178057],
+                            decimal=3)
+
     def test_simple_range(self):
         """
         Straightforward testing with a mixture of linspace data (for
@@ -555,11 +575,11 @@ class TestHistogramOptimBinNums(object):
         # Checking for the correct number of bins
         basic_test = {
                       50:   {'fd': 8,  'scott': 8,  'rice': 15,
-                             'sturges': 14, 'auto': 14},
+                             'sturges': 14, 'auto': 14, 'stone': 8},
                       500:  {'fd': 15, 'scott': 16, 'rice': 32,
-                             'sturges': 20, 'auto': 20},
+                             'sturges': 20, 'auto': 20, 'stone': 80},
                       5000: {'fd': 33, 'scott': 33, 'rice': 69,
-                             'sturges': 27, 'auto': 33}
+                             'sturges': 27, 'auto': 33, 'stone': 80}
                      }
 
         for testlen, expectedResults in basic_test.items():
@@ -794,3 +814,20 @@ class TestHistogramdd(object):
         hist_dd, edges_dd = histogramdd((v,), (bins,), density=True)
         assert_equal(hist, hist_dd)
         assert_equal(edges, edges_dd[0])
+
+    def test_density_via_normed(self):
+        # normed should simply be an alias for the density argument
+        v = np.arange(10)
+        bins = np.array([0, 1, 3, 6, 10])
+        hist, edges = histogram(v, bins, density=True)
+        hist_dd, edges_dd = histogramdd((v,), (bins,), normed=True)
+        assert_equal(hist, hist_dd)
+        assert_equal(edges, edges_dd[0])
+
+    def test_density_normed_redundancy(self):
+        v = np.arange(10)
+        bins = np.array([0, 1, 3, 6, 10])
+        with assert_raises_regex(TypeError, "Cannot specify both"):
+            hist_dd, edges_dd = histogramdd((v,), (bins,),
+                                            density=True,
+                                            normed=True)