From 3ff4924ead45ef6db81778daae08e3c939ea4629 Mon Sep 17 00:00:00 2001 From: Warren Weckesser Date: Tue, 27 Aug 2019 15:15:26 -0400 Subject: BUG: lib: Fix histogram problem with signed integer arrays. An input such as np.histogram(np.array([-2, 0, 127], dtype=np.int8), bins="auto") would raise the exception ValueError: Number of samples, -1, must be non-negative. The problem was that the peak-to-peak value for the input array was computed with the `ptp` method, which returned negative values for signed integer arrays when the actual value was more than the maximum signed value of the array's data type. The fix is to use a peak-to-peak function that returns an unsigned value for signed integer arrays. Closes gh-14379. --- numpy/lib/histograms.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'numpy/lib/histograms.py') diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 8474bd5d3..03c365ab6 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -22,6 +22,16 @@ array_function_dispatch = functools.partial( _range = range +def _ptp(x): + """Peak-to-peak value of x. + + This implementation avoids the problem of signed integer arrays having a + peak-to-peak value that cannot be represented with the array's data type. + This function returns an unsigned value for signed integer arrays. + """ + return _unsigned_subtract(x.max(), x.min()) + + def _hist_bin_sqrt(x, range): """ Square root histogram bin estimator. @@ -40,7 +50,7 @@ def _hist_bin_sqrt(x, range): h : An estimate of the optimal bin width for the given data. """ del range # unused - return x.ptp() / np.sqrt(x.size) + return _ptp(x) / np.sqrt(x.size) def _hist_bin_sturges(x, range): @@ -63,7 +73,7 @@ def _hist_bin_sturges(x, range): h : An estimate of the optimal bin width for the given data. """ del range # unused - return x.ptp() / (np.log2(x.size) + 1.0) + return _ptp(x) / (np.log2(x.size) + 1.0) def _hist_bin_rice(x, range): @@ -87,7 +97,7 @@ def _hist_bin_rice(x, range): h : An estimate of the optimal bin width for the given data. """ del range # unused - return x.ptp() / (2.0 * x.size ** (1.0 / 3)) + return _ptp(x) / (2.0 * x.size ** (1.0 / 3)) def _hist_bin_scott(x, range): @@ -137,7 +147,7 @@ def _hist_bin_stone(x, range): """ n = x.size - ptp_x = np.ptp(x) + ptp_x = _ptp(x) if n <= 1 or ptp_x == 0: return 0 @@ -184,7 +194,7 @@ def _hist_bin_doane(x, range): np.true_divide(temp, sigma, temp) np.power(temp, 3, temp) g1 = np.mean(temp) - return x.ptp() / (1.0 + np.log2(x.size) + + return _ptp(x) / (1.0 + np.log2(x.size) + np.log2(1.0 + np.absolute(g1) / sg1)) return 0.0 -- cgit v1.2.1