From 3ff4924ead45ef6db81778daae08e3c939ea4629 Mon Sep 17 00:00:00 2001
From: Warren Weckesser <warren.weckesser@gmail.com>
Date: Tue, 27 Aug 2019 15:15:26 -0400
Subject: BUG: lib: Fix histogram problem with signed integer arrays.

An input such as

    np.histogram(np.array([-2, 0, 127], dtype=np.int8), bins="auto")

would raise the exception

    ValueError: Number of samples, -1, must be non-negative.

The problem was that the peak-to-peak value for the input array was
computed with the `ptp` method, which returned a negative value for
signed integer arrays when the true peak-to-peak value exceeded the
maximum value representable in the array's data type.  In the example
above, 127 - (-2) = 129 overflows int8 and wraps around to -127.

The fix is to use a peak-to-peak function that returns an
unsigned value for signed integer arrays.

Closes gh-14379.
---
 numpy/lib/histograms.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'numpy/lib/histograms.py')

diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index 8474bd5d3..03c365ab6 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -22,6 +22,16 @@ array_function_dispatch = functools.partial(
 _range = range
 
 
+def _ptp(x):
+    """Return the peak-to-peak value (maximum minus minimum) of x.
+
+    Computes ``_unsigned_subtract(x.max(), x.min())`` rather than calling
+    ``x.ptp()``, so that for signed integer arrays the result is unsigned
+    and does not wrap to a negative value when it exceeds the dtype's maximum.
+    """
+    return _unsigned_subtract(x.max(), x.min())
+
+
 def _hist_bin_sqrt(x, range):
     """
     Square root histogram bin estimator.
@@ -40,7 +50,7 @@ def _hist_bin_sqrt(x, range):
     h : An estimate of the optimal bin width for the given data.
     """
     del range  # unused
-    return x.ptp() / np.sqrt(x.size)
+    return _ptp(x) / np.sqrt(x.size)
 
 
 def _hist_bin_sturges(x, range):
@@ -63,7 +73,7 @@ def _hist_bin_sturges(x, range):
     h : An estimate of the optimal bin width for the given data.
     """
     del range  # unused
-    return x.ptp() / (np.log2(x.size) + 1.0)
+    return _ptp(x) / (np.log2(x.size) + 1.0)
 
 
 def _hist_bin_rice(x, range):
@@ -87,7 +97,7 @@ def _hist_bin_rice(x, range):
     h : An estimate of the optimal bin width for the given data.
     """
     del range  # unused
-    return x.ptp() / (2.0 * x.size ** (1.0 / 3))
+    return _ptp(x) / (2.0 * x.size ** (1.0 / 3))
 
 
 def _hist_bin_scott(x, range):
@@ -137,7 +147,7 @@ def _hist_bin_stone(x, range):
     """
 
     n = x.size
-    ptp_x = np.ptp(x)
+    ptp_x = _ptp(x)
     if n <= 1 or ptp_x == 0:
         return 0
 
@@ -184,7 +194,7 @@ def _hist_bin_doane(x, range):
             np.true_divide(temp, sigma, temp)
             np.power(temp, 3, temp)
             g1 = np.mean(temp)
-            return x.ptp() / (1.0 + np.log2(x.size) +
+            return _ptp(x) / (1.0 + np.log2(x.size) +
                                     np.log2(1.0 + np.absolute(g1) / sg1))
     return 0.0
 
-- 
cgit v1.2.1