diff options
author | Eric Wieser <wieser.eric@gmail.com> | 2018-08-17 22:42:39 -0700 |
---|---|---|
committer | Eric Wieser <wieser.eric@gmail.com> | 2018-08-18 10:31:35 -0700 |
commit | b581aade6891662e37931b04bba29262cd6057de (patch) | |
tree | 488833f95378d30496b39d8b370099c226671096 /numpy | |
parent | 3d9a082d146195944071a81ed0eae9d16976961a (diff) | |
download | numpy-b581aade6891662e37931b04bba29262cd6057de.tar.gz |
BUG: Avoid signed overflow in histogram
When dividing the range between first and last, the formula `(x - first) / (last - first)` is used, where `last >= first`
This falls apart if the types of these variables are signed integers, in which case overflow can occur.
The solution is to force the subtraction result to be unsigned.
This is a regression in 1.15.0
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/lib/histograms.py | 32 | ||||
-rw-r--r-- | numpy/lib/tests/test_histograms.py | 14 |
2 files changed, 43 insertions, 3 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 2922b3a86..1a47acf78 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -259,6 +259,32 @@ def _get_outer_edges(a, range): return first_edge, last_edge +def _unsigned_subtract(a, b): + """ + Subtract two values where a >= b, and produce an unsigned result + + This is needed when finding the difference between the upper and lower + bound of an int16 histogram + """ + # coerce to a single type + signed_to_unsigned = { + np.byte: np.ubyte, + np.short: np.ushort, + np.intc: np.uintc, + np.int_: np.uint, + np.longlong: np.ulonglong + } + dt = np.result_type(a, b) + try: + dt = signed_to_unsigned[dt.type] + except KeyError: + return np.subtract(a, b, dtype=dt) + else: + # we know the inputs are integers, and we are deliberately casting + # signed to unsigned + return np.subtract(a, b, casting='unsafe', dtype=dt) + + def _get_bin_edges(a, bins, range, weights): """ Computes the bins used internally by `histogram`. @@ -310,7 +336,7 @@ def _get_bin_edges(a, bins, range, weights): # Do not call selectors on empty arrays width = _hist_bin_selectors[bin_name](a) if width: - n_equal_bins = int(np.ceil((last_edge - first_edge) / width)) + n_equal_bins = int(np.ceil(_unsigned_subtract(last_edge, first_edge) / width)) else: # Width can be zero for some estimators, e.g. FD when # the IQR of the data is zero. @@ -704,7 +730,7 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, n = np.zeros(n_equal_bins, ntype) # Pre-compute histogram scaling factor - norm = n_equal_bins / (last_edge - first_edge) + norm = n_equal_bins / _unsigned_subtract(last_edge, first_edge) # We iterate over blocks here for two reasons: the first is that for # large arrays, it is actually faster (for example for a 10^8 array it @@ -732,7 +758,7 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, # Compute the bin indices, and for values that lie exactly on # last_edge we need to subtract one - f_indices = (tmp_a - first_edge) * norm + f_indices = _unsigned_subtract(tmp_a, first_edge) * norm indices = f_indices.astype(np.intp) indices[indices == n_equal_bins] -= 1 diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py index e16ae12c2..294dd539b 100644 --- a/numpy/lib/tests/test_histograms.py +++ b/numpy/lib/tests/test_histograms.py @@ -298,6 +298,20 @@ class TestHistogram(object): assert_equal(d_edge.dtype, dates.dtype) assert_equal(t_edge.dtype, td) + def do_signed_overflow_bounds(self, dtype): + exponent = 8 * np.dtype(dtype).itemsize - 1 + arr = np.array([-2**exponent + 4, 2**exponent - 4], dtype=dtype) + hist, e = histogram(arr, bins=2) + assert_equal(e, [-2**exponent + 4, 0, 2**exponent - 4]) + assert_equal(hist, [1, 1]) + + def test_signed_overflow_bounds(self): + self.do_signed_overflow_bounds(np.byte) + self.do_signed_overflow_bounds(np.short) + self.do_signed_overflow_bounds(np.intc) + self.do_signed_overflow_bounds(np.int_) + self.do_signed_overflow_bounds(np.longlong) + def do_precision_lower_bound(self, float_small, float_large): eps = np.finfo(float_large).eps |