diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2018-08-18 15:02:22 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-18 15:02:22 -0500 |
commit | 7c5a3f4f9135af6ebe33dcd3242ef3e6db115cd6 (patch) | |
tree | b58b1922a379c7bbc0b71e1d4bf57dc31404b9ec /numpy/lib/histograms.py | |
parent | c33d207e747cf127d7ea81a6ea37d7e367c54c56 (diff) | |
parent | b581aade6891662e37931b04bba29262cd6057de (diff) | |
download | numpy-7c5a3f4f9135af6ebe33dcd3242ef3e6db115cd6.tar.gz |
Merge pull request #11778 from eric-wieser/histogram-overflow
BUG: Avoid signed overflow in histogram
Diffstat (limited to 'numpy/lib/histograms.py')
-rw-r--r-- | numpy/lib/histograms.py | 32 |
1 files changed, 29 insertions, 3 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 422b356f7..f03f30fb0 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -260,6 +260,32 @@ def _get_outer_edges(a, range): return first_edge, last_edge +def _unsigned_subtract(a, b): + """ + Subtract two values where a >= b, and produce an unsigned result + + This is needed when finding the difference between the upper and lower + bound of an int16 histogram + """ + # coerce to a single type + signed_to_unsigned = { + np.byte: np.ubyte, + np.short: np.ushort, + np.intc: np.uintc, + np.int_: np.uint, + np.longlong: np.ulonglong + } + dt = np.result_type(a, b) + try: + dt = signed_to_unsigned[dt.type] + except KeyError: + return np.subtract(a, b, dtype=dt) + else: + # we know the inputs are integers, and we are deliberately casting + # signed to unsigned + return np.subtract(a, b, casting='unsafe', dtype=dt) + + def _get_bin_edges(a, bins, range, weights): """ Computes the bins used internally by `histogram`. @@ -311,7 +337,7 @@ def _get_bin_edges(a, bins, range, weights): # Do not call selectors on empty arrays width = _hist_bin_selectors[bin_name](a) if width: - n_equal_bins = int(np.ceil((last_edge - first_edge) / width)) + n_equal_bins = int(np.ceil(_unsigned_subtract(last_edge, first_edge) / width)) else: # Width can be zero for some estimators, e.g. FD when # the IQR of the data is zero. @@ -703,7 +729,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, n = np.zeros(n_equal_bins, ntype) # Pre-compute histogram scaling factor - norm = n_equal_bins / (last_edge - first_edge) + norm = n_equal_bins / _unsigned_subtract(last_edge, first_edge) # We iterate over blocks here for two reasons: the first is that for # large arrays, it is actually faster (for example for a 10^8 array it @@ -731,7 +757,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, # Compute the bin indices, and for values that lie exactly on # last_edge we need to subtract one - f_indices = (tmp_a - first_edge) * norm + f_indices = _unsigned_subtract(tmp_a, first_edge) * norm indices = f_indices.astype(np.intp) indices[indices == n_equal_bins] -= 1 |