diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/lib/histograms.py | 32 | ||||
-rw-r--r-- | numpy/lib/tests/test_histograms.py | 14 |
2 files changed, 43 insertions, 3 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 422b356f7..f03f30fb0 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -260,6 +260,32 @@ def _get_outer_edges(a, range): return first_edge, last_edge +def _unsigned_subtract(a, b): + """ + Subtract two values where a >= b, and produce an unsigned result + + This is needed when finding the difference between the upper and lower + bound of an int16 histogram + """ + # coerce to a single type + signed_to_unsigned = { + np.byte: np.ubyte, + np.short: np.ushort, + np.intc: np.uintc, + np.int_: np.uint, + np.longlong: np.ulonglong + } + dt = np.result_type(a, b) + try: + dt = signed_to_unsigned[dt.type] + except KeyError: + return np.subtract(a, b, dtype=dt) + else: + # we know the inputs are integers, and we are deliberately casting + # signed to unsigned + return np.subtract(a, b, casting='unsafe', dtype=dt) + + def _get_bin_edges(a, bins, range, weights): """ Computes the bins used internally by `histogram`. @@ -311,7 +337,7 @@ def _get_bin_edges(a, bins, range, weights): # Do not call selectors on empty arrays width = _hist_bin_selectors[bin_name](a) if width: - n_equal_bins = int(np.ceil((last_edge - first_edge) / width)) + n_equal_bins = int(np.ceil(_unsigned_subtract(last_edge, first_edge) / width)) else: # Width can be zero for some estimators, e.g. FD when # the IQR of the data is zero. @@ -703,7 +729,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, n = np.zeros(n_equal_bins, ntype) # Pre-compute histogram scaling factor - norm = n_equal_bins / (last_edge - first_edge) + norm = n_equal_bins / _unsigned_subtract(last_edge, first_edge) # We iterate over blocks here for two reasons: the first is that for # large arrays, it is actually faster (for example for a 10^8 array it @@ -731,7 +757,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, # Compute the bin indices, and for values that lie exactly on # last_edge we need to subtract one - f_indices = (tmp_a - first_edge) * norm + f_indices = _unsigned_subtract(tmp_a, first_edge) * norm indices = f_indices.astype(np.intp) indices[indices == n_equal_bins] -= 1 diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py index f136b5c81..561f5f938 100644 --- a/numpy/lib/tests/test_histograms.py +++ b/numpy/lib/tests/test_histograms.py @@ -310,6 +310,20 @@ class TestHistogram(object): assert_equal(d_edge.dtype, dates.dtype) assert_equal(t_edge.dtype, td) + def do_signed_overflow_bounds(self, dtype): + exponent = 8 * np.dtype(dtype).itemsize - 1 + arr = np.array([-2**exponent + 4, 2**exponent - 4], dtype=dtype) + hist, e = histogram(arr, bins=2) + assert_equal(e, [-2**exponent + 4, 0, 2**exponent - 4]) + assert_equal(hist, [1, 1]) + + def test_signed_overflow_bounds(self): + self.do_signed_overflow_bounds(np.byte) + self.do_signed_overflow_bounds(np.short) + self.do_signed_overflow_bounds(np.intc) + self.do_signed_overflow_bounds(np.int_) + self.do_signed_overflow_bounds(np.longlong) + def do_precision_lower_bound(self, float_small, float_large): eps = np.finfo(float_large).eps |