diff options
Diffstat (limited to 'numpy/lib/histograms.py')
-rw-r--r-- | numpy/lib/histograms.py | 58 |
1 files changed, 55 insertions, 3 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 422b356f7..1ff25b81f 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -8,6 +8,7 @@ import warnings import numpy as np from numpy.compat.py3k import basestring +from numpy.core.overrides import array_function_dispatch __all__ = ['histogram', 'histogramdd', 'histogram_bin_edges'] @@ -220,6 +221,14 @@ _hist_bin_selectors = {'auto': _hist_bin_auto, def _ravel_and_check_weights(a, weights): """ Check a and weights have matching shapes, and ravel both """ a = np.asarray(a) + + # Ensure that the array is a "subtractable" dtype + if a.dtype == np.bool_: + warnings.warn("Converting input from {} to {} for compatibility." + .format(a.dtype, np.uint8), + RuntimeWarning, stacklevel=2) + a = a.astype(np.uint8) + if weights is not None: weights = np.asarray(weights) if weights.shape != a.shape: @@ -260,6 +269,32 @@ def _get_outer_edges(a, range): return first_edge, last_edge +def _unsigned_subtract(a, b): + """ + Subtract two values where a >= b, and produce an unsigned result + + This is needed when finding the difference between the upper and lower + bound of an int16 histogram + """ + # coerce to a single type + signed_to_unsigned = { + np.byte: np.ubyte, + np.short: np.ushort, + np.intc: np.uintc, + np.int_: np.uint, + np.longlong: np.ulonglong + } + dt = np.result_type(a, b) + try: + dt = signed_to_unsigned[dt.type] + except KeyError: + return np.subtract(a, b, dtype=dt) + else: + # we know the inputs are integers, and we are deliberately casting + # signed to unsigned + return np.subtract(a, b, casting='unsafe', dtype=dt) + + def _get_bin_edges(a, bins, range, weights): """ Computes the bins used internally by `histogram`. @@ -311,7 +346,7 @@ def _get_bin_edges(a, bins, range, weights): # Do not call selectors on empty arrays width = _hist_bin_selectors[bin_name](a) if width: - n_equal_bins = int(np.ceil((last_edge - first_edge) / width)) + n_equal_bins = int(np.ceil(_unsigned_subtract(last_edge, first_edge) / width)) else: # Width can be zero for some estimators, e.g. FD when # the IQR of the data is zero. @@ -366,6 +401,11 @@ def _search_sorted_inclusive(a, v): )) +def _histogram_bin_edges_dispatcher(a, bins=None, range=None, weights=None): + return (a, bins, weights) + + +@array_function_dispatch(_histogram_bin_edges_dispatcher) def histogram_bin_edges(a, bins=10, range=None, weights=None): r""" Function to calculate only the edges of the bins used by the `histogram` function. @@ -560,6 +600,12 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): return bin_edges +def _histogram_dispatcher( + a, bins=None, range=None, normed=None, weights=None, density=None): + return (a, bins, weights) + + +@array_function_dispatch(_histogram_dispatcher) def histogram(a, bins=10, range=None, normed=None, weights=None, density=None): r""" @@ -703,7 +749,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, n = np.zeros(n_equal_bins, ntype) # Pre-compute histogram scaling factor - norm = n_equal_bins / (last_edge - first_edge) + norm = n_equal_bins / _unsigned_subtract(last_edge, first_edge) # We iterate over blocks here for two reasons: the first is that for # large arrays, it is actually faster (for example for a 10^8 array it @@ -731,7 +777,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, # Compute the bin indices, and for values that lie exactly on # last_edge we need to subtract one - f_indices = (tmp_a - first_edge) * norm + f_indices = _unsigned_subtract(tmp_a, first_edge) * norm indices = f_indices.astype(np.intp) indices[indices == n_equal_bins] -= 1 @@ -812,6 +858,12 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, return n, bin_edges +def _histogramdd_dispatcher(sample, bins=None, range=None, normed=None, + weights=None, density=None): + return (sample, bins, weights) + + +@array_function_dispatch(_histogramdd_dispatcher) def histogramdd(sample, bins=10, range=None, normed=None, weights=None, density=None): """ |