diff options
author | Eric Wieser <wieser.eric@gmail.com> | 2018-06-29 23:56:38 -0700 |
---|---|---|
committer | Eric Wieser <wieser.eric@gmail.com> | 2018-07-06 12:17:31 -0700 |
commit | 307dd76cb0fb770e07524e4cd29cd08e17edc2c3 (patch) | |
tree | e190ee007e739a9a47c34f8393e5fbd28a564a79 /numpy/lib/function_base.py | |
parent | 739443679b50b43c34808b8fb767bac643fcd91d (diff) | |
download | numpy-307dd76cb0fb770e07524e4cd29cd08e17edc2c3.tar.gz |
BUG: Don't convert inputs to `np.float64` in digitize
This converts digitize to a pure-python function that falls back on searchsorted.
Performance doesn't really matter here anyway - if you care about performance, then you should just call searchsorted directly, rather than checking the order of the bins.
Partially fixes gh-11022
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r-- | numpy/lib/function_base.py | 112 |
1 files changed, 111 insertions, 1 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 9a680dd55..1a43da8b0 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -31,7 +31,7 @@ from numpy.core.function_base import add_newdoc from numpy.lib.twodim_base import diag from .utils import deprecate from numpy.core.multiarray import ( - _insert, add_docstring, digitize, bincount, normalize_axis_index, + _insert, add_docstring, bincount, normalize_axis_index, _monotonicity, interp as compiled_interp, interp_complex as compiled_interp_complex ) from numpy.core.umath import _add_newdoc_ufunc as add_newdoc_ufunc @@ -4493,3 +4493,113 @@ def append(arr, values, axis=None): values = ravel(values) axis = arr.ndim-1 return concatenate((arr, values), axis=axis) + + +def digitize(x, bins, right=False): + """ + Return the indices of the bins to which each value in input array belongs. + + ========= ============= ============================ + `right` order of bins returned index `i` satisfies + ========= ============= ============================ + ``False`` increasing ``bins[i-1] <= x < bins[i]`` + ``True`` increasing ``bins[i-1] < x <= bins[i]`` + ``False`` decreasing ``bins[i-1] > x >= bins[i]`` + ``True`` decreasing ``bins[i-1] >= x > bins[i]`` + ========= ============= ============================ + + If values in `x` are beyond the bounds of `bins`, 0 or ``len(bins)`` is + returned as appropriate. + + Parameters + ---------- + x : array_like + Input array to be binned. Prior to NumPy 1.10.0, this array had to + be 1-dimensional, but can now have any shape. + bins : array_like + Array of bins. It has to be 1-dimensional and monotonic. + right : bool, optional + Indicating whether the intervals include the right or the left bin + edge. Default behavior is (right==False) indicating that the interval + does not include the right edge. The left bin end is open in this + case, i.e., bins[i-1] <= x < bins[i] is the default behavior for + monotonically increasing bins. + + Returns + ------- + indices : ndarray of ints + Output array of indices, of same shape as `x`. + + Raises + ------ + ValueError + If `bins` is not monotonic. + TypeError + If the type of the input is complex. + + See Also + -------- + bincount, histogram, unique, searchsorted + + Notes + ----- + If values in `x` are such that they fall outside the bin range, + attempting to index `bins` with the indices that `digitize` returns + will result in an IndexError. + + .. versionadded:: 1.10.0 + + `np.digitize` is implemented in terms of `np.searchsorted`. This means + that a binary search is used to bin the values, which scales much better + for larger number of bins than the previous linear search. It also removes + the requirement for the input array to be 1-dimensional. + + For monotonically _increasing_ `bins`, the following are equivalent:: + + np.digitize(x, bins, right=True) + np.searchsorted(bins, x, side='left') + + Note that as the order of the arguments are reversed, the side must be too. + The `searchsorted` call is marginally faster, as it does not do any + monotonicity checks. Perhaps more importantly, it supports all dtypes. + + Examples + -------- + >>> x = np.array([0.2, 6.4, 3.0, 1.6]) + >>> bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0]) + >>> inds = np.digitize(x, bins) + >>> inds + array([1, 4, 3, 2]) + >>> for n in range(x.size): + ... print(bins[inds[n]-1], "<=", x[n], "<", bins[inds[n]]) + ... + 0.0 <= 0.2 < 1.0 + 4.0 <= 6.4 < 10.0 + 2.5 <= 3.0 < 4.0 + 1.0 <= 1.6 < 2.5 + + >>> x = np.array([1.2, 10.0, 12.4, 15.5, 20.]) + >>> bins = np.array([0, 5, 10, 15, 20]) + >>> np.digitize(x,bins,right=True) + array([1, 2, 3, 4, 4]) + >>> np.digitize(x,bins,right=False) + array([1, 3, 3, 4, 5]) + """ + x = _nx.asarray(x) + bins = _nx.asarray(bins) + + # here for compatibility, searchsorted below is happy to take this + if np.issubdtype(x.dtype, _nx.complexfloating): + raise TypeError("x may not be complex") + + mono = _monotonicity(bins) + if mono == 0: + raise ValueError("bins must be monotonically increasing or decreasing") + + # this is backwards because the arguments below are swapped + side = 'left' if right else 'right' + if mono == -1: + # reverse the bins, and invert the results + return len(bins) - _nx.searchsorted(bins[::-1], x, side=side) + else: + return _nx.searchsorted(bins, x, side=side) |