summaryrefslogtreecommitdiff
path: root/numpy/lib/function_base.py
diff options
context:
space:
mode:
authorEric Wieser <wieser.eric@gmail.com>2018-06-29 23:56:38 -0700
committerEric Wieser <wieser.eric@gmail.com>2018-07-06 12:17:31 -0700
commit307dd76cb0fb770e07524e4cd29cd08e17edc2c3 (patch)
treee190ee007e739a9a47c34f8393e5fbd28a564a79 /numpy/lib/function_base.py
parent739443679b50b43c34808b8fb767bac643fcd91d (diff)
downloadnumpy-307dd76cb0fb770e07524e4cd29cd08e17edc2c3.tar.gz
BUG: Don't convert inputs to `np.float64` in digitize
This converts digitize to a pure-python function that falls back on searchsorted. Performance doesn't really matter here anyway - if you care about performance, then you should just call searchsorted directly, rather than checking the order of the bins. Partially fixes gh-11022
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r--numpy/lib/function_base.py112
1 files changed, 111 insertions, 1 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 9a680dd55..1a43da8b0 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -31,7 +31,7 @@ from numpy.core.function_base import add_newdoc
from numpy.lib.twodim_base import diag
from .utils import deprecate
from numpy.core.multiarray import (
- _insert, add_docstring, digitize, bincount, normalize_axis_index,
+ _insert, add_docstring, bincount, normalize_axis_index, _monotonicity,
interp as compiled_interp, interp_complex as compiled_interp_complex
)
from numpy.core.umath import _add_newdoc_ufunc as add_newdoc_ufunc
@@ -4493,3 +4493,113 @@ def append(arr, values, axis=None):
values = ravel(values)
axis = arr.ndim-1
return concatenate((arr, values), axis=axis)
+
+
+def digitize(x, bins, right=False):
+ """
+ Return the indices of the bins to which each value in input array belongs.
+
+ ========= ============= ============================
+ `right` order of bins returned index `i` satisfies
+ ========= ============= ============================
+ ``False`` increasing ``bins[i-1] <= x < bins[i]``
+ ``True`` increasing ``bins[i-1] < x <= bins[i]``
+ ``False`` decreasing ``bins[i-1] > x >= bins[i]``
+ ``True`` decreasing ``bins[i-1] >= x > bins[i]``
+ ========= ============= ============================
+
+ If values in `x` are beyond the bounds of `bins`, 0 or ``len(bins)`` is
+ returned as appropriate.
+
+ Parameters
+ ----------
+ x : array_like
+ Input array to be binned. Prior to NumPy 1.10.0, this array had to
+ be 1-dimensional, but can now have any shape.
+ bins : array_like
+ Array of bins. It has to be 1-dimensional and monotonic.
+ right : bool, optional
+ Indicating whether the intervals include the right or the left bin
+ edge. Default behavior is (right==False) indicating that the interval
+ does not include the right edge. The left bin end is open in this
+ case, i.e., bins[i-1] <= x < bins[i] is the default behavior for
+ monotonically increasing bins.
+
+ Returns
+ -------
+ indices : ndarray of ints
+ Output array of indices, of same shape as `x`.
+
+ Raises
+ ------
+ ValueError
+ If `bins` is not monotonic.
+ TypeError
+ If the type of the input is complex.
+
+ See Also
+ --------
+ bincount, histogram, unique, searchsorted
+
+ Notes
+ -----
+ If values in `x` are such that they fall outside the bin range,
+ attempting to index `bins` with the indices that `digitize` returns
+ will result in an IndexError.
+
+ .. versionadded:: 1.10.0
+
+ `np.digitize` is implemented in terms of `np.searchsorted`. This means
+ that a binary search is used to bin the values, which scales much better
+ for larger number of bins than the previous linear search. It also removes
+ the requirement for the input array to be 1-dimensional.
+
+ For monotonically _increasing_ `bins`, the following are equivalent::
+
+ np.digitize(x, bins, right=True)
+ np.searchsorted(bins, x, side='left')
+
+ Note that as the order of the arguments are reversed, the side must be too.
+ The `searchsorted` call is marginally faster, as it does not do any
+ monotonicity checks. Perhaps more importantly, it supports all dtypes.
+
+ Examples
+ --------
+ >>> x = np.array([0.2, 6.4, 3.0, 1.6])
+ >>> bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0])
+ >>> inds = np.digitize(x, bins)
+ >>> inds
+ array([1, 4, 3, 2])
+ >>> for n in range(x.size):
+ ... print(bins[inds[n]-1], "<=", x[n], "<", bins[inds[n]])
+ ...
+ 0.0 <= 0.2 < 1.0
+ 4.0 <= 6.4 < 10.0
+ 2.5 <= 3.0 < 4.0
+ 1.0 <= 1.6 < 2.5
+
+ >>> x = np.array([1.2, 10.0, 12.4, 15.5, 20.])
+ >>> bins = np.array([0, 5, 10, 15, 20])
+ >>> np.digitize(x,bins,right=True)
+ array([1, 2, 3, 4, 4])
+ >>> np.digitize(x,bins,right=False)
+ array([1, 3, 3, 4, 5])
+ """
+ x = _nx.asarray(x)
+ bins = _nx.asarray(bins)
+
+ # here for compatibility, searchsorted below is happy to take this
+ if np.issubdtype(x.dtype, _nx.complexfloating):
+ raise TypeError("x may not be complex")
+
+ mono = _monotonicity(bins)
+ if mono == 0:
+ raise ValueError("bins must be monotonically increasing or decreasing")
+
+ # this is backwards because the arguments below are swapped
+ side = 'left' if right else 'right'
+ if mono == -1:
+ # reverse the bins, and invert the results
+ return len(bins) - _nx.searchsorted(bins[::-1], x, side=side)
+ else:
+ return _nx.searchsorted(bins, x, side=side)