diff options
-rw-r--r-- | doc/release/1.13.0-notes.rst | 6 | ||||
-rw-r--r-- | doc/source/reference/routines.set.rst | 1 | ||||
-rw-r--r-- | numpy/add_newdocs.py | 2 | ||||
-rw-r--r-- | numpy/lib/arraysetops.py | 102 | ||||
-rw-r--r-- | numpy/lib/info.py | 8 | ||||
-rw-r--r-- | numpy/lib/tests/test_arraysetops.py | 42 | ||||
-rw-r--r-- | numpy/ma/extras.py | 29 | ||||
-rw-r--r-- | numpy/ma/tests/test_extras.py | 23 |
8 files changed, 203 insertions, 10 deletions
diff --git a/doc/release/1.13.0-notes.rst b/doc/release/1.13.0-notes.rst index 48b9adc9b..6be1afe64 100644 --- a/doc/release/1.13.0-notes.rst +++ b/doc/release/1.13.0-notes.rst @@ -157,6 +157,12 @@ In an N-dimensional array, the user can now choose the axis along which to look for duplicate N-1-dimensional elements using ``numpy.unique``. The original behaviour is recovered if ``axis=None`` (default). +``isin`` function, improving on ``in1d`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The new function ``isin`` tests whether each element of an N-dimensional +array is present anywhere within a second array. It is an enhancement +of ``in1d`` that preserves the shape of the first array. + ``np.gradient`` now supports unevenly spaced data ------------------------------------------------- Users can now specify a not-constant spacing for data. diff --git a/doc/source/reference/routines.set.rst b/doc/source/reference/routines.set.rst index 27c6aeb89..0089fb3e9 100644 --- a/doc/source/reference/routines.set.rst +++ b/doc/source/reference/routines.set.rst @@ -17,6 +17,7 @@ Boolean operations in1d intersect1d + isin setdiff1d setxor1d union1d diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 6e859bd90..df79ae136 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -1500,7 +1500,7 @@ add_newdoc('numpy.core.multiarray', 'where', Find the indices of elements of `x` that are in `goodvalues`. >>> goodvalues = [3, 4, 7] - >>> ix = np.in1d(x.ravel(), goodvalues).reshape(x.shape) + >>> ix = np.isin(x, goodvalues) >>> ix array([[False, False, False], [ True, True, False], diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index fae3e3cbc..9a1448991 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -1,9 +1,10 @@ """ -Set operations for 1D numeric arrays based on sorting. +Set operations for arrays based on sorting. 
:Contains: - ediff1d, unique, + isin, + ediff1d, intersect1d, setxor1d, in1d, @@ -31,7 +32,7 @@ import numpy as np __all__ = [ 'ediff1d', 'intersect1d', 'setxor1d', 'union1d', 'setdiff1d', 'unique', - 'in1d' + 'in1d', 'isin' ] @@ -380,6 +381,7 @@ def setxor1d(ar1, ar2, assume_unique=False): flag2 = flag[1:] == flag[:-1] return aux[flag2] + def in1d(ar1, ar2, assume_unique=False, invert=False): """ Test whether each element of a 1-D array is also present in a second array. @@ -387,6 +389,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): Returns a boolean array the same length as `ar1` that is True where an element of `ar1` is in `ar2` and False otherwise. + We recommend using :func:`isin` instead of `in1d` for new code. + Parameters ---------- ar1 : (M,) array_like @@ -411,6 +415,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): See Also -------- + isin : Version of this function that preserves the + shape of ar1. numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. @@ -481,6 +487,96 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): else: return ret[rev_idx] + +def isin(element, test_elements, assume_unique=False, invert=False): + """ + Calculates `element in test_elements`, broadcasting over `element` only. + Returns a boolean array of the same shape as `element` that is True + where an element of `element` is in `test_elements` and False otherwise. + + Parameters + ---------- + element : array_like + Input array. + test_elements : array_like + The values against which to test each value of `element`. + This argument is flattened if it is an array or array_like. + See notes for behavior with non-array-like parameters. + assume_unique : bool, optional + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. 
+ invert : bool, optional + If True, the values in the returned array are inverted, as if + calculating `element not in test_elements`. Default is False. + ``np.isin(a, b, invert=True)`` is equivalent to (but faster + than) ``np.invert(np.isin(a, b))``. + + Returns + ------- + isin : ndarray, bool + Has the same shape as `element`. The values `element[isin]` + are in `test_elements`. + + See Also + -------- + in1d : Flattened version of this function. + numpy.lib.arraysetops : Module with a number of other functions for + performing set operations on arrays. + Notes + ----- + + `isin` is an element-wise function version of the python keyword `in`. + ``isin(a, b)`` is roughly equivalent to + ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences. + + `element` and `test_elements` are converted to arrays if they are not + already. If `test_elements` is a set (or other non-sequence collection) + it will be converted to an object array with one element, rather than an + array of the values contained in `test_elements`. This is a consequence + of the `array` constructor's way of handling non-sequence collections. + Converting the set to a list usually gives the desired behavior. + + .. 
versionadded:: 1.13.0 + + Examples + -------- + >>> element = 2*np.arange(4).reshape((2, 2)) + >>> element + array([[0, 2], + [4, 6]]) + >>> test_elements = [1, 2, 4, 8] + >>> mask = np.isin(element, test_elements) + >>> mask + array([[ False, True], + [ True, False]], dtype=bool) + >>> element[mask] + array([2, 4]) + >>> mask = np.isin(element, test_elements, invert=True) + >>> mask + array([[ True, False], + [ False, True]], dtype=bool) + >>> element[mask] + array([0, 6]) + + Because of how `array` handles sets, the following does not + work as expected: + + >>> test_set = {1, 2, 4, 8} + >>> np.isin(element, test_set) + array([[ False, False], + [ False, False]], dtype=bool) + + Casting the set to a list gives the expected result: + + >>> np.isin(element, list(test_set)) + array([[ False, True], + [ True, False]], dtype=bool) + """ + element = np.asarray(element) + return in1d(element, test_elements, assume_unique=assume_unique, + invert=invert).reshape(element.shape) + + def union1d(ar1, ar2): """ Find the union of two arrays. diff --git a/numpy/lib/info.py b/numpy/lib/info.py index 141df2ace..e00406407 100644 --- a/numpy/lib/info.py +++ b/numpy/lib/info.py @@ -136,13 +136,15 @@ Threading Tricks ParallelExec Execute commands in parallel thread. ================ =================== -1D Array Set Operations +Array Set Operations ----------------------- -Set operations for 1D numeric arrays based on sort() function. +Set operations for numeric arrays based on sort() function. ================ =================== -ediff1d Array difference (auxiliary function). unique Unique elements of an array. +isin Test whether each element of an ND array is present + anywhere within a second array. +ediff1d Array difference (auxiliary function). intersect1d Intersection of 1D arrays with unique elements. setxor1d Set exclusive-or of 1D arrays with unique elements. 
in1d Test whether elements in a 1D array are also present in diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index eb4cca0ce..fa664ff24 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -8,7 +8,7 @@ from numpy.testing import ( run_module_suite, TestCase, assert_array_equal, assert_equal, assert_raises ) from numpy.lib.arraysetops import ( - ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d + ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d, isin ) @@ -77,6 +77,46 @@ class TestSetOps(TestCase): assert(isinstance(ediff1d(np.matrix(1)), np.matrix)) assert(isinstance(ediff1d(np.matrix(1), to_begin=1), np.matrix)) + def test_isin(self): + # the tests for in1d cover most of isin's behavior + # if in1d is removed, would need to change those tests to test + # isin instead. + def _isin_slow(a, b): + b = np.asarray(b).flatten().tolist() + return a in b + isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1}) + def assert_isin_equal(a, b): + x = isin(a, b) + y = isin_slow(a, b) + assert_array_equal(x, y) + + #multidimensional arrays in both arguments + a = np.arange(24).reshape([2, 3, 4]) + b = np.array([[10, 20, 30], [0, 1, 3], [11, 22, 33]]) + assert_isin_equal(a, b) + + #array-likes as both arguments + c = [(9, 8), (7, 6)] + d = (9, 7) + assert_isin_equal(c, d) + + #zero-d array: + f = np.array(3) + assert_isin_equal(f, b) + assert_isin_equal(a, f) + assert_isin_equal(f, f) + + #scalar: + assert_isin_equal(5, b) + assert_isin_equal(a, 6) + assert_isin_equal(5, 6) + + #empty array-like: + x = [] + assert_isin_equal(x, b) + assert_isin_equal(a, x) + assert_isin_equal(x, x) + def test_in1d(self): # we use two different sizes for the b array here to test the # two different paths in in1d(). 
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index 4955d25eb..e100e471c 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -16,7 +16,7 @@ __all__ = [ 'column_stack', 'compress_cols', 'compress_nd', 'compress_rowcols', 'compress_rows', 'count_masked', 'corrcoef', 'cov', 'diagflat', 'dot', 'dstack', 'ediff1d', 'flatnotmasked_contiguous', 'flatnotmasked_edges', - 'hsplit', 'hstack', 'in1d', 'intersect1d', 'mask_cols', 'mask_rowcols', + 'hsplit', 'hstack', 'isin', 'in1d', 'intersect1d', 'mask_cols', 'mask_rowcols', 'mask_rows', 'masked_all', 'masked_all_like', 'median', 'mr_', 'notmasked_contiguous', 'notmasked_edges', 'polyfit', 'row_stack', 'setdiff1d', 'setxor1d', 'unique', 'union1d', 'vander', 'vstack', @@ -1131,6 +1131,7 @@ def setxor1d(ar1, ar2, assume_unique=False): flag2 = (flag[1:] == flag[:-1]) return aux[flag2] + def in1d(ar1, ar2, assume_unique=False, invert=False): """ Test whether each element of an array is also present in a second @@ -1138,8 +1139,11 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): The output is always a masked array. See `numpy.in1d` for more details. + We recommend using :func:`isin` instead of `in1d` for new code. + See Also -------- + isin : Version of this function that preserves the shape of ar1. numpy.in1d : Equivalent function for ndarrays. Notes @@ -1170,6 +1174,29 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): return flag[indx][rev_idx] +def isin(element, test_elements, assume_unique=False, invert=False): + """ + Calculates `element in test_elements`, broadcasting over + `element` only. + + The output is always a masked array of the same shape as `element`. + See `numpy.isin` for more details. + + See Also + -------- + in1d : Flattened version of this function. + numpy.isin : Equivalent function for ndarrays. + + Notes + ----- + .. 
versionadded:: 1.13.0 + + """ + element = ma.asarray(element) + return in1d(element, test_elements, assume_unique=assume_unique, + invert=invert).reshape(element.shape) + + def union1d(ar1, ar2): """ Union of two arrays. diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py index 77a5c0fc6..e7ebd8b82 100644 --- a/numpy/ma/tests/test_extras.py +++ b/numpy/ma/tests/test_extras.py @@ -28,7 +28,7 @@ from numpy.ma.extras import ( median, average, unique, setxor1d, setdiff1d, union1d, intersect1d, in1d, ediff1d, apply_over_axes, apply_along_axis, compress_nd, compress_rowcols, mask_rowcols, clump_masked, clump_unmasked, flatnotmasked_contiguous, - notmasked_contiguous, notmasked_edges, masked_all, masked_all_like, + notmasked_contiguous, notmasked_edges, masked_all, masked_all_like, isin, diagflat ) import numpy.ma.extras as mae @@ -1435,6 +1435,27 @@ class TestArraySetOps(TestCase): # assert_array_equal([], setxor1d([], [])) + def test_isin(self): + # the tests for in1d cover most of isin's behavior + # if in1d is removed, would need to change those tests to test + # isin instead. + a = np.arange(24).reshape([2, 3, 4]) + mask = np.zeros([2, 3, 4]) + mask[1, 2, 0] = 1 + a = array(a, mask=mask) + b = array(data=[0, 10, 20, 30, 1, 3, 11, 22, 33], + mask=[0, 1, 0, 1, 0, 1, 0, 1, 0]) + ec = zeros((2, 3, 4), dtype=bool) + ec[0, 0, 0] = True + ec[0, 0, 1] = True + ec[0, 2, 3] = True + c = isin(a, b) + assert_(isinstance(c, MaskedArray)) + assert_array_equal(c, ec) + #compare results of np.isin to ma.isin + d = np.isin(a, b[~b.mask]) & ~a.mask + assert_array_equal(c, d) + def test_in1d(self): # Test in1d a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) |