summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorB R S Recht <brsr@users.noreply.github.com>2017-05-04 20:03:09 -0400
committerEric Wieser <wieser.eric@gmail.com>2017-05-05 01:03:09 +0100
commit69b0c42bca27dd5d5522de306bcd7db7deccbfad (patch)
treeb857fc11775a3633bf959a158f5d6be3e7ef7971
parent1d592c12ca7f9c7f471aa8d20b538c5cb4f2cdce (diff)
downloadnumpy-69b0c42bca27dd5d5522de306bcd7db7deccbfad.tar.gz
ENH: Add isin, generalizing in1d to ND arrays (#8423)
This fixes gh-8331. Also update the docs for arraysetops to remove the outdated "1D" from the description, which was already incorrect for np.unique.
-rw-r--r--doc/release/1.13.0-notes.rst6
-rw-r--r--doc/source/reference/routines.set.rst1
-rw-r--r--numpy/add_newdocs.py2
-rw-r--r--numpy/lib/arraysetops.py102
-rw-r--r--numpy/lib/info.py8
-rw-r--r--numpy/lib/tests/test_arraysetops.py42
-rw-r--r--numpy/ma/extras.py29
-rw-r--r--numpy/ma/tests/test_extras.py23
8 files changed, 203 insertions, 10 deletions
diff --git a/doc/release/1.13.0-notes.rst b/doc/release/1.13.0-notes.rst
index 48b9adc9b..6be1afe64 100644
--- a/doc/release/1.13.0-notes.rst
+++ b/doc/release/1.13.0-notes.rst
@@ -157,6 +157,12 @@ In an N-dimensional array, the user can now choose the axis along which to look
for duplicate N-1-dimensional elements using ``numpy.unique``. The original
behaviour is recovered if ``axis=None`` (default).
+``isin`` function, improving on ``in1d``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The new function ``isin`` tests whether each element of an N-dimensional
+array is present anywhere within a second array. It is an enhancement
+of ``in1d`` that preserves the shape of the first array.
+
``np.gradient`` now supports unevenly spaced data
-------------------------------------------------
Users can now specify a not-constant spacing for data.
diff --git a/doc/source/reference/routines.set.rst b/doc/source/reference/routines.set.rst
index 27c6aeb89..0089fb3e9 100644
--- a/doc/source/reference/routines.set.rst
+++ b/doc/source/reference/routines.set.rst
@@ -17,6 +17,7 @@ Boolean operations
in1d
intersect1d
+ isin
setdiff1d
setxor1d
union1d
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py
index 6e859bd90..df79ae136 100644
--- a/numpy/add_newdocs.py
+++ b/numpy/add_newdocs.py
@@ -1500,7 +1500,7 @@ add_newdoc('numpy.core.multiarray', 'where',
Find the indices of elements of `x` that are in `goodvalues`.
>>> goodvalues = [3, 4, 7]
- >>> ix = np.in1d(x.ravel(), goodvalues).reshape(x.shape)
+ >>> ix = np.isin(x, goodvalues)
>>> ix
array([[False, False, False],
[ True, True, False],
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index fae3e3cbc..9a1448991 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -1,9 +1,10 @@
"""
-Set operations for 1D numeric arrays based on sorting.
+Set operations for arrays based on sorting.
:Contains:
- ediff1d,
unique,
+ isin,
+ ediff1d,
intersect1d,
setxor1d,
in1d,
@@ -31,7 +32,7 @@ import numpy as np
__all__ = [
'ediff1d', 'intersect1d', 'setxor1d', 'union1d', 'setdiff1d', 'unique',
- 'in1d'
+ 'in1d', 'isin'
]
@@ -380,6 +381,7 @@ def setxor1d(ar1, ar2, assume_unique=False):
flag2 = flag[1:] == flag[:-1]
return aux[flag2]
+
def in1d(ar1, ar2, assume_unique=False, invert=False):
"""
Test whether each element of a 1-D array is also present in a second array.
@@ -387,6 +389,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
Returns a boolean array the same length as `ar1` that is True
where an element of `ar1` is in `ar2` and False otherwise.
+ We recommend using :func:`isin` instead of `in1d` for new code.
+
Parameters
----------
ar1 : (M,) array_like
@@ -411,6 +415,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
See Also
--------
+ isin : Version of this function that preserves the
+ shape of ar1.
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.
@@ -481,6 +487,96 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
else:
return ret[rev_idx]
+
+def isin(element, test_elements, assume_unique=False, invert=False):
+ """
+ Calculates `element in test_elements`, broadcasting over `element` only.
+ Returns a boolean array of the same shape as `element` that is True
+ where an element of `element` is in `test_elements` and False otherwise.
+
+ Parameters
+ ----------
+ element : array_like
+ Input array.
+ test_elements : array_like
+ The values against which to test each value of `element`.
+ This argument is flattened if it is an array or array_like.
+ See notes for behavior with non-array-like parameters.
+ assume_unique : bool, optional
+ If True, the input arrays are both assumed to be unique, which
+ can speed up the calculation. Default is False.
+ invert : bool, optional
+ If True, the values in the returned array are inverted, as if
+ calculating `element not in test_elements`. Default is False.
+ ``np.isin(a, b, invert=True)`` is equivalent to (but faster
+ than) ``np.invert(np.isin(a, b))``.
+
+ Returns
+ -------
+ isin : ndarray, bool
+ Has the same shape as `element`. The values `element[isin]`
+ are in `test_elements`.
+
+ See Also
+ --------
+ in1d : Flattened version of this function.
+ numpy.lib.arraysetops : Module with a number of other functions for
+ performing set operations on arrays.
+
+ Notes
+ -----
+ `isin` is an element-wise function version of the Python keyword `in`.
+ ``isin(a, b)`` is roughly equivalent to
+ ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.
+
+ `element` and `test_elements` are converted to arrays if they are not
+ already. If `test_elements` is a set (or other non-sequence collection)
+ it will be converted to an object array with one element, rather than an
+ array of the values contained in `test_elements`. This is a consequence
+ of the `array` constructor's way of handling non-sequence collections.
+ Converting the set to a list usually gives the desired behavior.
+
+ .. versionadded:: 1.13.0
+
+ Examples
+ --------
+ >>> element = 2*np.arange(4).reshape((2, 2))
+ >>> element
+ array([[0, 2],
+ [4, 6]])
+ >>> test_elements = [1, 2, 4, 8]
+ >>> mask = np.isin(element, test_elements)
+ >>> mask
+ array([[ False, True],
+ [ True, False]], dtype=bool)
+ >>> element[mask]
+ array([2, 4])
+ >>> mask = np.isin(element, test_elements, invert=True)
+ >>> mask
+ array([[ True, False],
+ [ False, True]], dtype=bool)
+ >>> element[mask]
+ array([0, 6])
+
+ Because of how `array` handles sets, the following does not
+ work as expected:
+
+ >>> test_set = {1, 2, 4, 8}
+ >>> np.isin(element, test_set)
+ array([[ False, False],
+ [ False, False]], dtype=bool)
+
+ Casting the set to a list gives the expected result:
+
+ >>> np.isin(element, list(test_set))
+ array([[ False, True],
+ [ True, False]], dtype=bool)
+ """
+ element = np.asarray(element)
+ return in1d(element, test_elements, assume_unique=assume_unique,
+ invert=invert).reshape(element.shape)
+
+
def union1d(ar1, ar2):
"""
Find the union of two arrays.
diff --git a/numpy/lib/info.py b/numpy/lib/info.py
index 141df2ace..e00406407 100644
--- a/numpy/lib/info.py
+++ b/numpy/lib/info.py
@@ -136,13 +136,15 @@ Threading Tricks
ParallelExec Execute commands in parallel thread.
================ ===================
-1D Array Set Operations
+Array Set Operations
-----------------------
-Set operations for 1D numeric arrays based on sort() function.
+Set operations for numeric arrays based on sort() function.
================ ===================
-ediff1d Array difference (auxiliary function).
unique Unique elements of an array.
+isin Test whether each element of an ND array is present
+ anywhere within a second array.
+ediff1d Array difference (auxiliary function).
intersect1d Intersection of 1D arrays with unique elements.
setxor1d Set exclusive-or of 1D arrays with unique elements.
in1d Test whether elements in a 1D array are also present in
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index eb4cca0ce..fa664ff24 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -8,7 +8,7 @@ from numpy.testing import (
run_module_suite, TestCase, assert_array_equal, assert_equal, assert_raises
)
from numpy.lib.arraysetops import (
- ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d
+ ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d, isin
)
@@ -77,6 +77,46 @@ class TestSetOps(TestCase):
assert(isinstance(ediff1d(np.matrix(1)), np.matrix))
assert(isinstance(ediff1d(np.matrix(1), to_begin=1), np.matrix))
+ def test_isin(self):
+ # the tests for in1d cover most of isin's behavior
+ # if in1d is removed, would need to change those tests to test
+ # isin instead.
+ def _isin_slow(a, b):
+ b = np.asarray(b).flatten().tolist()
+ return a in b
+ isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1})
+ def assert_isin_equal(a, b):
+ x = isin(a, b)
+ y = isin_slow(a, b)
+ assert_array_equal(x, y)
+
+ #multidimensional arrays in both arguments
+ a = np.arange(24).reshape([2, 3, 4])
+ b = np.array([[10, 20, 30], [0, 1, 3], [11, 22, 33]])
+ assert_isin_equal(a, b)
+
+ #array-likes as both arguments
+ c = [(9, 8), (7, 6)]
+ d = (9, 7)
+ assert_isin_equal(c, d)
+
+ #zero-d array:
+ f = np.array(3)
+ assert_isin_equal(f, b)
+ assert_isin_equal(a, f)
+ assert_isin_equal(f, f)
+
+ #scalar:
+ assert_isin_equal(5, b)
+ assert_isin_equal(a, 6)
+ assert_isin_equal(5, 6)
+
+ #empty array-like:
+ x = []
+ assert_isin_equal(x, b)
+ assert_isin_equal(a, x)
+ assert_isin_equal(x, x)
+
def test_in1d(self):
# we use two different sizes for the b array here to test the
# two different paths in in1d().
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index 4955d25eb..e100e471c 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -16,7 +16,7 @@ __all__ = [
'column_stack', 'compress_cols', 'compress_nd', 'compress_rowcols',
'compress_rows', 'count_masked', 'corrcoef', 'cov', 'diagflat', 'dot',
'dstack', 'ediff1d', 'flatnotmasked_contiguous', 'flatnotmasked_edges',
- 'hsplit', 'hstack', 'in1d', 'intersect1d', 'mask_cols', 'mask_rowcols',
+ 'hsplit', 'hstack', 'isin', 'in1d', 'intersect1d', 'mask_cols', 'mask_rowcols',
'mask_rows', 'masked_all', 'masked_all_like', 'median', 'mr_',
'notmasked_contiguous', 'notmasked_edges', 'polyfit', 'row_stack',
'setdiff1d', 'setxor1d', 'unique', 'union1d', 'vander', 'vstack',
@@ -1131,6 +1131,7 @@ def setxor1d(ar1, ar2, assume_unique=False):
flag2 = (flag[1:] == flag[:-1])
return aux[flag2]
+
def in1d(ar1, ar2, assume_unique=False, invert=False):
"""
Test whether each element of an array is also present in a second
@@ -1138,8 +1139,11 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
The output is always a masked array. See `numpy.in1d` for more details.
+ We recommend using :func:`isin` instead of `in1d` for new code.
+
See Also
--------
+ isin : Version of this function that preserves the shape of ar1.
numpy.in1d : Equivalent function for ndarrays.
Notes
@@ -1170,6 +1174,29 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
return flag[indx][rev_idx]
+def isin(element, test_elements, assume_unique=False, invert=False):
+ """
+ Calculates `element in test_elements`, broadcasting over
+ `element` only.
+
+ The output is always a masked array of the same shape as `element`.
+ See `numpy.isin` for more details.
+
+ See Also
+ --------
+ in1d : Flattened version of this function.
+ numpy.isin : Equivalent function for ndarrays.
+
+ Notes
+ -----
+ .. versionadded:: 1.13.0
+
+ """
+ element = ma.asarray(element)
+ return in1d(element, test_elements, assume_unique=assume_unique,
+ invert=invert).reshape(element.shape)
+
+
def union1d(ar1, ar2):
"""
Union of two arrays.
diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py
index 77a5c0fc6..e7ebd8b82 100644
--- a/numpy/ma/tests/test_extras.py
+++ b/numpy/ma/tests/test_extras.py
@@ -28,7 +28,7 @@ from numpy.ma.extras import (
median, average, unique, setxor1d, setdiff1d, union1d, intersect1d, in1d,
ediff1d, apply_over_axes, apply_along_axis, compress_nd, compress_rowcols,
mask_rowcols, clump_masked, clump_unmasked, flatnotmasked_contiguous,
- notmasked_contiguous, notmasked_edges, masked_all, masked_all_like,
+ notmasked_contiguous, notmasked_edges, masked_all, masked_all_like, isin,
diagflat
)
import numpy.ma.extras as mae
@@ -1435,6 +1435,27 @@ class TestArraySetOps(TestCase):
#
assert_array_equal([], setxor1d([], []))
+ def test_isin(self):
+ # the tests for in1d cover most of isin's behavior
+ # if in1d is removed, would need to change those tests to test
+ # isin instead.
+ a = np.arange(24).reshape([2, 3, 4])
+ mask = np.zeros([2, 3, 4])
+ mask[1, 2, 0] = 1
+ a = array(a, mask=mask)
+ b = array(data=[0, 10, 20, 30, 1, 3, 11, 22, 33],
+ mask=[0, 1, 0, 1, 0, 1, 0, 1, 0])
+ ec = zeros((2, 3, 4), dtype=bool)
+ ec[0, 0, 0] = True
+ ec[0, 0, 1] = True
+ ec[0, 2, 3] = True
+ c = isin(a, b)
+ assert_(isinstance(c, MaskedArray))
+ assert_array_equal(c, ec)
+ #compare results of np.isin to ma.isin
+ d = np.isin(a, b[~b.mask]) & ~a.mask
+ assert_array_equal(c, d)
+
def test_in1d(self):
# Test in1d
a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1])