summary refs log tree commit diff
path: root/numpy/lib/tests/test_arraysetops.py
diff options
context:
space:
mode:
author scoder <stefan_ml@behnel.de> 2023-05-04 09:29:53 +0200
committer GitHub <noreply@github.com> 2023-05-04 09:29:53 +0200
commit 442c8f48d3146ec32c7d5387310e171276cf10ac (patch)
tree d8911d1a64e384b7955d3fc09a07edd218a9f1ee /numpy/lib/tests/test_arraysetops.py
parent 3e4a6cba2da27bbe2a6e12c163238e503c9f6a07 (diff)
parent 9163e933df91b516b6f0c7a9ba8ad1750e642f37 (diff)
download numpy-442c8f48d3146ec32c7d5387310e171276cf10ac.tar.gz
Merge branch 'main' into cython3_noexcept
Diffstat (limited to 'numpy/lib/tests/test_arraysetops.py')
-rw-r--r-- numpy/lib/tests/test_arraysetops.py 337
1 files changed, 304 insertions, 33 deletions
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 81ba789e3..a180accbe 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -125,32 +125,36 @@ class TestSetOps:
assert_array_equal([7, 1], ediff1d(two_elem, to_begin=7))
assert_array_equal([5, 6, 1], ediff1d(two_elem, to_begin=[5, 6]))
- @pytest.mark.parametrize("ary, prepend, append", [
+ @pytest.mark.parametrize("ary, prepend, append, expected", [
# should fail because trying to cast
# np.nan standard floating point value
# into an integer array:
(np.array([1, 2, 3], dtype=np.int64),
None,
- np.nan),
+ np.nan,
+ 'to_end'),
# should fail because attempting
# to downcast to int type:
(np.array([1, 2, 3], dtype=np.int64),
np.array([5, 7, 2], dtype=np.float32),
- None),
+ None,
+ 'to_begin'),
# should fail because attempting to cast
# two special floating point values
- # to integers (on both sides of ary):
+ # to integers (on both sides of ary),
+ # `to_begin` is in the error message as the impl checks this first:
(np.array([1., 3., 9.], dtype=np.int8),
np.nan,
- np.nan),
+ np.nan,
+ 'to_begin'),
])
- def test_ediff1d_forbidden_type_casts(self, ary, prepend, append):
+ def test_ediff1d_forbidden_type_casts(self, ary, prepend, append, expected):
# verify resolution of gh-11490
# specifically, raise an appropriate
# Exception when attempting to append or
# prepend with an incompatible type
- msg = 'must be compatible'
+ msg = 'dtype of `{}` must be compatible'.format(expected)
with assert_raises_regex(TypeError, msg):
ediff1d(ary=ary,
to_end=append,
@@ -191,7 +195,8 @@ class TestSetOps:
assert_equal(actual, expected)
assert actual.dtype == expected.dtype
- def test_isin(self):
+ @pytest.mark.parametrize("kind", [None, "sort", "table"])
+ def test_isin(self, kind):
# the tests for in1d cover most of isin's behavior
# if in1d is removed, would need to change those tests to test
# isin instead.
@@ -201,7 +206,7 @@ class TestSetOps:
isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1})
def assert_isin_equal(a, b):
- x = isin(a, b)
+ x = isin(a, b, kind=kind)
y = isin_slow(a, b)
assert_array_equal(x, y)
@@ -227,12 +232,32 @@ class TestSetOps:
assert_isin_equal(5, 6)
# empty array-like:
- x = []
- assert_isin_equal(x, b)
- assert_isin_equal(a, x)
- assert_isin_equal(x, x)
-
- def test_in1d(self):
+ if kind != "table":
+ # An empty list will become float64,
+ # which is invalid for kind="table"
+ x = []
+ assert_isin_equal(x, b)
+ assert_isin_equal(a, x)
+ assert_isin_equal(x, x)
+
+ # empty array with various types:
+ for dtype in [bool, np.int64, np.float64]:
+ if kind == "table" and dtype == np.float64:
+ continue
+
+ if dtype in {np.int64, np.float64}:
+ ar = np.array([10, 20, 30], dtype=dtype)
+ elif dtype in {bool}:
+ ar = np.array([True, False, False])
+
+ empty_array = np.array([], dtype=dtype)
+
+ assert_isin_equal(empty_array, ar)
+ assert_isin_equal(ar, empty_array)
+ assert_isin_equal(empty_array, empty_array)
+
+ @pytest.mark.parametrize("kind", [None, "sort", "table"])
+ def test_in1d(self, kind):
# we use two different sizes for the b array here to test the
# two different paths in in1d().
for mult in (1, 10):
@@ -240,57 +265,58 @@ class TestSetOps:
a = [5, 7, 1, 2]
b = [2, 4, 3, 1, 5] * mult
ec = np.array([True, False, True, True])
- c = in1d(a, b, assume_unique=True)
+ c = in1d(a, b, assume_unique=True, kind=kind)
assert_array_equal(c, ec)
a[0] = 8
ec = np.array([False, False, True, True])
- c = in1d(a, b, assume_unique=True)
+ c = in1d(a, b, assume_unique=True, kind=kind)
assert_array_equal(c, ec)
a[0], a[3] = 4, 8
ec = np.array([True, False, True, False])
- c = in1d(a, b, assume_unique=True)
+ c = in1d(a, b, assume_unique=True, kind=kind)
assert_array_equal(c, ec)
a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
b = [2, 3, 4] * mult
ec = [False, True, False, True, True, True, True, True, True,
False, True, False, False, False]
- c = in1d(a, b)
+ c = in1d(a, b, kind=kind)
assert_array_equal(c, ec)
b = b + [5, 5, 4] * mult
ec = [True, True, True, True, True, True, True, True, True, True,
True, False, True, True]
- c = in1d(a, b)
+ c = in1d(a, b, kind=kind)
assert_array_equal(c, ec)
a = np.array([5, 7, 1, 2])
b = np.array([2, 4, 3, 1, 5] * mult)
ec = np.array([True, False, True, True])
- c = in1d(a, b)
+ c = in1d(a, b, kind=kind)
assert_array_equal(c, ec)
a = np.array([5, 7, 1, 1, 2])
b = np.array([2, 4, 3, 3, 1, 5] * mult)
ec = np.array([True, False, True, True, True])
- c = in1d(a, b)
+ c = in1d(a, b, kind=kind)
assert_array_equal(c, ec)
a = np.array([5, 5])
b = np.array([2, 2] * mult)
ec = np.array([False, False])
- c = in1d(a, b)
+ c = in1d(a, b, kind=kind)
assert_array_equal(c, ec)
a = np.array([5])
b = np.array([2])
ec = np.array([False])
- c = in1d(a, b)
+ c = in1d(a, b, kind=kind)
assert_array_equal(c, ec)
- assert_array_equal(in1d([], []), [])
+ if kind in {None, "sort"}:
+ assert_array_equal(in1d([], [], kind=kind), [])
def test_in1d_char_array(self):
a = np.array(['a', 'b', 'c', 'd', 'e', 'c', 'e', 'b'])
@@ -301,16 +327,29 @@ class TestSetOps:
assert_array_equal(c, ec)
- def test_in1d_invert(self):
+ @pytest.mark.parametrize("kind", [None, "sort", "table"])
+ def test_in1d_invert(self, kind):
"Test in1d's invert parameter"
# We use two different sizes for the b array here to test the
# two different paths in in1d().
for mult in (1, 10):
a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
b = [2, 3, 4] * mult
- assert_array_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True))
-
- def test_in1d_ravel(self):
+ assert_array_equal(np.invert(in1d(a, b, kind=kind)),
+ in1d(a, b, invert=True, kind=kind))
+
+ # float:
+ if kind in {None, "sort"}:
+ for mult in (1, 10):
+ a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5],
+ dtype=np.float32)
+ b = [2, 3, 4] * mult
+ b = np.array(b, dtype=np.float32)
+ assert_array_equal(np.invert(in1d(a, b, kind=kind)),
+ in1d(a, b, invert=True, kind=kind))
+
+ @pytest.mark.parametrize("kind", [None, "sort", "table"])
+ def test_in1d_ravel(self, kind):
# Test that in1d ravels its input arrays. This is not documented
# behavior however. The test is to ensure consistency.
a = np.arange(6).reshape(2, 3)
@@ -318,10 +357,110 @@ class TestSetOps:
long_b = np.arange(3, 63).reshape(30, 2)
ec = np.array([False, False, False, True, True, True])
- assert_array_equal(in1d(a, b, assume_unique=True), ec)
- assert_array_equal(in1d(a, b, assume_unique=False), ec)
- assert_array_equal(in1d(a, long_b, assume_unique=True), ec)
- assert_array_equal(in1d(a, long_b, assume_unique=False), ec)
+ assert_array_equal(in1d(a, b, assume_unique=True, kind=kind),
+ ec)
+ assert_array_equal(in1d(a, b, assume_unique=False,
+ kind=kind),
+ ec)
+ assert_array_equal(in1d(a, long_b, assume_unique=True,
+ kind=kind),
+ ec)
+ assert_array_equal(in1d(a, long_b, assume_unique=False,
+ kind=kind),
+ ec)
+
+ def test_in1d_hit_alternate_algorithm(self):
+ """Hit the standard isin code with integers"""
+ # Need extreme range to hit standard code
+ # This hits it without the use of kind='table'
+ a = np.array([5, 4, 5, 3, 4, 4, 1e9], dtype=np.int64)
+ b = np.array([2, 3, 4, 1e9], dtype=np.int64)
+ expected = np.array([0, 1, 0, 1, 1, 1, 1], dtype=bool)
+ assert_array_equal(expected, in1d(a, b))
+ assert_array_equal(np.invert(expected), in1d(a, b, invert=True))
+
+ a = np.array([5, 7, 1, 2], dtype=np.int64)
+ b = np.array([2, 4, 3, 1, 5, 1e9], dtype=np.int64)
+ ec = np.array([True, False, True, True])
+ c = in1d(a, b, assume_unique=True)
+ assert_array_equal(c, ec)
+
+ @pytest.mark.parametrize("kind", [None, "sort", "table"])
+ def test_in1d_boolean(self, kind):
+ """Test that in1d works for boolean input"""
+ a = np.array([True, False])
+ b = np.array([False, False, False])
+ expected = np.array([False, True])
+ assert_array_equal(expected,
+ in1d(a, b, kind=kind))
+ assert_array_equal(np.invert(expected),
+ in1d(a, b, invert=True, kind=kind))
+
+ @pytest.mark.parametrize("kind", [None, "sort"])
+ def test_in1d_timedelta(self, kind):
+ """Test that in1d works for timedelta input"""
+ rstate = np.random.RandomState(0)
+ a = rstate.randint(0, 100, size=10)
+ b = rstate.randint(0, 100, size=10)
+ truth = in1d(a, b)
+ a_timedelta = a.astype("timedelta64[s]")
+ b_timedelta = b.astype("timedelta64[s]")
+ assert_array_equal(truth, in1d(a_timedelta, b_timedelta, kind=kind))
+
+ def test_in1d_table_timedelta_fails(self):
+ a = np.array([0, 1, 2], dtype="timedelta64[s]")
+ b = a
+ # Make sure it raises a value error:
+ with pytest.raises(ValueError):
+ in1d(a, b, kind="table")
+
+ @pytest.mark.parametrize(
+ "dtype1,dtype2",
+ [
+ (np.int8, np.int16),
+ (np.int16, np.int8),
+ (np.uint8, np.uint16),
+ (np.uint16, np.uint8),
+ (np.uint8, np.int16),
+ (np.int16, np.uint8),
+ ]
+ )
+ @pytest.mark.parametrize("kind", [None, "sort", "table"])
+ def test_in1d_mixed_dtype(self, dtype1, dtype2, kind):
+ """Test that in1d works as expected for mixed dtype input."""
+ is_dtype2_signed = np.issubdtype(dtype2, np.signedinteger)
+ ar1 = np.array([0, 0, 1, 1], dtype=dtype1)
+
+ if is_dtype2_signed:
+ ar2 = np.array([-128, 0, 127], dtype=dtype2)
+ else:
+ ar2 = np.array([127, 0, 255], dtype=dtype2)
+
+ expected = np.array([True, True, False, False])
+
+ expect_failure = kind == "table" and any((
+ dtype1 == np.int8 and dtype2 == np.int16,
+ dtype1 == np.int16 and dtype2 == np.int8
+ ))
+
+ if expect_failure:
+ with pytest.raises(RuntimeError, match="exceed the maximum"):
+ in1d(ar1, ar2, kind=kind)
+ else:
+ assert_array_equal(in1d(ar1, ar2, kind=kind), expected)
+
+ @pytest.mark.parametrize("kind", [None, "sort", "table"])
+ def test_in1d_mixed_boolean(self, kind):
+ """Test that in1d works as expected for bool/int input."""
+ for dtype in np.typecodes["AllInteger"]:
+ a = np.array([True, False, False], dtype=bool)
+ b = np.array([0, 0, 0, 0], dtype=dtype)
+ expected = np.array([False, True, True], dtype=bool)
+ assert_array_equal(in1d(a, b, kind=kind), expected)
+
+ a, b = b, a
+ expected = np.array([True, True, True, True], dtype=bool)
+ assert_array_equal(in1d(a, b, kind=kind), expected)
def test_in1d_first_array_is_object(self):
ar1 = [None]
@@ -354,6 +493,73 @@ class TestSetOps:
result = np.in1d(ar1, ar2)
assert_array_equal(result, expected)
+ def test_in1d_with_arrays_containing_tuples(self):
+ ar1 = np.array([(1,), 2], dtype=object)
+ ar2 = np.array([(1,), 2], dtype=object)
+ expected = np.array([True, True])
+ result = np.in1d(ar1, ar2)
+ assert_array_equal(result, expected)
+ result = np.in1d(ar1, ar2, invert=True)
+ assert_array_equal(result, np.invert(expected))
+
+ # An integer is added at the end of the array to make sure
+ # that the array builder will create the array with tuples
+ # and after it's created the integer is removed.
+ # There's a bug in the array constructor that doesn't handle
+ # tuples properly and adding the integer fixes that.
+ ar1 = np.array([(1,), (2, 1), 1], dtype=object)
+ ar1 = ar1[:-1]
+ ar2 = np.array([(1,), (2, 1), 1], dtype=object)
+ ar2 = ar2[:-1]
+ expected = np.array([True, True])
+ result = np.in1d(ar1, ar2)
+ assert_array_equal(result, expected)
+ result = np.in1d(ar1, ar2, invert=True)
+ assert_array_equal(result, np.invert(expected))
+
+ ar1 = np.array([(1,), (2, 3), 1], dtype=object)
+ ar1 = ar1[:-1]
+ ar2 = np.array([(1,), 2], dtype=object)
+ expected = np.array([True, False])
+ result = np.in1d(ar1, ar2)
+ assert_array_equal(result, expected)
+ result = np.in1d(ar1, ar2, invert=True)
+ assert_array_equal(result, np.invert(expected))
+
+ def test_in1d_errors(self):
+ """Test that in1d raises expected errors."""
+
+ # Error 1: `kind` is not one of 'sort', 'table' or None.
+ ar1 = np.array([1, 2, 3, 4, 5])
+ ar2 = np.array([2, 4, 6, 8, 10])
+ assert_raises(ValueError, in1d, ar1, ar2, kind='quicksort')
+
+ # Error 2: `kind="table"` does not work for non-integral arrays.
+ obj_ar1 = np.array([1, 'a', 3, 'b', 5], dtype=object)
+ obj_ar2 = np.array([1, 'a', 3, 'b', 5], dtype=object)
+ assert_raises(ValueError, in1d, obj_ar1, obj_ar2, kind='table')
+
+ for dtype in [np.int32, np.int64]:
+ ar1 = np.array([-1, 2, 3, 4, 5], dtype=dtype)
+ # The range of this array will overflow:
+ overflow_ar2 = np.array([-1, np.iinfo(dtype).max], dtype=dtype)
+
+ # Error 3: `kind="table"` will trigger a runtime error
+ # if there is an integer overflow expected when computing the
+ # range of ar2
+ assert_raises(
+ RuntimeError,
+ in1d, ar1, overflow_ar2, kind='table'
+ )
+
+ # Non-error: `kind=None` will *not* trigger a runtime error
+ # if there is an integer overflow, it will switch to
+ # the `sort` algorithm.
+ result = np.in1d(ar1, overflow_ar2, kind=None)
+ assert_array_equal(result, [True] + [False] * 4)
+ result = np.in1d(ar1, overflow_ar2, kind='sort')
+ assert_array_equal(result, [True] + [False] * 4)
+
def test_union1d(self):
a = np.array([5, 4, 7, 1, 2])
b = np.array([2, 4, 3, 3, 2, 1, 5])
@@ -527,6 +733,63 @@ class TestUnique:
assert_equal(a3_idx.dtype, np.intp)
assert_equal(a3_inv.dtype, np.intp)
+ # test for ticket 2111 - float
+ a = [2.0, np.nan, 1.0, np.nan]
+ ua = [1.0, 2.0, np.nan]
+ ua_idx = [2, 0, 1]
+ ua_inv = [1, 2, 0, 2]
+ ua_cnt = [1, 1, 2]
+ assert_equal(np.unique(a), ua)
+ assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+ assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+ assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
+ # test for ticket 2111 - complex
+ a = [2.0-1j, np.nan, 1.0+1j, complex(0.0, np.nan), complex(1.0, np.nan)]
+ ua = [1.0+1j, 2.0-1j, complex(0.0, np.nan)]
+ ua_idx = [2, 0, 3]
+ ua_inv = [1, 2, 0, 2, 2]
+ ua_cnt = [1, 1, 3]
+ assert_equal(np.unique(a), ua)
+ assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+ assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+ assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
+ # test for ticket 2111 - datetime64
+ nat = np.datetime64('nat')
+ a = [np.datetime64('2020-12-26'), nat, np.datetime64('2020-12-24'), nat]
+ ua = [np.datetime64('2020-12-24'), np.datetime64('2020-12-26'), nat]
+ ua_idx = [2, 0, 1]
+ ua_inv = [1, 2, 0, 2]
+ ua_cnt = [1, 1, 2]
+ assert_equal(np.unique(a), ua)
+ assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+ assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+ assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
+ # test for ticket 2111 - timedelta
+ nat = np.timedelta64('nat')
+ a = [np.timedelta64(1, 'D'), nat, np.timedelta64(1, 'h'), nat]
+ ua = [np.timedelta64(1, 'h'), np.timedelta64(1, 'D'), nat]
+ ua_idx = [2, 0, 1]
+ ua_inv = [1, 2, 0, 2]
+ ua_cnt = [1, 1, 2]
+ assert_equal(np.unique(a), ua)
+ assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+ assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+ assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
+ # test for gh-19300
+ all_nans = [np.nan] * 4
+ ua = [np.nan]
+ ua_idx = [0]
+ ua_inv = [0, 0, 0, 0]
+ ua_cnt = [4]
+ assert_equal(np.unique(all_nans), ua)
+ assert_equal(np.unique(all_nans, return_index=True), (ua, ua_idx))
+ assert_equal(np.unique(all_nans, return_inverse=True), (ua, ua_inv))
+ assert_equal(np.unique(all_nans, return_counts=True), (ua, ua_cnt))
+
def test_unique_axis_errors(self):
assert_raises(TypeError, self._run_axis_tests, object)
assert_raises(TypeError, self._run_axis_tests,
@@ -671,3 +934,11 @@ class TestUnique:
assert_array_equal(uniq[:, inv], data)
msg = "Unique's return_counts=True failed with axis=1"
assert_array_equal(cnt, np.array([2, 1, 1]), msg)
+
+ def test_unique_nanequals(self):
+ # issue 20326
+ a = np.array([1, 1, np.nan, np.nan, np.nan])
+ unq = np.unique(a)
+ not_unq = np.unique(a, equal_nan=False)
+ assert_array_equal(unq, np.array([1, np.nan]))
+ assert_array_equal(not_unq, np.array([1, np.nan, np.nan, np.nan]))