diff options
author | scoder <stefan_ml@behnel.de> | 2023-05-04 09:29:53 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-04 09:29:53 +0200 |
commit | 442c8f48d3146ec32c7d5387310e171276cf10ac (patch) | |
tree | d8911d1a64e384b7955d3fc09a07edd218a9f1ee /numpy/lib/tests/test_arraysetops.py | |
parent | 3e4a6cba2da27bbe2a6e12c163238e503c9f6a07 (diff) | |
parent | 9163e933df91b516b6f0c7a9ba8ad1750e642f37 (diff) | |
download | numpy-442c8f48d3146ec32c7d5387310e171276cf10ac.tar.gz |
Merge branch 'main' into cython3_noexcept
Diffstat (limited to 'numpy/lib/tests/test_arraysetops.py')
-rw-r--r-- | numpy/lib/tests/test_arraysetops.py | 337 |
1 files changed, 304 insertions, 33 deletions
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 81ba789e3..a180accbe 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -125,32 +125,36 @@ class TestSetOps: assert_array_equal([7, 1], ediff1d(two_elem, to_begin=7)) assert_array_equal([5, 6, 1], ediff1d(two_elem, to_begin=[5, 6])) - @pytest.mark.parametrize("ary, prepend, append", [ + @pytest.mark.parametrize("ary, prepend, append, expected", [ # should fail because trying to cast # np.nan standard floating point value # into an integer array: (np.array([1, 2, 3], dtype=np.int64), None, - np.nan), + np.nan, + 'to_end'), # should fail because attempting # to downcast to int type: (np.array([1, 2, 3], dtype=np.int64), np.array([5, 7, 2], dtype=np.float32), - None), + None, + 'to_begin'), # should fail because attempting to cast # two special floating point values - # to integers (on both sides of ary): + # to integers (on both sides of ary), + # `to_begin` is in the error message as the impl checks this first: (np.array([1., 3., 9.], dtype=np.int8), np.nan, - np.nan), + np.nan, + 'to_begin'), ]) - def test_ediff1d_forbidden_type_casts(self, ary, prepend, append): + def test_ediff1d_forbidden_type_casts(self, ary, prepend, append, expected): # verify resolution of gh-11490 # specifically, raise an appropriate # Exception when attempting to append or # prepend with an incompatible type - msg = 'must be compatible' + msg = 'dtype of `{}` must be compatible'.format(expected) with assert_raises_regex(TypeError, msg): ediff1d(ary=ary, to_end=append, @@ -191,7 +195,8 @@ class TestSetOps: assert_equal(actual, expected) assert actual.dtype == expected.dtype - def test_isin(self): + @pytest.mark.parametrize("kind", [None, "sort", "table"]) + def test_isin(self, kind): # the tests for in1d cover most of isin's behavior # if in1d is removed, would need to change those tests to test # isin instead. @@ -201,7 +206,7 @@ class TestSetOps: isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1}) def assert_isin_equal(a, b): - x = isin(a, b) + x = isin(a, b, kind=kind) y = isin_slow(a, b) assert_array_equal(x, y) @@ -227,12 +232,32 @@ class TestSetOps: assert_isin_equal(5, 6) # empty array-like: - x = [] - assert_isin_equal(x, b) - assert_isin_equal(a, x) - assert_isin_equal(x, x) - - def test_in1d(self): + if kind != "table": + # An empty list will become float64, + # which is invalid for kind="table" + x = [] + assert_isin_equal(x, b) + assert_isin_equal(a, x) + assert_isin_equal(x, x) + + # empty array with various types: + for dtype in [bool, np.int64, np.float64]: + if kind == "table" and dtype == np.float64: + continue + + if dtype in {np.int64, np.float64}: + ar = np.array([10, 20, 30], dtype=dtype) + elif dtype in {bool}: + ar = np.array([True, False, False]) + + empty_array = np.array([], dtype=dtype) + + assert_isin_equal(empty_array, ar) + assert_isin_equal(ar, empty_array) + assert_isin_equal(empty_array, empty_array) + + @pytest.mark.parametrize("kind", [None, "sort", "table"]) + def test_in1d(self, kind): # we use two different sizes for the b array here to test the # two different paths in in1d(). for mult in (1, 10): @@ -240,57 +265,58 @@ class TestSetOps: a = [5, 7, 1, 2] b = [2, 4, 3, 1, 5] * mult ec = np.array([True, False, True, True]) - c = in1d(a, b, assume_unique=True) + c = in1d(a, b, assume_unique=True, kind=kind) assert_array_equal(c, ec) a[0] = 8 ec = np.array([False, False, True, True]) - c = in1d(a, b, assume_unique=True) + c = in1d(a, b, assume_unique=True, kind=kind) assert_array_equal(c, ec) a[0], a[3] = 4, 8 ec = np.array([True, False, True, False]) - c = in1d(a, b, assume_unique=True) + c = in1d(a, b, assume_unique=True, kind=kind) assert_array_equal(c, ec) a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5]) b = [2, 3, 4] * mult ec = [False, True, False, True, True, True, True, True, True, False, True, False, False, False] - c = in1d(a, b) + c = in1d(a, b, kind=kind) assert_array_equal(c, ec) b = b + [5, 5, 4] * mult ec = [True, True, True, True, True, True, True, True, True, True, True, False, True, True] - c = in1d(a, b) + c = in1d(a, b, kind=kind) assert_array_equal(c, ec) a = np.array([5, 7, 1, 2]) b = np.array([2, 4, 3, 1, 5] * mult) ec = np.array([True, False, True, True]) - c = in1d(a, b) + c = in1d(a, b, kind=kind) assert_array_equal(c, ec) a = np.array([5, 7, 1, 1, 2]) b = np.array([2, 4, 3, 3, 1, 5] * mult) ec = np.array([True, False, True, True, True]) - c = in1d(a, b) + c = in1d(a, b, kind=kind) assert_array_equal(c, ec) a = np.array([5, 5]) b = np.array([2, 2] * mult) ec = np.array([False, False]) - c = in1d(a, b) + c = in1d(a, b, kind=kind) assert_array_equal(c, ec) a = np.array([5]) b = np.array([2]) ec = np.array([False]) - c = in1d(a, b) + c = in1d(a, b, kind=kind) assert_array_equal(c, ec) - assert_array_equal(in1d([], []), []) + if kind in {None, "sort"}: + assert_array_equal(in1d([], [], kind=kind), []) def test_in1d_char_array(self): a = np.array(['a', 'b', 'c', 'd', 'e', 'c', 'e', 'b']) @@ -301,16 +327,29 @@ class TestSetOps: assert_array_equal(c, ec) - def test_in1d_invert(self): + @pytest.mark.parametrize("kind", [None, "sort", "table"]) + def test_in1d_invert(self, kind): "Test in1d's invert parameter" # We use two different sizes for the b array here to test the # two different paths in in1d(). for mult in (1, 10): a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5]) b = [2, 3, 4] * mult - assert_array_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True)) - - def test_in1d_ravel(self): + assert_array_equal(np.invert(in1d(a, b, kind=kind)), + in1d(a, b, invert=True, kind=kind)) + + # float: + if kind in {None, "sort"}: + for mult in (1, 10): + a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5], + dtype=np.float32) + b = [2, 3, 4] * mult + b = np.array(b, dtype=np.float32) + assert_array_equal(np.invert(in1d(a, b, kind=kind)), + in1d(a, b, invert=True, kind=kind)) + + @pytest.mark.parametrize("kind", [None, "sort", "table"]) + def test_in1d_ravel(self, kind): # Test that in1d ravels its input arrays. This is not documented # behavior however. The test is to ensure consistentency. a = np.arange(6).reshape(2, 3) @@ -318,10 +357,110 @@ class TestSetOps: long_b = np.arange(3, 63).reshape(30, 2) ec = np.array([False, False, False, True, True, True]) - assert_array_equal(in1d(a, b, assume_unique=True), ec) - assert_array_equal(in1d(a, b, assume_unique=False), ec) - assert_array_equal(in1d(a, long_b, assume_unique=True), ec) - assert_array_equal(in1d(a, long_b, assume_unique=False), ec) + assert_array_equal(in1d(a, b, assume_unique=True, kind=kind), + ec) + assert_array_equal(in1d(a, b, assume_unique=False, + kind=kind), + ec) + assert_array_equal(in1d(a, long_b, assume_unique=True, + kind=kind), + ec) + assert_array_equal(in1d(a, long_b, assume_unique=False, + kind=kind), + ec) + + def test_in1d_hit_alternate_algorithm(self): + """Hit the standard isin code with integers""" + # Need extreme range to hit standard code + # This hits it without the use of kind='table' + a = np.array([5, 4, 5, 3, 4, 4, 1e9], dtype=np.int64) + b = np.array([2, 3, 4, 1e9], dtype=np.int64) + expected = np.array([0, 1, 0, 1, 1, 1, 1], dtype=bool) + assert_array_equal(expected, in1d(a, b)) + assert_array_equal(np.invert(expected), in1d(a, b, invert=True)) + + a = np.array([5, 7, 1, 2], dtype=np.int64) + b = np.array([2, 4, 3, 1, 5, 1e9], dtype=np.int64) + ec = np.array([True, False, True, True]) + c = in1d(a, b, assume_unique=True) + assert_array_equal(c, ec) + + @pytest.mark.parametrize("kind", [None, "sort", "table"]) + def test_in1d_boolean(self, kind): + """Test that in1d works for boolean input""" + a = np.array([True, False]) + b = np.array([False, False, False]) + expected = np.array([False, True]) + assert_array_equal(expected, + in1d(a, b, kind=kind)) + assert_array_equal(np.invert(expected), + in1d(a, b, invert=True, kind=kind)) + + @pytest.mark.parametrize("kind", [None, "sort"]) + def test_in1d_timedelta(self, kind): + """Test that in1d works for timedelta input""" + rstate = np.random.RandomState(0) + a = rstate.randint(0, 100, size=10) + b = rstate.randint(0, 100, size=10) + truth = in1d(a, b) + a_timedelta = a.astype("timedelta64[s]") + b_timedelta = b.astype("timedelta64[s]") + assert_array_equal(truth, in1d(a_timedelta, b_timedelta, kind=kind)) + + def test_in1d_table_timedelta_fails(self): + a = np.array([0, 1, 2], dtype="timedelta64[s]") + b = a + # Make sure it raises a value error: + with pytest.raises(ValueError): + in1d(a, b, kind="table") + + @pytest.mark.parametrize( + "dtype1,dtype2", + [ + (np.int8, np.int16), + (np.int16, np.int8), + (np.uint8, np.uint16), + (np.uint16, np.uint8), + (np.uint8, np.int16), + (np.int16, np.uint8), + ] + ) + @pytest.mark.parametrize("kind", [None, "sort", "table"]) + def test_in1d_mixed_dtype(self, dtype1, dtype2, kind): + """Test that in1d works as expected for mixed dtype input.""" + is_dtype2_signed = np.issubdtype(dtype2, np.signedinteger) + ar1 = np.array([0, 0, 1, 1], dtype=dtype1) + + if is_dtype2_signed: + ar2 = np.array([-128, 0, 127], dtype=dtype2) + else: + ar2 = np.array([127, 0, 255], dtype=dtype2) + + expected = np.array([True, True, False, False]) + + expect_failure = kind == "table" and any(( + dtype1 == np.int8 and dtype2 == np.int16, + dtype1 == np.int16 and dtype2 == np.int8 + )) + + if expect_failure: + with pytest.raises(RuntimeError, match="exceed the maximum"): + in1d(ar1, ar2, kind=kind) + else: + assert_array_equal(in1d(ar1, ar2, kind=kind), expected) + + @pytest.mark.parametrize("kind", [None, "sort", "table"]) + def test_in1d_mixed_boolean(self, kind): + """Test that in1d works as expected for bool/int input.""" + for dtype in np.typecodes["AllInteger"]: + a = np.array([True, False, False], dtype=bool) + b = np.array([0, 0, 0, 0], dtype=dtype) + expected = np.array([False, True, True], dtype=bool) + assert_array_equal(in1d(a, b, kind=kind), expected) + + a, b = b, a + expected = np.array([True, True, True, True], dtype=bool) + assert_array_equal(in1d(a, b, kind=kind), expected) def test_in1d_first_array_is_object(self): ar1 = [None] @@ -354,6 +493,73 @@ class TestSetOps: result = np.in1d(ar1, ar2) assert_array_equal(result, expected) + def test_in1d_with_arrays_containing_tuples(self): + ar1 = np.array([(1,), 2], dtype=object) + ar2 = np.array([(1,), 2], dtype=object) + expected = np.array([True, True]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + result = np.in1d(ar1, ar2, invert=True) + assert_array_equal(result, np.invert(expected)) + + # An integer is added at the end of the array to make sure + # that the array builder will create the array with tuples + # and after it's created the integer is removed. + # There's a bug in the array constructor that doesn't handle + # tuples properly and adding the integer fixes that. + ar1 = np.array([(1,), (2, 1), 1], dtype=object) + ar1 = ar1[:-1] + ar2 = np.array([(1,), (2, 1), 1], dtype=object) + ar2 = ar2[:-1] + expected = np.array([True, True]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + result = np.in1d(ar1, ar2, invert=True) + assert_array_equal(result, np.invert(expected)) + + ar1 = np.array([(1,), (2, 3), 1], dtype=object) + ar1 = ar1[:-1] + ar2 = np.array([(1,), 2], dtype=object) + expected = np.array([True, False]) + result = np.in1d(ar1, ar2) + assert_array_equal(result, expected) + result = np.in1d(ar1, ar2, invert=True) + assert_array_equal(result, np.invert(expected)) + + def test_in1d_errors(self): + """Test that in1d raises expected errors.""" + + # Error 1: `kind` is not one of 'sort' 'table' or None. + ar1 = np.array([1, 2, 3, 4, 5]) + ar2 = np.array([2, 4, 6, 8, 10]) + assert_raises(ValueError, in1d, ar1, ar2, kind='quicksort') + + # Error 2: `kind="table"` does not work for non-integral arrays. + obj_ar1 = np.array([1, 'a', 3, 'b', 5], dtype=object) + obj_ar2 = np.array([1, 'a', 3, 'b', 5], dtype=object) + assert_raises(ValueError, in1d, obj_ar1, obj_ar2, kind='table') + + for dtype in [np.int32, np.int64]: + ar1 = np.array([-1, 2, 3, 4, 5], dtype=dtype) + # The range of this array will overflow: + overflow_ar2 = np.array([-1, np.iinfo(dtype).max], dtype=dtype) + + # Error 3: `kind="table"` will trigger a runtime error + # if there is an integer overflow expected when computing the + # range of ar2 + assert_raises( + RuntimeError, + in1d, ar1, overflow_ar2, kind='table' + ) + + # Non-error: `kind=None` will *not* trigger a runtime error + # if there is an integer overflow, it will switch to + # the `sort` algorithm. + result = np.in1d(ar1, overflow_ar2, kind=None) + assert_array_equal(result, [True] + [False] * 4) + result = np.in1d(ar1, overflow_ar2, kind='sort') + assert_array_equal(result, [True] + [False] * 4) + def test_union1d(self): a = np.array([5, 4, 7, 1, 2]) b = np.array([2, 4, 3, 3, 2, 1, 5]) @@ -527,6 +733,63 @@ class TestUnique: assert_equal(a3_idx.dtype, np.intp) assert_equal(a3_inv.dtype, np.intp) + # test for ticket 2111 - float + a = [2.0, np.nan, 1.0, np.nan] + ua = [1.0, 2.0, np.nan] + ua_idx = [2, 0, 1] + ua_inv = [1, 2, 0, 2] + ua_cnt = [1, 1, 2] + assert_equal(np.unique(a), ua) + assert_equal(np.unique(a, return_index=True), (ua, ua_idx)) + assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv)) + assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt)) + + # test for ticket 2111 - complex + a = [2.0-1j, np.nan, 1.0+1j, complex(0.0, np.nan), complex(1.0, np.nan)] + ua = [1.0+1j, 2.0-1j, complex(0.0, np.nan)] + ua_idx = [2, 0, 3] + ua_inv = [1, 2, 0, 2, 2] + ua_cnt = [1, 1, 3] + assert_equal(np.unique(a), ua) + assert_equal(np.unique(a, return_index=True), (ua, ua_idx)) + assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv)) + assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt)) + + # test for ticket 2111 - datetime64 + nat = np.datetime64('nat') + a = [np.datetime64('2020-12-26'), nat, np.datetime64('2020-12-24'), nat] + ua = [np.datetime64('2020-12-24'), np.datetime64('2020-12-26'), nat] + ua_idx = [2, 0, 1] + ua_inv = [1, 2, 0, 2] + ua_cnt = [1, 1, 2] + assert_equal(np.unique(a), ua) + assert_equal(np.unique(a, return_index=True), (ua, ua_idx)) + assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv)) + assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt)) + + # test for ticket 2111 - timedelta + nat = np.timedelta64('nat') + a = [np.timedelta64(1, 'D'), nat, np.timedelta64(1, 'h'), nat] + ua = [np.timedelta64(1, 'h'), np.timedelta64(1, 'D'), nat] + ua_idx = [2, 0, 1] + ua_inv = [1, 2, 0, 2] + ua_cnt = [1, 1, 2] + assert_equal(np.unique(a), ua) + assert_equal(np.unique(a, return_index=True), (ua, ua_idx)) + assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv)) + assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt)) + + # test for gh-19300 + all_nans = [np.nan] * 4 + ua = [np.nan] + ua_idx = [0] + ua_inv = [0, 0, 0, 0] + ua_cnt = [4] + assert_equal(np.unique(all_nans), ua) + assert_equal(np.unique(all_nans, return_index=True), (ua, ua_idx)) + assert_equal(np.unique(all_nans, return_inverse=True), (ua, ua_inv)) + assert_equal(np.unique(all_nans, return_counts=True), (ua, ua_cnt)) + def test_unique_axis_errors(self): assert_raises(TypeError, self._run_axis_tests, object) assert_raises(TypeError, self._run_axis_tests, @@ -671,3 +934,11 @@ class TestUnique: assert_array_equal(uniq[:, inv], data) msg = "Unique's return_counts=True failed with axis=1" assert_array_equal(cnt, np.array([2, 1, 1]), msg) + + def test_unique_nanequals(self): + # issue 20326 + a = np.array([1, 1, np.nan, np.nan, np.nan]) + unq = np.unique(a) + not_unq = np.unique(a, equal_nan=False) + assert_array_equal(unq, np.array([1, np.nan])) + assert_array_equal(not_unq, np.array([1, np.nan, np.nan, np.nan])) |