Merge branch 'main' into cython3_noexcept

author: scoder <stefan_ml@behnel.de> 2023-05-04 09:29:53 +0200
committer: GitHub <noreply@github.com> 2023-05-04 09:29:53 +0200
commit: 442c8f48d3146ec32c7d5387310e171276cf10ac (patch)
tree: d8911d1a64e384b7955d3fc09a07edd218a9f1ee /numpy/lib/tests/test_arraysetops.py
parent: 3e4a6cba2da27bbe2a6e12c163238e503c9f6a07 (diff)
parent: 9163e933df91b516b6f0c7a9ba8ad1750e642f37 (diff)
download: numpy-442c8f48d3146ec32c7d5387310e171276cf10ac.tar.gz
1 files changed, 304 insertions, 33 deletions
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index 81ba789e3..a180accbe 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -125,32 +125,36 @@ class TestSetOps:
         assert_array_equal([7, 1], ediff1d(two_elem, to_begin=7))
         assert_array_equal([5, 6, 1], ediff1d(two_elem, to_begin=[5, 6]))
 
-    @pytest.mark.parametrize("ary, prepend, append", [
+    @pytest.mark.parametrize("ary, prepend, append, expected", [
         # should fail because trying to cast
         # np.nan standard floating point value
         # into an integer array:
         (np.array([1, 2, 3], dtype=np.int64),
          None,
-         np.nan),
+         np.nan,
+         'to_end'),
         # should fail because attempting
         # to downcast to int type:
         (np.array([1, 2, 3], dtype=np.int64),
          np.array([5, 7, 2], dtype=np.float32),
-         None),
+         None,
+         'to_begin'),
         # should fail because attempting to cast
         # two special floating point values
-        # to integers (on both sides of ary):
+        # to integers (on both sides of ary),
+        # `to_begin` is in the error message as the impl checks this first:
         (np.array([1., 3., 9.], dtype=np.int8),
          np.nan,
-         np.nan),
+         np.nan,
+         'to_begin'),
          ])
-    def test_ediff1d_forbidden_type_casts(self, ary, prepend, append):
+    def test_ediff1d_forbidden_type_casts(self, ary, prepend, append, expected):
         # verify resolution of gh-11490
 
         # specifically, raise an appropriate
         # Exception when attempting to append or
         # prepend with an incompatible type
-        msg = 'must be compatible'
+        msg = 'dtype of `{}` must be compatible'.format(expected)
         with assert_raises_regex(TypeError, msg):
             ediff1d(ary=ary,
                     to_end=append,
@@ -191,7 +195,8 @@ class TestSetOps:
         assert_equal(actual, expected)
         assert actual.dtype == expected.dtype
 
-    def test_isin(self):
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_isin(self, kind):
         # the tests for in1d cover most of isin's behavior
         # if in1d is removed, would need to change those tests to test
         # isin instead.
@@ -201,7 +206,7 @@ class TestSetOps:
         isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1})
 
         def assert_isin_equal(a, b):
-            x = isin(a, b)
+            x = isin(a, b, kind=kind)
             y = isin_slow(a, b)
             assert_array_equal(x, y)
 
@@ -227,12 +232,32 @@ class TestSetOps:
         assert_isin_equal(5, 6)
 
         # empty array-like:
-        x = []
-        assert_isin_equal(x, b)
-        assert_isin_equal(a, x)
-        assert_isin_equal(x, x)
-
-    def test_in1d(self):
+        if kind != "table":
+            # An empty list will become float64,
+            # which is invalid for kind="table"
+            x = []
+            assert_isin_equal(x, b)
+            assert_isin_equal(a, x)
+            assert_isin_equal(x, x)
+
+        # empty array with various types:
+        for dtype in [bool, np.int64, np.float64]:
+            if kind == "table" and dtype == np.float64:
+                continue
+
+            if dtype in {np.int64, np.float64}:
+                ar = np.array([10, 20, 30], dtype=dtype)
+            elif dtype in {bool}:
+                ar = np.array([True, False, False])
+
+            empty_array = np.array([], dtype=dtype)
+
+            assert_isin_equal(empty_array, ar)
+            assert_isin_equal(ar, empty_array)
+            assert_isin_equal(empty_array, empty_array)
+
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d(self, kind):
         # we use two different sizes for the b array here to test the
         # two different paths in in1d().
         for mult in (1, 10):
@@ -240,57 +265,58 @@ class TestSetOps:
             a = [5, 7, 1, 2]
             b = [2, 4, 3, 1, 5] * mult
             ec = np.array([True, False, True, True])
-            c = in1d(a, b, assume_unique=True)
+            c = in1d(a, b, assume_unique=True, kind=kind)
             assert_array_equal(c, ec)
 
             a[0] = 8
             ec = np.array([False, False, True, True])
-            c = in1d(a, b, assume_unique=True)
+            c = in1d(a, b, assume_unique=True, kind=kind)
             assert_array_equal(c, ec)
 
             a[0], a[3] = 4, 8
             ec = np.array([True, False, True, False])
-            c = in1d(a, b, assume_unique=True)
+            c = in1d(a, b, assume_unique=True, kind=kind)
             assert_array_equal(c, ec)
 
             a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
             b = [2, 3, 4] * mult
             ec = [False, True, False, True, True, True, True, True, True,
                   False, True, False, False, False]
-            c = in1d(a, b)
+            c = in1d(a, b, kind=kind)
             assert_array_equal(c, ec)
 
             b = b + [5, 5, 4] * mult
             ec = [True, True, True, True, True, True, True, True, True, True,
                   True, False, True, True]
-            c = in1d(a, b)
+            c = in1d(a, b, kind=kind)
             assert_array_equal(c, ec)
 
             a = np.array([5, 7, 1, 2])
             b = np.array([2, 4, 3, 1, 5] * mult)
             ec = np.array([True, False, True, True])
-            c = in1d(a, b)
+            c = in1d(a, b, kind=kind)
             assert_array_equal(c, ec)
 
             a = np.array([5, 7, 1, 1, 2])
             b = np.array([2, 4, 3, 3, 1, 5] * mult)
             ec = np.array([True, False, True, True, True])
-            c = in1d(a, b)
+            c = in1d(a, b, kind=kind)
             assert_array_equal(c, ec)
 
             a = np.array([5, 5])
             b = np.array([2, 2] * mult)
             ec = np.array([False, False])
-            c = in1d(a, b)
+            c = in1d(a, b, kind=kind)
             assert_array_equal(c, ec)
 
         a = np.array([5])
         b = np.array([2])
         ec = np.array([False])
-        c = in1d(a, b)
+        c = in1d(a, b, kind=kind)
         assert_array_equal(c, ec)
 
-        assert_array_equal(in1d([], []), [])
+        if kind in {None, "sort"}:
+            assert_array_equal(in1d([], [], kind=kind), [])
 
     def test_in1d_char_array(self):
         a = np.array(['a', 'b', 'c', 'd', 'e', 'c', 'e', 'b'])
@@ -301,16 +327,29 @@ class TestSetOps:
 
         assert_array_equal(c, ec)
 
-    def test_in1d_invert(self):
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d_invert(self, kind):
         "Test in1d's invert parameter"
         # We use two different sizes for the b array here to test the
         # two different paths in in1d().
         for mult in (1, 10):
             a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
             b = [2, 3, 4] * mult
-            assert_array_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True))
-
-    def test_in1d_ravel(self):
+            assert_array_equal(np.invert(in1d(a, b, kind=kind)),
+                               in1d(a, b, invert=True, kind=kind))
+
+        # float:
+        if kind in {None, "sort"}:
+            for mult in (1, 10):
+                a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5],
+                            dtype=np.float32)
+                b = [2, 3, 4] * mult
+                b = np.array(b, dtype=np.float32)
+                assert_array_equal(np.invert(in1d(a, b, kind=kind)),
+                                   in1d(a, b, invert=True, kind=kind))
+
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d_ravel(self, kind):
         # Test that in1d ravels its input arrays. This is not documented
         # behavior however. The test is to ensure consistentency.
         a = np.arange(6).reshape(2, 3)
@@ -318,10 +357,110 @@ class TestSetOps:
         long_b = np.arange(3, 63).reshape(30, 2)
         ec = np.array([False, False, False, True, True, True])
 
-        assert_array_equal(in1d(a, b, assume_unique=True), ec)
-        assert_array_equal(in1d(a, b, assume_unique=False), ec)
-        assert_array_equal(in1d(a, long_b, assume_unique=True), ec)
-        assert_array_equal(in1d(a, long_b, assume_unique=False), ec)
+        assert_array_equal(in1d(a, b, assume_unique=True, kind=kind),
+                           ec)
+        assert_array_equal(in1d(a, b, assume_unique=False,
+                                kind=kind),
+                           ec)
+        assert_array_equal(in1d(a, long_b, assume_unique=True,
+                                kind=kind),
+                           ec)
+        assert_array_equal(in1d(a, long_b, assume_unique=False,
+                                kind=kind),
+                           ec)
+
+    def test_in1d_hit_alternate_algorithm(self):
+        """Hit the standard isin code with integers"""
+        # Need extreme range to hit standard code
+        # This hits it without the use of kind='table'
+        a = np.array([5, 4, 5, 3, 4, 4, 1e9], dtype=np.int64)
+        b = np.array([2, 3, 4, 1e9], dtype=np.int64)
+        expected = np.array([0, 1, 0, 1, 1, 1, 1], dtype=bool)
+        assert_array_equal(expected, in1d(a, b))
+        assert_array_equal(np.invert(expected), in1d(a, b, invert=True))
+
+        a = np.array([5, 7, 1, 2], dtype=np.int64)
+        b = np.array([2, 4, 3, 1, 5, 1e9], dtype=np.int64)
+        ec = np.array([True, False, True, True])
+        c = in1d(a, b, assume_unique=True)
+        assert_array_equal(c, ec)
+
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d_boolean(self, kind):
+        """Test that in1d works for boolean input"""
+        a = np.array([True, False])
+        b = np.array([False, False, False])
+        expected = np.array([False, True])
+        assert_array_equal(expected,
+                           in1d(a, b, kind=kind))
+        assert_array_equal(np.invert(expected),
+                           in1d(a, b, invert=True, kind=kind))
+
+    @pytest.mark.parametrize("kind", [None, "sort"])
+    def test_in1d_timedelta(self, kind):
+        """Test that in1d works for timedelta input"""
+        rstate = np.random.RandomState(0)
+        a = rstate.randint(0, 100, size=10)
+        b = rstate.randint(0, 100, size=10)
+        truth = in1d(a, b)
+        a_timedelta = a.astype("timedelta64[s]")
+        b_timedelta = b.astype("timedelta64[s]")
+        assert_array_equal(truth, in1d(a_timedelta, b_timedelta, kind=kind))
+
+    def test_in1d_table_timedelta_fails(self):
+        a = np.array([0, 1, 2], dtype="timedelta64[s]")
+        b = a
+        # Make sure it raises a value error:
+        with pytest.raises(ValueError):
+            in1d(a, b, kind="table")
+
+    @pytest.mark.parametrize(
+        "dtype1,dtype2",
+        [
+            (np.int8, np.int16),
+            (np.int16, np.int8),
+            (np.uint8, np.uint16),
+            (np.uint16, np.uint8),
+            (np.uint8, np.int16),
+            (np.int16, np.uint8),
+        ]
+    )
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d_mixed_dtype(self, dtype1, dtype2, kind):
+        """Test that in1d works as expected for mixed dtype input."""
+        is_dtype2_signed = np.issubdtype(dtype2, np.signedinteger)
+        ar1 = np.array([0, 0, 1, 1], dtype=dtype1)
+
+        if is_dtype2_signed:
+            ar2 = np.array([-128, 0, 127], dtype=dtype2)
+        else:
+            ar2 = np.array([127, 0, 255], dtype=dtype2)
+
+        expected = np.array([True, True, False, False])
+
+        expect_failure = kind == "table" and any((
+            dtype1 == np.int8 and dtype2 == np.int16,
+            dtype1 == np.int16 and dtype2 == np.int8
+        ))
+
+        if expect_failure:
+            with pytest.raises(RuntimeError, match="exceed the maximum"):
+                in1d(ar1, ar2, kind=kind)
+        else:
+            assert_array_equal(in1d(ar1, ar2, kind=kind), expected)
+
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d_mixed_boolean(self, kind):
+        """Test that in1d works as expected for bool/int input."""
+        for dtype in np.typecodes["AllInteger"]:
+            a = np.array([True, False, False], dtype=bool)
+            b = np.array([0, 0, 0, 0], dtype=dtype)
+            expected = np.array([False, True, True], dtype=bool)
+            assert_array_equal(in1d(a, b, kind=kind), expected)
+
+            a, b = b, a
+            expected = np.array([True, True, True, True], dtype=bool)
+            assert_array_equal(in1d(a, b, kind=kind), expected)
 
     def test_in1d_first_array_is_object(self):
         ar1 = [None]
@@ -354,6 +493,73 @@ class TestSetOps:
         result = np.in1d(ar1, ar2)
         assert_array_equal(result, expected)
 
+    def test_in1d_with_arrays_containing_tuples(self):
+        ar1 = np.array([(1,), 2], dtype=object)
+        ar2 = np.array([(1,), 2], dtype=object)
+        expected = np.array([True, True])
+        result = np.in1d(ar1, ar2)
+        assert_array_equal(result, expected)
+        result = np.in1d(ar1, ar2, invert=True)
+        assert_array_equal(result, np.invert(expected))
+
+        # A trailing integer is included when building the array so that
+        # the array constructor creates an array of tuples; the integer
+        # is sliced off again once the array has been created.
+        # This works around a bug in the array constructor, which does
+        # not handle tuples properly on their own.
+        ar1 = np.array([(1,), (2, 1), 1], dtype=object)
+        ar1 = ar1[:-1]
+        ar2 = np.array([(1,), (2, 1), 1], dtype=object)
+        ar2 = ar2[:-1]
+        expected = np.array([True, True])
+        result = np.in1d(ar1, ar2)
+        assert_array_equal(result, expected)
+        result = np.in1d(ar1, ar2, invert=True)
+        assert_array_equal(result, np.invert(expected))
+
+        ar1 = np.array([(1,), (2, 3), 1], dtype=object)
+        ar1 = ar1[:-1]
+        ar2 = np.array([(1,), 2], dtype=object)
+        expected = np.array([True, False])
+        result = np.in1d(ar1, ar2)
+        assert_array_equal(result, expected)
+        result = np.in1d(ar1, ar2, invert=True)
+        assert_array_equal(result, np.invert(expected))
+
+    def test_in1d_errors(self):
+        """Test that in1d raises expected errors."""
+
+        # Error 1: `kind` is not one of 'sort', 'table' or None.
+        ar1 = np.array([1, 2, 3, 4, 5])
+        ar2 = np.array([2, 4, 6, 8, 10])
+        assert_raises(ValueError, in1d, ar1, ar2, kind='quicksort')
+
+        # Error 2: `kind="table"` does not work for non-integral arrays.
+        obj_ar1 = np.array([1, 'a', 3, 'b', 5], dtype=object)
+        obj_ar2 = np.array([1, 'a', 3, 'b', 5], dtype=object)
+        assert_raises(ValueError, in1d, obj_ar1, obj_ar2, kind='table')
+
+        for dtype in [np.int32, np.int64]:
+            ar1 = np.array([-1, 2, 3, 4, 5], dtype=dtype)
+            # The range of this array will overflow:
+            overflow_ar2 = np.array([-1, np.iinfo(dtype).max], dtype=dtype)
+
+            # Error 3: `kind="table"` will trigger a runtime error
+            #  if there is an integer overflow expected when computing the
+            #  range of ar2
+            assert_raises(
+                RuntimeError,
+                in1d, ar1, overflow_ar2, kind='table'
+            )
+
+            # Non-error: `kind=None` will *not* trigger a runtime error
+            #  if there is an integer overflow; it will switch to
+            #  the `sort` algorithm.
+            result = np.in1d(ar1, overflow_ar2, kind=None)
+            assert_array_equal(result, [True] + [False] * 4)
+            result = np.in1d(ar1, overflow_ar2, kind='sort')
+            assert_array_equal(result, [True] + [False] * 4)
+
     def test_union1d(self):
         a = np.array([5, 4, 7, 1, 2])
         b = np.array([2, 4, 3, 3, 2, 1, 5])
@@ -527,6 +733,63 @@ class TestUnique:
         assert_equal(a3_idx.dtype, np.intp)
         assert_equal(a3_inv.dtype, np.intp)
 
+        # test for ticket 2111 - float
+        a = [2.0, np.nan, 1.0, np.nan]
+        ua = [1.0, 2.0, np.nan]
+        ua_idx = [2, 0, 1]
+        ua_inv = [1, 2, 0, 2]
+        ua_cnt = [1, 1, 2]
+        assert_equal(np.unique(a), ua)
+        assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+        assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+        assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
+        # test for ticket 2111 - complex
+        a = [2.0-1j, np.nan, 1.0+1j, complex(0.0, np.nan), complex(1.0, np.nan)]
+        ua = [1.0+1j, 2.0-1j, complex(0.0, np.nan)]
+        ua_idx = [2, 0, 3]
+        ua_inv = [1, 2, 0, 2, 2]
+        ua_cnt = [1, 1, 3]
+        assert_equal(np.unique(a), ua)
+        assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+        assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+        assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
+        # test for ticket 2111 - datetime64
+        nat = np.datetime64('nat')
+        a = [np.datetime64('2020-12-26'), nat, np.datetime64('2020-12-24'), nat]
+        ua = [np.datetime64('2020-12-24'), np.datetime64('2020-12-26'), nat]
+        ua_idx = [2, 0, 1]
+        ua_inv = [1, 2, 0, 2]
+        ua_cnt = [1, 1, 2]
+        assert_equal(np.unique(a), ua)
+        assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+        assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+        assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
+        # test for ticket 2111 - timedelta
+        nat = np.timedelta64('nat')
+        a = [np.timedelta64(1, 'D'), nat, np.timedelta64(1, 'h'), nat]
+        ua = [np.timedelta64(1, 'h'), np.timedelta64(1, 'D'), nat]
+        ua_idx = [2, 0, 1]
+        ua_inv = [1, 2, 0, 2]
+        ua_cnt = [1, 1, 2]
+        assert_equal(np.unique(a), ua)
+        assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+        assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+        assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
+        # test for gh-19300
+        all_nans = [np.nan] * 4
+        ua = [np.nan]
+        ua_idx = [0]
+        ua_inv = [0, 0, 0, 0]
+        ua_cnt = [4]
+        assert_equal(np.unique(all_nans), ua)
+        assert_equal(np.unique(all_nans, return_index=True), (ua, ua_idx))
+        assert_equal(np.unique(all_nans, return_inverse=True), (ua, ua_inv))
+        assert_equal(np.unique(all_nans, return_counts=True), (ua, ua_cnt))
+
     def test_unique_axis_errors(self):
         assert_raises(TypeError, self._run_axis_tests, object)
         assert_raises(TypeError, self._run_axis_tests,
@@ -671,3 +934,11 @@ class TestUnique:
         assert_array_equal(uniq[:, inv], data)
         msg = "Unique's return_counts=True failed with axis=1"
         assert_array_equal(cnt, np.array([2, 1, 1]), msg)
+
+    def test_unique_nanequals(self):
+        # issue 20326
+        a = np.array([1, 1, np.nan, np.nan, np.nan])
+        unq = np.unique(a)
+        not_unq = np.unique(a, equal_nan=False)
+        assert_array_equal(unq, np.array([1, np.nan]))
+        assert_array_equal(not_unq, np.array([1, np.nan, np.nan, np.nan]))
author	scoder <stefan_ml@behnel.de>	2023-05-04 09:29:53 +0200
committer	GitHub <noreply@github.com>	2023-05-04 09:29:53 +0200
commit	442c8f48d3146ec32c7d5387310e171276cf10ac (patch)
tree	d8911d1a64e384b7955d3fc09a07edd218a9f1ee /numpy/lib/tests/test_arraysetops.py
parent	3e4a6cba2da27bbe2a6e12c163238e503c9f6a07 (diff)
parent	9163e933df91b516b6f0c7a9ba8ad1750e642f37 (diff)
download	numpy-442c8f48d3146ec32c7d5387310e171276cf10ac.tar.gz