BUG: Fix integer overflow in in1d for mixed integer dtypes #22877 (#22878)

* TST: Mixed integer types for in1d * BUG: Fix mixed dtype overflows for in1d (#22877) * BUG: Type conversion for integer overflow check * MAINT: Fix linting issues in in1d * MAINT: ar1 overflow check only for non-empty array * MAINT: Expand bounds of overflow check * TST: Fix integer overflow in mixed boolean test * TST: Include test for overflow on mixed dtypes * MAINT: Less conservative overflow checks
author: Miles Cranmer <miles.cranmer@gmail.com> 2022-12-25 13:43:43 -0500
committer: GitHub <noreply@github.com> 2022-12-25 11:43:43 -0700
commit: 235dbe1f9ea0955c0119f79a5c6614cd0268ef05 (patch)
tree: 74f4c45d0f1410c0f2dc51699ca555124290f98b
parent: c64e17125c0382edaee8facec04487aaf7078a87 (diff)
download: numpy-235dbe1f9ea0955c0119f79a5c6614cd0268ef05.tar.gz
2 files changed, 55 insertions, 4 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index cf5f47a82..300bbda26 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -649,8 +649,24 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
         ar2_range = int(ar2_max) - int(ar2_min)
 
         # Constraints on whether we can actually use the table method:
-        range_safe_from_overflow = ar2_range < np.iinfo(ar2.dtype).max
+        #  1. Assert memory usage is not too large
         below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
+        #  2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype
+        range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max
+        #  3. Check overflows for (ar1 - ar2_min); dtype=ar1.dtype
+        if ar1.size > 0:
+            ar1_min = np.min(ar1)
+            ar1_max = np.max(ar1)
+
+            # After masking, the range of ar1 is guaranteed to be
+            # within the range of ar2:
+            ar1_upper = min(int(ar1_max), int(ar2_max))
+            ar1_lower = max(int(ar1_min), int(ar2_min))
+
+            range_safe_from_overflow &= all((
+                ar1_upper - int(ar2_min) <= np.iinfo(ar1.dtype).max,
+                ar1_lower - int(ar2_min) >= np.iinfo(ar1.dtype).min
+            ))
 
         # Optimal performance is for approximately
         # log10(size) > (log10(range) - 2.27) / 0.927.
@@ -687,7 +703,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
         elif kind == 'table':  # not range_safe_from_overflow
             raise RuntimeError(
                 "You have specified kind='table', "
-                "but the range of values in `ar2` exceeds the "
+                "but the range of values in `ar2` or `ar1` exceed the "
                 "maximum integer of the datatype. "
                 "Please set `kind` to None or 'sort'."
             )
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index bb07e25a9..a180accbe 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -414,13 +414,48 @@ class TestSetOps:
         with pytest.raises(ValueError):
             in1d(a, b, kind="table")
 
+    @pytest.mark.parametrize(
+        "dtype1,dtype2",
+        [
+            (np.int8, np.int16),
+            (np.int16, np.int8),
+            (np.uint8, np.uint16),
+            (np.uint16, np.uint8),
+            (np.uint8, np.int16),
+            (np.int16, np.uint8),
+        ]
+    )
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d_mixed_dtype(self, dtype1, dtype2, kind):
+        """Test that in1d works as expected for mixed dtype input."""
+        is_dtype2_signed = np.issubdtype(dtype2, np.signedinteger)
+        ar1 = np.array([0, 0, 1, 1], dtype=dtype1)
+
+        if is_dtype2_signed:
+            ar2 = np.array([-128, 0, 127], dtype=dtype2)
+        else:
+            ar2 = np.array([127, 0, 255], dtype=dtype2)
+
+        expected = np.array([True, True, False, False])
+
+        expect_failure = kind == "table" and any((
+            dtype1 == np.int8 and dtype2 == np.int16,
+            dtype1 == np.int16 and dtype2 == np.int8
+        ))
+
+        if expect_failure:
+            with pytest.raises(RuntimeError, match="exceed the maximum"):
+                in1d(ar1, ar2, kind=kind)
+        else:
+            assert_array_equal(in1d(ar1, ar2, kind=kind), expected)
+
     @pytest.mark.parametrize("kind", [None, "sort", "table"])
     def test_in1d_mixed_boolean(self, kind):
         """Test that in1d works as expected for bool/int input."""
         for dtype in np.typecodes["AllInteger"]:
             a = np.array([True, False, False], dtype=bool)
-            b = np.array([1, 1, 1, 1], dtype=dtype)
-            expected = np.array([True, False, False], dtype=bool)
+            b = np.array([0, 0, 0, 0], dtype=dtype)
+            expected = np.array([False, True, True], dtype=bool)
             assert_array_equal(in1d(a, b, kind=kind), expected)
 
             a, b = b, a
author	Miles Cranmer <miles.cranmer@gmail.com>	2022-12-25 13:43:43 -0500
committer	GitHub <noreply@github.com>	2022-12-25 11:43:43 -0700
commit	235dbe1f9ea0955c0119f79a5c6614cd0268ef05 (patch)
tree	74f4c45d0f1410c0f2dc51699ca555124290f98b
parent	c64e17125c0382edaee8facec04487aaf7078a87 (diff)
download	numpy-235dbe1f9ea0955c0119f79a5c6614cd0268ef05.tar.gz