summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMiles Cranmer <miles.cranmer@gmail.com>2022-12-25 13:43:43 -0500
committerGitHub <noreply@github.com>2022-12-25 11:43:43 -0700
commit235dbe1f9ea0955c0119f79a5c6614cd0268ef05 (patch)
tree74f4c45d0f1410c0f2dc51699ca555124290f98b
parentc64e17125c0382edaee8facec04487aaf7078a87 (diff)
downloadnumpy-235dbe1f9ea0955c0119f79a5c6614cd0268ef05.tar.gz
BUG: Fix integer overflow in in1d for mixed integer dtypes #22877 (#22878)
* TST: Mixed integer types for in1d * BUG: Fix mixed dtype overflows for in1d (#22877) * BUG: Type conversion for integer overflow check * MAINT: Fix linting issues in in1d * MAINT: ar1 overflow check only for non-empty array * MAINT: Expand bounds of overflow check * TST: Fix integer overflow in mixed boolean test * TST: Include test for overflow on mixed dtypes * MAINT: Less conservative overflow checks
-rw-r--r--numpy/lib/arraysetops.py20
-rw-r--r--numpy/lib/tests/test_arraysetops.py39
2 files changed, 55 insertions, 4 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index cf5f47a82..300bbda26 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -649,8 +649,24 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
ar2_range = int(ar2_max) - int(ar2_min)
# Constraints on whether we can actually use the table method:
- range_safe_from_overflow = ar2_range < np.iinfo(ar2.dtype).max
+ # 1. Assert memory usage is not too large
below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
+ # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype
+ range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max
+ # 3. Check overflows for (ar1 - ar2_min); dtype=ar1.dtype
+ if ar1.size > 0:
+ ar1_min = np.min(ar1)
+ ar1_max = np.max(ar1)
+
+ # After masking, the range of ar1 is guaranteed to be
+ # within the range of ar2:
+ ar1_upper = min(int(ar1_max), int(ar2_max))
+ ar1_lower = max(int(ar1_min), int(ar2_min))
+
+ range_safe_from_overflow &= all((
+ ar1_upper - int(ar2_min) <= np.iinfo(ar1.dtype).max,
+ ar1_lower - int(ar2_min) >= np.iinfo(ar1.dtype).min
+ ))
# Optimal performance is for approximately
# log10(size) > (log10(range) - 2.27) / 0.927.
@@ -687,7 +703,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
elif kind == 'table': # not range_safe_from_overflow
raise RuntimeError(
"You have specified kind='table', "
- "but the range of values in `ar2` exceeds the "
+ "but the range of values in `ar2` or `ar1` exceed the "
"maximum integer of the datatype. "
"Please set `kind` to None or 'sort'."
)
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index bb07e25a9..a180accbe 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -414,13 +414,48 @@ class TestSetOps:
with pytest.raises(ValueError):
in1d(a, b, kind="table")
+ @pytest.mark.parametrize(
+ "dtype1,dtype2",
+ [
+ (np.int8, np.int16),
+ (np.int16, np.int8),
+ (np.uint8, np.uint16),
+ (np.uint16, np.uint8),
+ (np.uint8, np.int16),
+ (np.int16, np.uint8),
+ ]
+ )
+ @pytest.mark.parametrize("kind", [None, "sort", "table"])
+ def test_in1d_mixed_dtype(self, dtype1, dtype2, kind):
+ """Test that in1d works as expected for mixed dtype input."""
+ is_dtype2_signed = np.issubdtype(dtype2, np.signedinteger)
+ ar1 = np.array([0, 0, 1, 1], dtype=dtype1)
+
+ if is_dtype2_signed:
+ ar2 = np.array([-128, 0, 127], dtype=dtype2)
+ else:
+ ar2 = np.array([127, 0, 255], dtype=dtype2)
+
+ expected = np.array([True, True, False, False])
+
+ expect_failure = kind == "table" and any((
+ dtype1 == np.int8 and dtype2 == np.int16,
+ dtype1 == np.int16 and dtype2 == np.int8
+ ))
+
+ if expect_failure:
+ with pytest.raises(RuntimeError, match="exceed the maximum"):
+ in1d(ar1, ar2, kind=kind)
+ else:
+ assert_array_equal(in1d(ar1, ar2, kind=kind), expected)
+
@pytest.mark.parametrize("kind", [None, "sort", "table"])
def test_in1d_mixed_boolean(self, kind):
"""Test that in1d works as expected for bool/int input."""
for dtype in np.typecodes["AllInteger"]:
a = np.array([True, False, False], dtype=bool)
- b = np.array([1, 1, 1, 1], dtype=dtype)
- expected = np.array([True, False, False], dtype=bool)
+ b = np.array([0, 0, 0, 0], dtype=dtype)
+ expected = np.array([False, True, True], dtype=bool)
assert_array_equal(in1d(a, b, kind=kind), expected)
a, b = b, a