diff options
author | Miles Cranmer <miles.cranmer@gmail.com> | 2022-12-25 13:43:43 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-12-25 11:43:43 -0700 |
commit | 235dbe1f9ea0955c0119f79a5c6614cd0268ef05 (patch) | |
tree | 74f4c45d0f1410c0f2dc51699ca555124290f98b | |
parent | c64e17125c0382edaee8facec04487aaf7078a87 (diff) | |
download | numpy-235dbe1f9ea0955c0119f79a5c6614cd0268ef05.tar.gz |
BUG: Fix integer overflow in in1d for mixed integer dtypes (issue #22877; PR #22878)
* TST: Mixed integer types for in1d
* BUG: Fix mixed dtype overflows for in1d (#22877)
* BUG: Type conversion for integer overflow check
* MAINT: Fix linting issues in in1d
* MAINT: ar1 overflow check only for non-empty array
* MAINT: Expand bounds of overflow check
* TST: Fix integer overflow in mixed boolean test
* TST: Include test for overflow on mixed dtypes
* MAINT: Less conservative overflow checks
-rw-r--r-- | numpy/lib/arraysetops.py | 20 | ||||
-rw-r--r-- | numpy/lib/tests/test_arraysetops.py | 39 |
2 files changed, 55 insertions, 4 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index cf5f47a82..300bbda26 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -649,8 +649,24 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None): ar2_range = int(ar2_max) - int(ar2_min) # Constraints on whether we can actually use the table method: - range_safe_from_overflow = ar2_range < np.iinfo(ar2.dtype).max + # 1. Assert memory usage is not too large below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size) + # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype + range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max + # 3. Check overflows for (ar1 - ar2_min); dtype=ar1.dtype + if ar1.size > 0: + ar1_min = np.min(ar1) + ar1_max = np.max(ar1) + + # After masking, the range of ar1 is guaranteed to be + # within the range of ar2: + ar1_upper = min(int(ar1_max), int(ar2_max)) + ar1_lower = max(int(ar1_min), int(ar2_min)) + + range_safe_from_overflow &= all(( + ar1_upper - int(ar2_min) <= np.iinfo(ar1.dtype).max, + ar1_lower - int(ar2_min) >= np.iinfo(ar1.dtype).min + )) # Optimal performance is for approximately # log10(size) > (log10(range) - 2.27) / 0.927. @@ -687,7 +703,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None): elif kind == 'table': # not range_safe_from_overflow raise RuntimeError( "You have specified kind='table', " - "but the range of values in `ar2` exceeds the " + "but the range of values in `ar2` or `ar1` exceed the " "maximum integer of the datatype. " "Please set `kind` to None or 'sort'." 
) diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index bb07e25a9..a180accbe 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -414,13 +414,48 @@ class TestSetOps: with pytest.raises(ValueError): in1d(a, b, kind="table") + @pytest.mark.parametrize( + "dtype1,dtype2", + [ + (np.int8, np.int16), + (np.int16, np.int8), + (np.uint8, np.uint16), + (np.uint16, np.uint8), + (np.uint8, np.int16), + (np.int16, np.uint8), + ] + ) + @pytest.mark.parametrize("kind", [None, "sort", "table"]) + def test_in1d_mixed_dtype(self, dtype1, dtype2, kind): + """Test that in1d works as expected for mixed dtype input.""" + is_dtype2_signed = np.issubdtype(dtype2, np.signedinteger) + ar1 = np.array([0, 0, 1, 1], dtype=dtype1) + + if is_dtype2_signed: + ar2 = np.array([-128, 0, 127], dtype=dtype2) + else: + ar2 = np.array([127, 0, 255], dtype=dtype2) + + expected = np.array([True, True, False, False]) + + expect_failure = kind == "table" and any(( + dtype1 == np.int8 and dtype2 == np.int16, + dtype1 == np.int16 and dtype2 == np.int8 + )) + + if expect_failure: + with pytest.raises(RuntimeError, match="exceed the maximum"): + in1d(ar1, ar2, kind=kind) + else: + assert_array_equal(in1d(ar1, ar2, kind=kind), expected) + @pytest.mark.parametrize("kind", [None, "sort", "table"]) def test_in1d_mixed_boolean(self, kind): """Test that in1d works as expected for bool/int input.""" for dtype in np.typecodes["AllInteger"]: a = np.array([True, False, False], dtype=bool) - b = np.array([1, 1, 1, 1], dtype=dtype) - expected = np.array([True, False, False], dtype=bool) + b = np.array([0, 0, 0, 0], dtype=dtype) + expected = np.array([False, True, True], dtype=bool) assert_array_equal(in1d(a, b, kind=kind), expected) a, b = b, a |