From cedba623b110caf83f46edfa38cb4fbc0191e285 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Mon, 1 Oct 2018 12:05:59 -0400
Subject: MAINT: Optimize np.isin for integer arrays

- This optimization indexes with an intermediary boolean array to speed up
numpy.isin and numpy.in1d for integer arrays over a range of optimal parameters
which are calculated.
---
 numpy/lib/arraysetops.py | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index d42ab2675..c22c0ebf2 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -593,6 +593,46 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
     # Ensure that iteration through object arrays yields size-1 arrays
     if ar2.dtype == object:
         ar2 = ar2.reshape(-1, 1)
+    # Check if we can use a fast integer algorithm:
+    integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and
+                      np.issubdtype(ar2.dtype, np.integer))
+
+    if integer_arrays:
+        ar2_min = np.min(ar2)
+        ar2_max = np.max(ar2)
+        ar2_range = ar2_max - ar2_min
+        ar2_size = ar2.size
+
+        # Optimal performance is for approximately
+        # log10(size) > (log10(range) - 2.27) / 0.927, see discussion on
+        # https://github.com/numpy/numpy/pull/12065
+        optimal_parameters = (
+                np.log10(ar2_size + 1) >
+                ((np.log10(ar2_range + 1) - 2.27) / 0.927)
+            )
+
+        if optimal_parameters:
+
+            if invert:
+                outgoing_array = np.ones_like(ar1, dtype=np.bool_)
+            else:
+                outgoing_array = np.zeros_like(ar1, dtype=np.bool_)
+
+            # Make elements 1 where the integer exists in ar2
+            if invert:
+                isin_helper_ar = np.ones(ar2_range + 1, dtype=np.bool_)
+                isin_helper_ar[ar2 - ar2_min] = 0
+            else:
+                isin_helper_ar = np.zeros(ar2_range + 1, dtype=np.bool_)
+                isin_helper_ar[ar2 - ar2_min] = 1
+
+            # Mask out elements we know won't work
+            basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min)
+            outgoing_array[basic_mask] = isin_helper_ar[ar1[basic_mask] -
+                                                        ar2_min]
+
+            return outgoing_array
+
 
     # Check if one of the arrays may contain arbitrary objects
     contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject
-- 
cgit v1.2.1


From d2ea8190c769c1c546f6f3fed495772aeeb90f0b Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Tue, 2 Oct 2018 17:37:46 -0400
Subject: MAINT: Optimize np.isin for boolean arrays

- This change converts boolean input to numpy.isin and numpy.in1d into uint8, allowing
the function to use the new optimized code
---
 numpy/lib/arraysetops.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index c22c0ebf2..a0ee8d9b2 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -593,6 +593,12 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
     # Ensure that iteration through object arrays yields size-1 arrays
     if ar2.dtype == object:
         ar2 = ar2.reshape(-1, 1)
+    # Convert booleans to uint8 so we can use the fast integer algorithm
+    if ar1.dtype == np.bool_:
+        ar1 = ar1.view(np.uint8)
+    if ar2.dtype == np.bool_:
+        ar2 = ar2.view(np.uint8)
+
     # Check if we can use a fast integer algorithm:
     integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and
                       np.issubdtype(ar2.dtype, np.integer))
-- 
cgit v1.2.1


From 0f6108c2b37a70571bee6185dbe0ffa92adf995e Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 26 Dec 2018 18:57:25 -0500
Subject: MAINT: Check for overflow in integral np.isin

- Before this fix, if, e.g., int8(-100) and int8(100) are the min and
max, respectively, of ar2, ar2_range would experience an integer
overflow and optimal_parameters would be an invalid value.
``if optimal_parameters:`` would skip the fast integer algorithm
regardless, but warnings would be generated when they are unnecessary.
- np.log10(... + 1) was changed to np.log10(... + 1.0) to avoid further
potential problems from overflow here.
- See https://github.com/numpy/numpy/pull/12065 for discussion.
---
 numpy/lib/arraysetops.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index a0ee8d9b2..8ae00aeae 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -606,17 +606,24 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
     if integer_arrays:
         ar2_min = np.min(ar2)
         ar2_max = np.max(ar2)
-        ar2_range = ar2_max - ar2_min
         ar2_size = ar2.size
 
-        # Optimal performance is for approximately
-        # log10(size) > (log10(range) - 2.27) / 0.927, see discussion on
-        # https://github.com/numpy/numpy/pull/12065
-        optimal_parameters = (
-                np.log10(ar2_size + 1) >
-                ((np.log10(ar2_range + 1) - 2.27) / 0.927)
-            )
-
+        # Check for integer overflow
+        with np.errstate(over='raise'):
+            try:
+                ar2_range = ar2_max - ar2_min
+
+                # Optimal performance is for approximately
+                # log10(size) > (log10(range) - 2.27) / 0.927, see discussion on
+                # https://github.com/numpy/numpy/pull/12065
+                optimal_parameters = (
+                        np.log10(ar2_size) >
+                        ((np.log10(ar2_range + 1.0) - 2.27) / 0.927)
+                    )
+            except FloatingPointError:
+                optimal_parameters = False
+
+        # Use the fast integer algorithm
         if optimal_parameters:
 
             if invert:
-- 
cgit v1.2.1


From d6437066f27f81c4a78fb377ef1c61b4969f8159 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 26 Dec 2018 19:49:54 -0500
Subject: TST: Extend np.in1d tests to old algorithm

- Add flag ``_slow_integer`` to np.isin/np.in1d to force the use of the
old isin/in1d algorithm for integers.
---
 numpy/lib/arraysetops.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 8ae00aeae..7597866a4 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -516,12 +516,13 @@ def setxor1d(ar1, ar2, assume_unique=False):
     return aux[flag[1:] & flag[:-1]]
 
 
-def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None):
+def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None,
+                     _slow_integer=None):
     return (ar1, ar2)
 
 
 @array_function_dispatch(_in1d_dispatcher)
-def in1d(ar1, ar2, assume_unique=False, invert=False):
+def in1d(ar1, ar2, assume_unique=False, invert=False, _slow_integer=None):
     """
     Test whether each element of a 1-D array is also present in a second array.
 
@@ -544,6 +545,10 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
         False where an element of `ar1` is in `ar2` and True otherwise).
         Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
         to (but is faster than) ``np.invert(in1d(a, b))``.
+    _slow_integer : bool/None, optional
+        If True, defaults to the old algorithm for integers. This is
+        used for debugging and testing purposes. The default, None,
+        selects the best based on estimated performance.
 
         .. versionadded:: 1.8.0
 
@@ -603,7 +608,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
     integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and
                       np.issubdtype(ar2.dtype, np.integer))
 
-    if integer_arrays:
+    if integer_arrays and _slow_integer in [None, False]:
         ar2_min = np.min(ar2)
         ar2_max = np.max(ar2)
         ar2_size = ar2.size
@@ -624,7 +629,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
                 optimal_parameters = False
 
         # Use the fast integer algorithm
-        if optimal_parameters:
+        if optimal_parameters or _slow_integer == False:
 
             if invert:
                 outgoing_array = np.ones_like(ar1, dtype=np.bool_)
@@ -690,12 +695,14 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
         return ret[rev_idx]
 
 
-def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None):
+def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
+                     _slow_integer=None):
     return (element, test_elements)
 
 
 @array_function_dispatch(_isin_dispatcher)
-def isin(element, test_elements, assume_unique=False, invert=False):
+def isin(element, test_elements, assume_unique=False, invert=False,
+         _slow_integer=None):
     """
     Calculates ``element in test_elements``, broadcasting over `element` only.
     Returns a boolean array of the same shape as `element` that is True
@@ -717,6 +724,10 @@ def isin(element, test_elements, assume_unique=False, invert=False):
         calculating `element not in test_elements`. Default is False.
         ``np.isin(a, b, invert=True)`` is equivalent to (but faster
         than) ``np.invert(np.isin(a, b))``.
+    _slow_integer : bool/None, optional
+        If True, defaults to the old algorithm for integers. This is
+        used for debugging and testing purposes. The default, None,
+        selects the best based on measured performance.
 
     Returns
     -------
@@ -790,7 +801,8 @@ def isin(element, test_elements, assume_unique=False, invert=False):
     """
     element = np.asarray(element)
     return in1d(element, test_elements, assume_unique=assume_unique,
-                invert=invert).reshape(element.shape)
+                invert=invert, _slow_integer=_slow_integer
+                ).reshape(element.shape)
 
 
 def _union1d_dispatcher(ar1, ar2):
-- 
cgit v1.2.1


From 179d1575ebdf850add391db87da6b1f4d3209e3c Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Thu, 9 Jun 2022 20:48:45 -0400
Subject: MAINT: Fix linting errors in in1d tests

---
 numpy/lib/arraysetops.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 7597866a4..f0cc2006c 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -619,7 +619,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, _slow_integer=None):
                 ar2_range = ar2_max - ar2_min
 
                 # Optimal performance is for approximately
-                # log10(size) > (log10(range) - 2.27) / 0.927, see discussion on
+                # log10(size) > (log10(range) - 2.27) / 0.927.
+                # See discussion on
                 # https://github.com/numpy/numpy/pull/12065
                 optimal_parameters = (
                         np.log10(ar2_size) >
-- 
cgit v1.2.1


From f570065dac7bae4b6841224f7ee6b68f08754edd Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 10 Jun 2022 14:33:13 -0400
Subject: MAINT: Change `_slow_integer` parameter to `method`

---
 numpy/lib/arraysetops.py | 47 +++++++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index f0cc2006c..4ef2468d5 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -517,12 +517,12 @@ def setxor1d(ar1, ar2, assume_unique=False):
 
 
 def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None,
-                     _slow_integer=None):
+                     method='auto'):
     return (ar1, ar2)
 
 
 @array_function_dispatch(_in1d_dispatcher)
-def in1d(ar1, ar2, assume_unique=False, invert=False, _slow_integer=None):
+def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
     """
     Test whether each element of a 1-D array is also present in a second array.
 
@@ -545,10 +545,18 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, _slow_integer=None):
         False where an element of `ar1` is in `ar2` and True otherwise).
         Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
         to (but is faster than) ``np.invert(in1d(a, b))``.
-    _slow_integer : bool/None, optional
-        If True, defaults to the old algorithm for integers. This is
-        used for debugging and testing purposes. The default, None,
-        selects the best based on estimated performance.
+    method : {'auto', 'sort', 'dictionary'}, optional
+        The algorithm to use. This will not affect the final result,
+        but will affect the speed.
+
+        - If 'sort', will use a sort-based approach.
+        - If 'dictionary', will use a key-dictionary approach similar
+          to a radix sort.
+        - If 'auto', will automatically choose the method which is
+          expected to perform the fastest, which depends
+          on the size and range of `ar2`. For larger sizes,
+          'dictionary' is chosen. For larger range or smaller
+          sizes, 'sort' is chosen.
 
         .. versionadded:: 1.8.0
 
@@ -608,7 +616,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, _slow_integer=None):
     integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and
                       np.issubdtype(ar2.dtype, np.integer))
 
-    if integer_arrays and _slow_integer in [None, False]:
+    if integer_arrays and method in ['auto', 'dictionary']:
         ar2_min = np.min(ar2)
         ar2_max = np.max(ar2)
         ar2_size = ar2.size
@@ -630,7 +638,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, _slow_integer=None):
                 optimal_parameters = False
 
         # Use the fast integer algorithm
-        if optimal_parameters or _slow_integer == False:
+        if optimal_parameters or method == 'dictionary':
 
             if invert:
                 outgoing_array = np.ones_like(ar1, dtype=np.bool_)
@@ -697,13 +705,13 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, _slow_integer=None):
 
 
 def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
-                     _slow_integer=None):
+                     method='auto'):
     return (element, test_elements)
 
 
 @array_function_dispatch(_isin_dispatcher)
 def isin(element, test_elements, assume_unique=False, invert=False,
-         _slow_integer=None):
+         method='auto'):
     """
     Calculates ``element in test_elements``, broadcasting over `element` only.
     Returns a boolean array of the same shape as `element` that is True
@@ -725,10 +733,18 @@ def isin(element, test_elements, assume_unique=False, invert=False,
         calculating `element not in test_elements`. Default is False.
         ``np.isin(a, b, invert=True)`` is equivalent to (but faster
         than) ``np.invert(np.isin(a, b))``.
-    _slow_integer : bool/None, optional
-        If True, defaults to the old algorithm for integers. This is
-        used for debugging and testing purposes. The default, None,
-        selects the best based on measured performance.
+    method : {'auto', 'sort', 'dictionary'}, optional
+        The algorithm to use. This will not affect the final result,
+        but will affect the speed.
+
+        - If 'sort', will use a sort-based approach.
+        - If 'dictionary', will use a key-dictionary approach similar
+          to a radix sort.
+        - If 'auto', will automatically choose the method which is
+          expected to perform the fastest, which depends
+          on the size and range of `ar2`. For larger sizes,
+          'dictionary' is chosen. For larger range or smaller
+          sizes, 'sort' is chosen.
 
     Returns
     -------
@@ -802,8 +818,7 @@ def isin(element, test_elements, assume_unique=False, invert=False,
     """
     element = np.asarray(element)
     return in1d(element, test_elements, assume_unique=assume_unique,
-                invert=invert, _slow_integer=_slow_integer
-                ).reshape(element.shape)
+                invert=invert, method=method).reshape(element.shape)
 
 
 def _union1d_dispatcher(ar1, ar2):
-- 
cgit v1.2.1


From 68a1acf41586bb65538af84ce9c74ec5e0872750 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 10 Jun 2022 14:37:59 -0400
Subject: Add check for methods

---
 numpy/lib/arraysetops.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 4ef2468d5..6320ea1a5 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -616,6 +616,11 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
     integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and
                       np.issubdtype(ar2.dtype, np.integer))
 
+    if method not in ['auto', 'sort', 'dictionary']:
+        raise ValueError(
+            "Invalid method: {0}. ".format(method)
+            + "Please use 'auto', 'sort' or 'dictionary'.")
+
     if integer_arrays and method in ['auto', 'dictionary']:
         ar2_min = np.min(ar2)
         ar2_max = np.max(ar2)
@@ -659,6 +664,11 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
                                                         ar2_min]
 
             return outgoing_array
+    elif method == 'dictionary':
+        raise ValueError(
+            "'dictionary' method is only supported for non-integer arrays. "
+            "Please select 'sort' or 'auto' for the method."
+        )
 
 
     # Check if one of the arrays may contain arbitrary objects
-- 
cgit v1.2.1


From 281dadc5fbe8c49df12125dec66040dd55d69e49 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 10 Jun 2022 14:42:34 -0400
Subject: DOC: Specify constraints of method in docstring

---
 numpy/lib/arraysetops.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 6320ea1a5..a08c912d6 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -551,7 +551,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
 
         - If 'sort', will use a sort-based approach.
         - If 'dictionary', will use a key-dictionary approach similar
-          to a radix sort.
+          to a radix sort. This is only available for boolean and
+          integer arrays.
         - If 'auto', will automatically choose the method which is
           expected to perform the fastest, which depends
           on the size and range of `ar2`. For larger sizes,
@@ -749,7 +750,8 @@ def isin(element, test_elements, assume_unique=False, invert=False,
 
         - If 'sort', will use a sort-based approach.
         - If 'dictionary', will use a key-dictionary approach similar
-          to a radix sort.
+          to a radix sort. This is only available for boolean and
+          integer arrays.
         - If 'auto', will automatically choose the method which is
           expected to perform the fastest, which depends
           on the size and range of `ar2`. For larger sizes,
-- 
cgit v1.2.1


From bd41739886382ec53e02045cba8a1dc2ab4dfc86 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 10 Jun 2022 14:48:50 -0400
Subject: DOC: Describe default in docstring

---
 numpy/lib/arraysetops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index a08c912d6..8dbb5f765 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -547,7 +547,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
         to (but is faster than) ``np.invert(in1d(a, b))``.
     method : {'auto', 'sort', 'dictionary'}, optional
         The algorithm to use. This will not affect the final result,
-        but will affect the speed.
+        but will affect the speed. Default is 'auto'.
 
         - If 'sort', will use a sort-based approach.
         - If 'dictionary', will use a key-dictionary approach similar
@@ -746,7 +746,7 @@ def isin(element, test_elements, assume_unique=False, invert=False,
         than) ``np.invert(np.isin(a, b))``.
     method : {'auto', 'sort', 'dictionary'}, optional
         The algorithm to use. This will not affect the final result,
-        but will affect the speed.
+        but will affect the speed. Default is 'auto'.
 
         - If 'sort', will use a sort-based approach.
         - If 'dictionary', will use a key-dictionary approach similar
-- 
cgit v1.2.1


From 93e371f2b38fcbb68219078da6a57174d2b0d46e Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 10 Jun 2022 15:51:04 -0400
Subject: MAINT: Fix use of dispatcher for isin

---
 numpy/lib/arraysetops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 8dbb5f765..44bdd0546 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -517,7 +517,7 @@ def setxor1d(ar1, ar2, assume_unique=False):
 
 
 def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None,
-                     method='auto'):
+                     method=None):
     return (ar1, ar2)
 
 
@@ -716,7 +716,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
 
 
 def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
-                     method='auto'):
+                     method=None):
     return (element, test_elements)
 
 
-- 
cgit v1.2.1


From 8e14d1af5bf1139c55e202b42a9ba78954c339c2 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 10 Jun 2022 15:52:34 -0400
Subject: MAINT: Fix error message

---
 numpy/lib/arraysetops.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 44bdd0546..53a2b6e02 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -667,7 +667,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
             return outgoing_array
     elif method == 'dictionary':
         raise ValueError(
-            "'dictionary' method is only supported for non-integer arrays. "
+            "'dictionary' method is only "
+            "supported for boolean or integer arrays. "
             "Please select 'sort' or 'auto' for the method."
         )
 
-- 
cgit v1.2.1


From 530ccde5297ee36bc8b142b4f2cc8486ba90a678 Mon Sep 17 00:00:00 2001
From: Miles Cranmer <miles.cranmer@gmail.com>
Date: Fri, 10 Jun 2022 16:14:36 -0400
Subject: MAINT: Formatting changes for in1d

Co-authored-by: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com>
---
 numpy/lib/arraysetops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 53a2b6e02..954e915b7 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -617,12 +617,12 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
     integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and
                       np.issubdtype(ar2.dtype, np.integer))
 
-    if method not in ['auto', 'sort', 'dictionary']:
+    if method not in {'auto', 'sort', 'dictionary'}:
         raise ValueError(
             "Invalid method: {0}. ".format(method)
             + "Please use 'auto', 'sort' or 'dictionary'.")
 
-    if integer_arrays and method in ['auto', 'dictionary']:
+    if integer_arrays and method in {'auto', 'dictionary'}:
         ar2_min = np.min(ar2)
         ar2_max = np.max(ar2)
         ar2_size = ar2.size
-- 
cgit v1.2.1


From d3081b65ec8361e2f5b4890286eba441f1081d34 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 10 Jun 2022 16:49:14 -0400
Subject: DOC: Improve docstring explanation

---
 numpy/lib/arraysetops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 954e915b7..90b5f0419 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -551,7 +551,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
 
         - If 'sort', will use a sort-based approach.
         - If 'dictionary', will use a key-dictionary approach similar
-          to a radix sort. This is only available for boolean and
+          to a counting sort. This is only available for boolean and
           integer arrays.
         - If 'auto', will automatically choose the method which is
           expected to perform the fastest, which depends
@@ -751,7 +751,7 @@ def isin(element, test_elements, assume_unique=False, invert=False,
 
         - If 'sort', will use a sort-based approach.
         - If 'dictionary', will use a key-dictionary approach similar
-          to a radix sort. This is only available for boolean and
+          to a counting sort. This is only available for boolean and
           integer arrays.
         - If 'auto', will automatically choose the method which is
           expected to perform the fastest, which depends
-- 
cgit v1.2.1


From d7e2582cd33b22a767286e8a3d95b336dfe51a34 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 10 Jun 2022 16:49:35 -0400
Subject: MAINT: bool instead of np.bool_ dtype

---
 numpy/lib/arraysetops.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 90b5f0419..6d7d1c397 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -608,9 +608,9 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
     if ar2.dtype == object:
         ar2 = ar2.reshape(-1, 1)
     # Convert booleans to uint8 so we can use the fast integer algorithm
-    if ar1.dtype == np.bool_:
+    if ar1.dtype == bool:
         ar1 = ar1.view(np.uint8)
-    if ar2.dtype == np.bool_:
+    if ar2.dtype == bool:
         ar2 = ar2.view(np.uint8)
 
     # Check if we can use a fast integer algorithm:
@@ -647,16 +647,16 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
         if optimal_parameters or method == 'dictionary':
 
             if invert:
-                outgoing_array = np.ones_like(ar1, dtype=np.bool_)
+                outgoing_array = np.ones_like(ar1, dtype=bool)
             else:
-                outgoing_array = np.zeros_like(ar1, dtype=np.bool_)
+                outgoing_array = np.zeros_like(ar1, dtype=bool)
 
             # Make elements 1 where the integer exists in ar2
             if invert:
-                isin_helper_ar = np.ones(ar2_range + 1, dtype=np.bool_)
+                isin_helper_ar = np.ones(ar2_range + 1, dtype=bool)
                 isin_helper_ar[ar2 - ar2_min] = 0
             else:
-                isin_helper_ar = np.zeros(ar2_range + 1, dtype=np.bool_)
+                isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool)
                 isin_helper_ar[ar2 - ar2_min] = 1
 
             # Mask out elements we know won't work
-- 
cgit v1.2.1


From 9e6bc7911cbea5d04ba4efe9f3dcaaf389d65cd1 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 10 Jun 2022 19:59:45 -0400
Subject: DOC: Clean up isin docstring

---
 numpy/lib/arraysetops.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 6d7d1c397..4a07d6e9e 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -555,9 +555,9 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
           integer arrays.
         - If 'auto', will automatically choose the method which is
           expected to perform the fastest, which depends
-          on the size and range of `ar2`. For larger sizes,
-          'dictionary' is chosen. For larger range or smaller
-          sizes, 'sort' is chosen.
+          on the size and range of `ar2`. For larger sizes or
+          smaller range, 'dictionary' is chosen.
+          For larger range or smaller sizes, 'sort' is chosen.
 
         .. versionadded:: 1.8.0
 
-- 
cgit v1.2.1


From 7cb937c37f2cb33c096055b3783a006f71c938df Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 10 Jun 2022 20:06:32 -0400
Subject: DOC: Describe memory considerations in in1d/isin

---
 numpy/lib/arraysetops.py | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 4a07d6e9e..cc6fe6259 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -549,15 +549,21 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
         The algorithm to use. This will not affect the final result,
         but will affect the speed. Default is 'auto'.
 
-        - If 'sort', will use a sort-based approach.
+        - If 'sort', will use a mergesort-based approach. This will have
+          a memory usage of roughly 6 times the sum of the sizes of
+          `ar1` and `ar2`, not accounting for size of dtypes.
         - If 'dictionary', will use a key-dictionary approach similar
           to a counting sort. This is only available for boolean and
-          integer arrays.
+          integer arrays. This will have a memory usage of the
+          size of `ar1` plus the max-min value of `ar2`. This tends
+          to be the faster method if the following formula is true:
+          `log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927`,
+          but may use greater memory.
         - If 'auto', will automatically choose the method which is
-          expected to perform the fastest, which depends
-          on the size and range of `ar2`. For larger sizes or
-          smaller range, 'dictionary' is chosen.
-          For larger range or smaller sizes, 'sort' is chosen.
+          expected to perform the fastest, using the above
+          formula. For larger sizes or smaller range,
+          'dictionary' is chosen. For larger range or smaller
+          sizes, 'sort' is chosen.
 
         .. versionadded:: 1.8.0
 
@@ -749,13 +755,19 @@ def isin(element, test_elements, assume_unique=False, invert=False,
         The algorithm to use. This will not affect the final result,
         but will affect the speed. Default is 'auto'.
 
-        - If 'sort', will use a sort-based approach.
+        - If 'sort', will use a mergesort-based approach. This will have
+          a memory usage of roughly 6 times the sum of the sizes of
+          `ar1` and `ar2`, not accounting for size of dtypes.
         - If 'dictionary', will use a key-dictionary approach similar
           to a counting sort. This is only available for boolean and
-          integer arrays.
+          integer arrays. This will have a memory usage of the
+          size of `ar1` plus the max-min value of `ar2`. This tends
+          to be the faster method if the following formula is true:
+          `log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927`,
+          but may use greater memory.
         - If 'auto', will automatically choose the method which is
-          expected to perform the fastest, which depends
-          on the size and range of `ar2`. For larger sizes,
+          expected to perform the fastest, using the above
+          formula. For larger sizes or smaller range,
           'dictionary' is chosen. For larger range or smaller
           sizes, 'sort' is chosen.
 
-- 
cgit v1.2.1


From a8677bb0cf046d2490228c12c282ef05fe08d81b Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 17 Jun 2022 11:08:06 -0400
Subject: MAINT: Switch to old in1d for large memory usage

---
 numpy/lib/arraysetops.py | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index cc6fe6259..bc5877c4b 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -559,11 +559,12 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
           to be the faster method if the following formula is true:
           `log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927`,
           but may use greater memory.
-        - If 'auto', will automatically choose the method which is
-          expected to perform the fastest, using the above
-          formula. For larger sizes or smaller range,
-          'dictionary' is chosen. For larger range or smaller
-          sizes, 'sort' is chosen.
+        - If 'auto', will automatically choose 'dictionary' if
+          the required memory allocation is less than or equal to
+          6 times the sum of the sizes of `ar1` and `ar2`,
+          otherwise will use 'sort'. This is done to not use
+          a large amount of memory by default, even though
+          'dictionary' may be faster in most cases.
 
         .. versionadded:: 1.8.0
 
@@ -631,6 +632,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
     if integer_arrays and method in {'auto', 'dictionary'}:
         ar2_min = np.min(ar2)
         ar2_max = np.max(ar2)
+        ar1_size = ar1.size
         ar2_size = ar2.size
 
         # Check for integer overflow
@@ -640,17 +642,20 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
 
                 # Optimal performance is for approximately
                 # log10(size) > (log10(range) - 2.27) / 0.927.
-                # See discussion on
-                # https://github.com/numpy/numpy/pull/12065
-                optimal_parameters = (
-                        np.log10(ar2_size) >
-                        ((np.log10(ar2_range + 1.0) - 2.27) / 0.927)
-                    )
+                # However, here we set the requirement that
+                # the intermediate array can only be 6x
+                # the combined memory allocation of the original
+                # arrays.
+                # (see discussion on 
+                # https://github.com/numpy/numpy/pull/12065)
+                below_memory_constraint = (
+                    ar2_range <= 6 * (ar1_size + ar2_size)
+                )
             except FloatingPointError:
-                optimal_parameters = False
+                below_memory_constraint = False
 
         # Use the fast integer algorithm
-        if optimal_parameters or method == 'dictionary':
+        if below_memory_constraint or method == 'dictionary':
 
             if invert:
                 outgoing_array = np.ones_like(ar1, dtype=bool)
-- 
cgit v1.2.1


From cde60cee195660f2dafb2993e609d66559525fe8 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 17 Jun 2022 11:54:03 -0400
Subject: MAINT: Switch parameter name to 'kind' over 'method'

---
 numpy/lib/arraysetops.py | 52 ++++++++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 24 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index bc5877c4b..e697aa07a 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -517,12 +517,12 @@ def setxor1d(ar1, ar2, assume_unique=False):
 
 
 def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None,
-                     method=None):
+                     kind=None):
     return (ar1, ar2)
 
 
 @array_function_dispatch(_in1d_dispatcher)
-def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
+def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
     """
     Test whether each element of a 1-D array is also present in a second array.
 
@@ -545,9 +545,10 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
         False where an element of `ar1` is in `ar2` and True otherwise).
         Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
         to (but is faster than) ``np.invert(in1d(a, b))``.
-    method : {'auto', 'sort', 'dictionary'}, optional
+   kind : {None, 'sort', 'dictionary'}, optional
         The algorithm to use. This will not affect the final result,
-        but will affect the speed. Default is 'auto'.
+        but will affect the speed. Default will select automatically
+        based on memory considerations.
 
         - If 'sort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
@@ -559,12 +560,12 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
           to be the faster method if the following formula is true:
           `log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927`,
           but may use greater memory.
-        - If 'auto', will automatically choose 'dictionary' if
+        - If `None`, will automatically choose 'dictionary' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
           otherwise will use 'sort'. This is done to not use
           a large amount of memory by default, even though
-          'dictionary' may be faster in most cases.
+           'dictionary' may be faster in most cases.
 
         .. versionadded:: 1.8.0
 
@@ -624,12 +625,12 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
     integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and
                       np.issubdtype(ar2.dtype, np.integer))
 
-    if method not in {'auto', 'sort', 'dictionary'}:
+    if kind not in {None, 'sort', 'dictionary'}:
         raise ValueError(
-            "Invalid method: {0}. ".format(method)
-            + "Please use 'auto', 'sort' or 'dictionary'.")
+            "Invalid kind: {0}. ".format(kind)
+            + "Please use None, 'sort' or 'dictionary'.")
 
-    if integer_arrays and method in {'auto', 'dictionary'}:
+    if integer_arrays and kind in {None, 'dictionary'}:
         ar2_min = np.min(ar2)
         ar2_max = np.max(ar2)
         ar1_size = ar1.size
@@ -655,7 +656,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
                 below_memory_constraint = False
 
         # Use the fast integer algorithm
-        if below_memory_constraint or method == 'dictionary':
+        if below_memory_constraint or kind == 'dictionary':
 
             if invert:
                 outgoing_array = np.ones_like(ar1, dtype=bool)
@@ -676,11 +677,11 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
                                                         ar2_min]
 
             return outgoing_array
-    elif method == 'dictionary':
+    elif kind == 'dictionary':
         raise ValueError(
-            "'dictionary' method is only "
+            "The 'dictionary' method is only "
             "supported for boolean or integer arrays. "
-            "Please select 'sort' or 'auto' for the method."
+            "Please select 'sort' or None for kind."
         )
 
 
@@ -728,13 +729,13 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, method='auto'):
 
 
 def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
-                     method=None):
+                     kind=None):
     return (element, test_elements)
 
 
 @array_function_dispatch(_isin_dispatcher)
 def isin(element, test_elements, assume_unique=False, invert=False,
-         method='auto'):
+         kind=None):
     """
     Calculates ``element in test_elements``, broadcasting over `element` only.
     Returns a boolean array of the same shape as `element` that is True
@@ -756,9 +757,10 @@ def isin(element, test_elements, assume_unique=False, invert=False,
         calculating `element not in test_elements`. Default is False.
         ``np.isin(a, b, invert=True)`` is equivalent to (but faster
         than) ``np.invert(np.isin(a, b))``.
-    method : {'auto', 'sort', 'dictionary'}, optional
+   kind : {None, 'sort', 'dictionary'}, optional
         The algorithm to use. This will not affect the final result,
-        but will affect the speed. Default is 'auto'.
+        but will affect the speed. Default will select automatically
+        based on memory considerations.
 
         - If 'sort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
@@ -770,11 +772,13 @@ def isin(element, test_elements, assume_unique=False, invert=False,
           to be the faster method if the following formula is true:
           `log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927`,
           but may use greater memory.
-        - If 'auto', will automatically choose the method which is
-          expected to perform the fastest, using the above
-          formula. For larger sizes or smaller range,
-          'dictionary' is chosen. For larger range or smaller
-          sizes, 'sort' is chosen.
+        - If `None`, will automatically choose 'dictionary' if
+          the required memory allocation is less than or equal to
+          6 times the sum of the sizes of `ar1` and `ar2`,
+          otherwise will use 'sort'. This is done to not use
+          a large amount of memory by default, even though
+           'dictionary' may be faster in most cases.
+
 
     Returns
     -------
@@ -848,7 +852,7 @@ def isin(element, test_elements, assume_unique=False, invert=False,
     """
     element = np.asarray(element)
     return in1d(element, test_elements, assume_unique=assume_unique,
-                invert=invert, method=method).reshape(element.shape)
+                invert=invert, kind=kind).reshape(element.shape)
 
 
 def _union1d_dispatcher(ar1, ar2):
-- 
cgit v1.2.1


From 8f5764447cdf6f8ab21ba0f863c65a8d7a7728b5 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 17 Jun 2022 11:59:00 -0400
Subject: MAINT: kind now uses "mergesort" instead of "sort"

---
 numpy/lib/arraysetops.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index e697aa07a..bc25743f7 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -545,12 +545,12 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
         False where an element of `ar1` is in `ar2` and True otherwise).
         Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
         to (but is faster than) ``np.invert(in1d(a, b))``.
-   kind : {None, 'sort', 'dictionary'}, optional
+   kind : {None, 'mergesort', 'dictionary'}, optional
         The algorithm to use. This will not affect the final result,
         but will affect the speed. Default will select automatically
         based on memory considerations.
 
-        - If 'sort', will use a mergesort-based approach. This will have
+        - If 'mergesort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
           `ar1` and `ar2`, not accounting for size of dtypes.
         - If 'dictionary', will use a key-dictionary approach similar
@@ -563,7 +563,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
         - If `None`, will automatically choose 'dictionary' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
-          otherwise will use 'sort'. This is done to not use
+          otherwise will use 'mergesort'. This is done to not use
           a large amount of memory by default, even though
            'dictionary' may be faster in most cases.
 
@@ -625,10 +625,10 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
     integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and
                       np.issubdtype(ar2.dtype, np.integer))
 
-    if kind not in {None, 'sort', 'dictionary'}:
+    if kind not in {None, 'mergesort', 'dictionary'}:
         raise ValueError(
             "Invalid kind: {0}. ".format(kind)
-            + "Please use None, 'sort' or 'dictionary'.")
+            + "Please use None, 'mergesort' or 'dictionary'.")
 
     if integer_arrays and kind in {None, 'dictionary'}:
         ar2_min = np.min(ar2)
@@ -681,7 +681,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
         raise ValueError(
             "The 'dictionary' method is only "
             "supported for boolean or integer arrays. "
-            "Please select 'sort' or None for kind."
+            "Please select 'mergesort' or None for kind."
         )
 
 
@@ -757,12 +757,12 @@ def isin(element, test_elements, assume_unique=False, invert=False,
         calculating `element not in test_elements`. Default is False.
         ``np.isin(a, b, invert=True)`` is equivalent to (but faster
         than) ``np.invert(np.isin(a, b))``.
-   kind : {None, 'sort', 'dictionary'}, optional
+   kind : {None, 'mergesort', 'dictionary'}, optional
         The algorithm to use. This will not affect the final result,
         but will affect the speed. Default will select automatically
         based on memory considerations.
 
-        - If 'sort', will use a mergesort-based approach. This will have
+        - If 'mergesort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
           `ar1` and `ar2`, not accounting for size of dtypes.
         - If 'dictionary', will use a key-dictionary approach similar
@@ -775,7 +775,7 @@ def isin(element, test_elements, assume_unique=False, invert=False,
         - If `None`, will automatically choose 'dictionary' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
-          otherwise will use 'sort'. This is done to not use
+          otherwise will use 'mergesort'. This is done to not use
           a large amount of memory by default, even though
            'dictionary' may be faster in most cases.
 
-- 
cgit v1.2.1


From c5db8e86ebdcb54e33c93386a18b86615789fc50 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 17 Jun 2022 12:37:53 -0400
Subject: MAINT: Protect against integer overflow in in1d

---
 numpy/lib/arraysetops.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index bc25743f7..a67472a22 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -652,11 +652,14 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
                 below_memory_constraint = (
                     ar2_range <= 6 * (ar1_size + ar2_size)
                 )
+                range_safe_from_overflow = True
             except FloatingPointError:
-                below_memory_constraint = False
+                range_safe_from_overflow = False
 
-        # Use the fast integer algorithm
-        if below_memory_constraint or kind == 'dictionary':
+        if (
+            range_safe_from_overflow and 
+            (below_memory_constraint or kind == 'dictionary')
+        ):
 
             if invert:
                 outgoing_array = np.ones_like(ar1, dtype=bool)
-- 
cgit v1.2.1


From 31f739523f97afcec9baa6a872f25e06bd1a4104 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 17 Jun 2022 14:15:55 -0400
Subject: MAINT: Clean up memory checking for in1d

---
 numpy/lib/arraysetops.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index a67472a22..a1eca1c01 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -641,21 +641,19 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
             try:
                 ar2_range = ar2_max - ar2_min
 
-                # Optimal performance is for approximately
-                # log10(size) > (log10(range) - 2.27) / 0.927.
-                # However, here we set the requirement that
-                # the intermediate array can only be 6x
-                # the combined memory allocation of the original
-                # arrays.
-                # (see discussion on 
-                # https://github.com/numpy/numpy/pull/12065)
-                below_memory_constraint = (
-                    ar2_range <= 6 * (ar1_size + ar2_size)
-                )
                 range_safe_from_overflow = True
             except FloatingPointError:
                 range_safe_from_overflow = False
 
+        # Optimal performance is for approximately
+        # log10(size) > (log10(range) - 2.27) / 0.927.
+        # However, here we set the requirement that
+        # the intermediate array can only be 6x
+        # the combined memory allocation of the original
+        # arrays. See discussion on 
+        # https://github.com/numpy/numpy/pull/12065.
+        below_memory_constraint = ar2_range <= 6 * (ar1_size + ar2_size)
+
         if (
             range_safe_from_overflow and 
             (below_memory_constraint or kind == 'dictionary')
-- 
cgit v1.2.1


From 3533b861650cff2de7680f1848cd05826f94d480 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 17 Jun 2022 14:28:32 -0400
Subject: MAINT: Clean up integer overflow check in in1d

---
 numpy/lib/arraysetops.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index a1eca1c01..339f4e60c 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -633,26 +633,20 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
     if integer_arrays and kind in {None, 'dictionary'}:
         ar2_min = np.min(ar2)
         ar2_max = np.max(ar2)
-        ar1_size = ar1.size
-        ar2_size = ar2.size
 
-        # Check for integer overflow
-        with np.errstate(over='raise'):
-            try:
-                ar2_range = ar2_max - ar2_min
+        ar2_range = int(ar2_max) - int(ar2_min)
 
-                range_safe_from_overflow = True
-            except FloatingPointError:
-                range_safe_from_overflow = False
+        # Constraints on whether we can actually use the dictionary method:
+        range_safe_from_overflow = ar2_range < np.iinfo(ar2.dtype).max
+        below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
 
         # Optimal performance is for approximately
         # log10(size) > (log10(range) - 2.27) / 0.927.
-        # However, here we set the requirement that
+        # However, here we set the requirement that by default
         # the intermediate array can only be 6x
         # the combined memory allocation of the original
         # arrays. See discussion on 
         # https://github.com/numpy/numpy/pull/12065.
-        below_memory_constraint = ar2_range <= 6 * (ar1_size + ar2_size)
 
         if (
             range_safe_from_overflow and 
-- 
cgit v1.2.1


From 4b62918c03439b1fe5de95f027d64b32c10a1dcb Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 17 Jun 2022 18:04:52 -0400
Subject: MAINT: RuntimeError for unexpected integer overflow

---
 numpy/lib/arraysetops.py | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 339f4e60c..8ced2f6d9 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -672,6 +672,13 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
                                                         ar2_min]
 
             return outgoing_array
+        elif kind == 'dictionary':  # not range_safe_from_overflow
+            raise RuntimeError(
+                "You have specified kind='dictionary', "
+                "but the range of values in `ar2` exceeds the "
+                "maximum integer of the datatype. "
+                "Please set `kind` to None or 'mergesort'."
+            )
     elif kind == 'dictionary':
         raise ValueError(
             "The 'dictionary' method is only "
-- 
cgit v1.2.1


From 76bb035b4755454150e1ae089a0b2331e40fa723 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 17 Jun 2022 19:30:36 -0400
Subject: DOC: Fix formatting issues in docstring

---
 numpy/lib/arraysetops.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 8ced2f6d9..8af8c8803 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -549,7 +549,6 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
         The algorithm to use. This will not affect the final result,
         but will affect the speed. Default will select automatically
         based on memory considerations.
-
         - If 'mergesort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
           `ar1` and `ar2`, not accounting for size of dtypes.
@@ -565,7 +564,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
           6 times the sum of the sizes of `ar1` and `ar2`,
           otherwise will use 'mergesort'. This is done to not use
           a large amount of memory by default, even though
-           'dictionary' may be faster in most cases.
+          'dictionary' may be faster in most cases.
 
         .. versionadded:: 1.8.0
 
@@ -763,7 +762,6 @@ def isin(element, test_elements, assume_unique=False, invert=False,
         The algorithm to use. This will not affect the final result,
         but will affect the speed. Default will select automatically
         based on memory considerations.
-
         - If 'mergesort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
           `ar1` and `ar2`, not accounting for size of dtypes.
@@ -779,7 +777,7 @@ def isin(element, test_elements, assume_unique=False, invert=False,
           6 times the sum of the sizes of `ar1` and `ar2`,
           otherwise will use 'mergesort'. This is done to not use
           a large amount of memory by default, even though
-           'dictionary' may be faster in most cases.
+          'dictionary' may be faster in most cases.
 
 
     Returns
-- 
cgit v1.2.1


From 0c339d4238d55cc3dd45236f5e7ccff503e007bf Mon Sep 17 00:00:00 2001
From: Miles Cranmer <miles.cranmer@gmail.com>
Date: Sat, 18 Jun 2022 06:20:16 +0000
Subject: DOC: Add missing indent in docstring

---
 numpy/lib/arraysetops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 8af8c8803..74cbd0965 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -545,7 +545,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
         False where an element of `ar1` is in `ar2` and True otherwise).
         Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
         to (but is faster than) ``np.invert(in1d(a, b))``.
-   kind : {None, 'mergesort', 'dictionary'}, optional
+    kind : {None, 'mergesort', 'dictionary'}, optional
         The algorithm to use. This will not affect the final result,
         but will affect the speed. Default will select automatically
         based on memory considerations.
-- 
cgit v1.2.1


From d9d4fd5b0ba38e52e6261ef51e5cd47cdddfdf1e Mon Sep 17 00:00:00 2001
From: Miles Cranmer <miles.cranmer@gmail.com>
Date: Sat, 18 Jun 2022 06:46:12 +0000
Subject: DOC: Fix list format for sphinx

---
 numpy/lib/arraysetops.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 74cbd0965..5102a5d06 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -549,17 +549,18 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
         The algorithm to use. This will not affect the final result,
         but will affect the speed. Default will select automatically
         based on memory considerations.
-        - If 'mergesort', will use a mergesort-based approach. This will have
+
+        * If 'mergesort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
           `ar1` and `ar2`, not accounting for size of dtypes.
-        - If 'dictionary', will use a key-dictionary approach similar
+        * If 'dictionary', will use a key-dictionary approach similar
           to a counting sort. This is only available for boolean and
           integer arrays. This will have a memory usage of the
           size of `ar1` plus the max-min value of `ar2`. This tends
           to be the faster method if the following formula is true:
-          `log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927`,
+          ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
           but may use greater memory.
-        - If `None`, will automatically choose 'dictionary' if
+        * If `None`, will automatically choose 'dictionary' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
           otherwise will use 'mergesort'. This is done to not use
@@ -758,21 +759,22 @@ def isin(element, test_elements, assume_unique=False, invert=False,
         calculating `element not in test_elements`. Default is False.
         ``np.isin(a, b, invert=True)`` is equivalent to (but faster
         than) ``np.invert(np.isin(a, b))``.
-   kind : {None, 'mergesort', 'dictionary'}, optional
+    kind : {None, 'mergesort', 'dictionary'}, optional
         The algorithm to use. This will not affect the final result,
         but will affect the speed. Default will select automatically
         based on memory considerations.
-        - If 'mergesort', will use a mergesort-based approach. This will have
+
+        * If 'mergesort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
           `ar1` and `ar2`, not accounting for size of dtypes.
-        - If 'dictionary', will use a key-dictionary approach similar
+        * If 'dictionary', will use a key-dictionary approach similar
           to a counting sort. This is only available for boolean and
           integer arrays. This will have a memory usage of the
           size of `ar1` plus the max-min value of `ar2`. This tends
           to be the faster method if the following formula is true:
-          `log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927`,
+          ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
           but may use greater memory.
-        - If `None`, will automatically choose 'dictionary' if
+        * If `None`, will automatically choose 'dictionary' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
           otherwise will use 'mergesort'. This is done to not use
-- 
cgit v1.2.1


From 4ed458f16d9dd64554ccf49e315c5b8fb577d4cd Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sat, 18 Jun 2022 13:49:03 -0400
Subject: MAINT: change kind names for in1d

- Switch dictionary->table, mergesort->sort
---
 numpy/lib/arraysetops.py | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 5102a5d06..56080382c 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -545,27 +545,27 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
         False where an element of `ar1` is in `ar2` and True otherwise).
         Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
         to (but is faster than) ``np.invert(in1d(a, b))``.
-    kind : {None, 'mergesort', 'dictionary'}, optional
+    kind : {None, 'sort', 'table'}, optional
         The algorithm to use. This will not affect the final result,
         but will affect the speed. Default will select automatically
         based on memory considerations.
 
-        * If 'mergesort', will use a mergesort-based approach. This will have
+        * If 'sort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
           `ar1` and `ar2`, not accounting for size of dtypes.
-        * If 'dictionary', will use a key-dictionary approach similar
+        * If 'table', will use a key-dictionary approach similar
           to a counting sort. This is only available for boolean and
           integer arrays. This will have a memory usage of the
           size of `ar1` plus the max-min value of `ar2`. This tends
           to be the faster method if the following formula is true:
           ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
           but may use greater memory.
-        * If `None`, will automatically choose 'dictionary' if
+        * If `None`, will automatically choose 'table' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
-          otherwise will use 'mergesort'. This is done to not use
+          otherwise will use 'sort'. This is done to not use
           a large amount of memory by default, even though
-          'dictionary' may be faster in most cases.
+          'table' may be faster in most cases.
 
         .. versionadded:: 1.8.0
 
@@ -625,18 +625,18 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
     integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and
                       np.issubdtype(ar2.dtype, np.integer))
 
-    if kind not in {None, 'mergesort', 'dictionary'}:
+    if kind not in {None, 'sort', 'table'}:
         raise ValueError(
             "Invalid kind: {0}. ".format(kind)
-            + "Please use None, 'mergesort' or 'dictionary'.")
+            + "Please use None, 'sort' or 'table'.")
 
-    if integer_arrays and kind in {None, 'dictionary'}:
+    if integer_arrays and kind in {None, 'table'}:
         ar2_min = np.min(ar2)
         ar2_max = np.max(ar2)
 
         ar2_range = int(ar2_max) - int(ar2_min)
 
-        # Constraints on whether we can actually use the dictionary method:
+        # Constraints on whether we can actually use the table method:
         range_safe_from_overflow = ar2_range < np.iinfo(ar2.dtype).max
         below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
 
@@ -650,7 +650,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
 
         if (
             range_safe_from_overflow and 
-            (below_memory_constraint or kind == 'dictionary')
+            (below_memory_constraint or kind == 'table')
         ):
 
             if invert:
@@ -672,18 +672,18 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
                                                         ar2_min]
 
             return outgoing_array
-        elif kind == 'dictionary':  # not range_safe_from_overflow
+        elif kind == 'table':  # not range_safe_from_overflow
             raise RuntimeError(
-                "You have specified kind='dictionary', "
+                "You have specified kind='table', "
                 "but the range of values in `ar2` exceeds the "
                 "maximum integer of the datatype. "
-                "Please set `kind` to None or 'mergesort'."
+                "Please set `kind` to None or 'sort'."
             )
-    elif kind == 'dictionary':
+    elif kind == 'table':
         raise ValueError(
-            "The 'dictionary' method is only "
+            "The 'table' method is only "
             "supported for boolean or integer arrays. "
-            "Please select 'mergesort' or None for kind."
+            "Please select 'sort' or None for kind."
         )
 
 
@@ -759,27 +759,27 @@ def isin(element, test_elements, assume_unique=False, invert=False,
         calculating `element not in test_elements`. Default is False.
         ``np.isin(a, b, invert=True)`` is equivalent to (but faster
         than) ``np.invert(np.isin(a, b))``.
-    kind : {None, 'mergesort', 'dictionary'}, optional
+    kind : {None, 'sort', 'table'}, optional
         The algorithm to use. This will not affect the final result,
         but will affect the speed. Default will select automatically
         based on memory considerations.
 
-        * If 'mergesort', will use a mergesort-based approach. This will have
+        * If 'sort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
           `ar1` and `ar2`, not accounting for size of dtypes.
-        * If 'dictionary', will use a key-dictionary approach similar
+        * If 'table', will use a key-dictionary approach similar
           to a counting sort. This is only available for boolean and
           integer arrays. This will have a memory usage of the
           size of `ar1` plus the max-min value of `ar2`. This tends
           to be the faster method if the following formula is true:
           ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
           but may use greater memory.
-        * If `None`, will automatically choose 'dictionary' if
+        * If `None`, will automatically choose 'table' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
-          otherwise will use 'mergesort'. This is done to not use
+          otherwise will use 'sort'. This is done to not use
           a large amount of memory by default, even though
-          'dictionary' may be faster in most cases.
+          'table' may be faster in most cases.
 
 
     Returns
-- 
cgit v1.2.1


From 6a3c80fb11d60569328e751bab7b664ed6a7967e Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Mon, 20 Jun 2022 14:00:30 -0400
Subject: DOC: Improve clarity of in1d docstring

---
 numpy/lib/arraysetops.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 56080382c..a6a0d99cb 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -547,8 +547,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
         to (but is faster than) ``np.invert(in1d(a, b))``.
     kind : {None, 'sort', 'table'}, optional
         The algorithm to use. This will not affect the final result,
-        but will affect the speed. Default will select automatically
-        based on memory considerations.
+        but will affect the speed and memory use. The default, None,
+        will select automatically based on memory considerations.
 
         * If 'sort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
@@ -560,7 +560,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
           to be the faster method if the following formula is true:
           ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
           but may use greater memory.
-        * If `None`, will automatically choose 'table' if
+        * If None, will automatically choose 'table' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
           otherwise will use 'sort'. This is done to not use
@@ -761,8 +761,8 @@ def isin(element, test_elements, assume_unique=False, invert=False,
         than) ``np.invert(np.isin(a, b))``.
     kind : {None, 'sort', 'table'}, optional
         The algorithm to use. This will not affect the final result,
-        but will affect the speed. Default will select automatically
-        based on memory considerations.
+        but will affect the speed and memory use. The default, None,
+        will select automatically based on memory considerations.
 
         * If 'sort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
@@ -774,7 +774,7 @@ def isin(element, test_elements, assume_unique=False, invert=False,
           to be the faster method if the following formula is true:
           ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
           but may use greater memory.
-        * If `None`, will automatically choose 'table' if
+        * If None, will automatically choose 'table' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
           otherwise will use 'sort'. This is done to not use
-- 
cgit v1.2.1


From 75dbbea7d7111c151498722c0c2811cee86d34ff Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Mon, 20 Jun 2022 14:13:09 -0400
Subject: DOC: `assume_unique` does not affect table method

---
 numpy/lib/arraysetops.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index a6a0d99cb..a81e0af8b 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -559,13 +559,15 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
           size of `ar1` plus the max-min value of `ar2`. This tends
           to be the faster method if the following formula is true:
           ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
-          but may use greater memory.
+          but may use greater memory. `assume_unique` has no effect
+          when the 'table' option is used.
         * If None, will automatically choose 'table' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
           otherwise will use 'sort'. This is done to not use
           a large amount of memory by default, even though
-          'table' may be faster in most cases.
+          'table' may be faster in most cases. If 'table' is chosen,
+          `assume_unique` will have no effect.
 
         .. versionadded:: 1.8.0
 
@@ -773,13 +775,15 @@ def isin(element, test_elements, assume_unique=False, invert=False,
           size of `ar1` plus the max-min value of `ar2`. This tends
           to be the faster method if the following formula is true:
           ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
-          but may use greater memory.
+          but may use greater memory. `assume_unique` has no effect
+          when the 'table' option is used.
         * If None, will automatically choose 'table' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
           otherwise will use 'sort'. This is done to not use
           a large amount of memory by default, even though
-          'table' may be faster in most cases.
+          'table' may be faster in most cases. If 'table' is chosen,
+          `assume_unique` will have no effect.
 
 
     Returns
-- 
cgit v1.2.1


From 48580947a5c6c30ef81aa49da2a9399d6b1c3b45 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Mon, 20 Jun 2022 14:21:07 -0400
Subject: DOC: Notes on `kind` to in1d/isin docstring

---
 numpy/lib/arraysetops.py | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index a81e0af8b..8b95fcac9 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -556,11 +556,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
         * If 'table', will use a key-dictionary approach similar
           to a counting sort. This is only available for boolean and
           integer arrays. This will have a memory usage of the
-          size of `ar1` plus the max-min value of `ar2`. This tends
-          to be the faster method if the following formula is true:
-          ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
-          but may use greater memory. `assume_unique` has no effect
-          when the 'table' option is used.
+          size of `ar1` plus the max-min value of `ar2`. `assume_unique`
+          has no effect when the 'table' option is used.
         * If None, will automatically choose 'table' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
@@ -593,6 +590,13 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
     ``asarray(ar2)`` is an object array rather than the expected array of
     contained values.
 
+    Using ``kind='table'`` tends to be faster than `kind='sort'` if the
+    following relationship is true:
+    ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
+    but may use greater memory. The default value for `kind` will
+    be automatically selected based only on memory usage, so one may
+    manually set ``kind='table'`` if memory constraints can be relaxed.
+
     .. versionadded:: 1.4.0
 
     Examples
@@ -772,11 +776,8 @@ def isin(element, test_elements, assume_unique=False, invert=False,
         * If 'table', will use a key-dictionary approach similar
           to a counting sort. This is only available for boolean and
           integer arrays. This will have a memory usage of the
-          size of `ar1` plus the max-min value of `ar2`. This tends
-          to be the faster method if the following formula is true:
-          ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
-          but may use greater memory. `assume_unique` has no effect
-          when the 'table' option is used.
+          size of `ar1` plus the max-min value of `ar2`. `assume_unique`
+          has no effect when the 'table' option is used.
         * If None, will automatically choose 'table' if
           the required memory allocation is less than or equal to
           6 times the sum of the sizes of `ar1` and `ar2`,
@@ -812,6 +813,13 @@ def isin(element, test_elements, assume_unique=False, invert=False,
     of the `array` constructor's way of handling non-sequence collections.
     Converting the set to a list usually gives the desired behavior.
 
+    Using ``kind='table'`` tends to be faster than `kind='sort'` if the
+    following relationship is true:
+    ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
+    but may use greater memory. The default value for `kind` will
+    be automatically selected based only on memory usage, so one may
+    manually set ``kind='table'`` if memory constraints can be relaxed.
+
     .. versionadded:: 1.13.0
 
     Examples
-- 
cgit v1.2.1


From bb71875d9982da00503181f7be7b2672d4a7152b Mon Sep 17 00:00:00 2001
From: Miles Cranmer <miles.cranmer@gmail.com>
Date: Wed, 22 Jun 2022 16:53:10 -0400
Subject: MAINT: Minor suggestions from code review

Co-authored-by: Sebastian Berg <sebastian@sipsolutions.net>
---
 numpy/lib/arraysetops.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 8b95fcac9..34035b2da 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -516,13 +516,13 @@ def setxor1d(ar1, ar2, assume_unique=False):
     return aux[flag[1:] & flag[:-1]]
 
 
-def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None,
+def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None, *,
                      kind=None):
     return (ar1, ar2)
 
 
 @array_function_dispatch(_in1d_dispatcher)
-def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
+def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
     """
     Test whether each element of a 1-D array is also present in a second array.
 
@@ -633,8 +633,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, kind=None):
 
     if kind not in {None, 'sort', 'table'}:
         raise ValueError(
-            "Invalid kind: {0}. ".format(kind)
-            + "Please use None, 'sort' or 'table'.")
+            f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.")
 
     if integer_arrays and kind in {None, 'table'}:
         ar2_min = np.min(ar2)
-- 
cgit v1.2.1


From 64de8b2eaba62f138be94744ada770aa227aae94 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 22 Jun 2022 16:55:20 -0400
Subject: MAINT: Remove positionality of kind in isin

---
 numpy/lib/arraysetops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 34035b2da..5d187cf07 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -736,12 +736,12 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
 
 
 def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
-                     kind=None):
+                     *, kind=None):
     return (element, test_elements)
 
 
 @array_function_dispatch(_isin_dispatcher)
-def isin(element, test_elements, assume_unique=False, invert=False,
+def isin(element, test_elements, assume_unique=False, invert=False, *,
          kind=None):
     """
     Calculates ``element in test_elements``, broadcasting over `element` only.
-- 
cgit v1.2.1


From c4aa5d8356c0f76550d17b27f034c2c19ffc1767 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 22 Jun 2022 16:57:52 -0400
Subject: DOC: Rephrase docstring of in1d/isin

---
 numpy/lib/arraysetops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 5d187cf07..e10700785 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -553,7 +553,7 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
         * If 'sort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
           `ar1` and `ar2`, not accounting for size of dtypes.
-        * If 'table', will use a key-dictionary approach similar
+        * If 'table', will use a lookup table approach similar
           to a counting sort. This is only available for boolean and
           integer arrays. This will have a memory usage of the
           size of `ar1` plus the max-min value of `ar2`. `assume_unique`
@@ -772,7 +772,7 @@ def isin(element, test_elements, assume_unique=False, invert=False, *,
         * If 'sort', will use a mergesort-based approach. This will have
           a memory usage of roughly 6 times the sum of the sizes of
           `ar1` and `ar2`, not accounting for size of dtypes.
-        * If 'table', will use a key-dictionary approach similar
+        * If 'table', will use a lookup table approach similar
           to a counting sort. This is only available for boolean and
           integer arrays. This will have a memory usage of the
           size of `ar1` plus the max-min value of `ar2`. `assume_unique`
-- 
cgit v1.2.1


From 6244c0631e1a78926db27143bf936bfb5e95c852 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 22 Jun 2022 17:00:38 -0400
Subject: MAINT: Fix edgecase for bool containers

---
 numpy/lib/arraysetops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index e10700785..9a44d7e44 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -623,9 +623,9 @@ def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
         ar2 = ar2.reshape(-1, 1)
     # Convert booleans to uint8 so we can use the fast integer algorithm
     if ar1.dtype == bool:
-        ar1 = ar1.view(np.uint8)
+        ar1 = ar1 + np.uint8(0)
     if ar2.dtype == bool:
-        ar2 = ar2.view(np.uint8)
+        ar2 = ar2 + np.uint8(0)
 
     # Check if we can use a fast integer algorithm:
     integer_arrays = (np.issubdtype(ar1.dtype, np.integer) and
-- 
cgit v1.2.1