From 5936386ff9b5674385ccee9154e320c686e4a28e Mon Sep 17 00:00:00 2001
From: AngelGris <lucianogarciabes@gmail.com>
Date: Mon, 8 Feb 2021 18:38:06 +0100
Subject: BUG: np.in1d bug on the object array (issue 17923)

---
 numpy/lib/arraysetops.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 6c6c1ff80..e7f9add20 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -575,12 +575,28 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
     if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
         if invert:
             mask = np.ones(len(ar1), dtype=bool)
-            for a in ar2:
-                mask &= (ar1 != a)
+            # If ar2.dtype is object, store is used to wrap the a value
+            # in an array to prevent tuples from being unpacked before the comparison
+            if ar2.dtype == object:
+                store = np.empty(shape=1, dtype=object)
+                for a in ar2:
+                    store[0] = a
+                    mask &= (ar1 != store)
+            else:
+                for a in ar2:
+                    mask &= (ar1 != a)
         else:
             mask = np.zeros(len(ar1), dtype=bool)
-            for a in ar2:
-                mask |= (ar1 == a)
+            # If ar2.dtype is object, store is used to wrap the a value
+            # in an array to prevent tuples from being unpacked before the comparison
+            if ar2.dtype == object:
+                store = np.empty(shape=1, dtype=object)
+                for a in ar2:
+                    store[0] = a
+                    mask |= (ar1 == store)
+            else:
+                for a in ar2:
+                    mask |= (ar1 == a)
         return mask
 
     # Otherwise use sorting
-- 
cgit v1.2.1


From 8fbd472e562237dd56ce251e266e2090d6c5003b Mon Sep 17 00:00:00 2001
From: AngelGris <lucianogarciabes@gmail.com>
Date: Mon, 8 Feb 2021 21:55:01 +0100
Subject: Implement different approach to fix bug

---
 numpy/lib/arraysetops.py | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index e7f9add20..eb5c488e4 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -565,6 +565,10 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
     ar1 = np.asarray(ar1).ravel()
     ar2 = np.asarray(ar2).ravel()
 
+    # Ensure that iteration through object arrays yields size-1 arrays
+    if ar2.dtype == object:
+        ar2 = ar2.reshape(-1, 1)
+
     # Check if one of the arrays may contain arbitrary objects
     contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject
 
@@ -575,28 +579,12 @@ def in1d(ar1, ar2, assume_unique=False, invert=False):
     if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
         if invert:
             mask = np.ones(len(ar1), dtype=bool)
-            # If ar2.dtype is object, store is used to wrap the a value
-            # in an array to prevent tuples from being unpacked before the comparison
-            if ar2.dtype == object:
-                store = np.empty(shape=1, dtype=object)
-                for a in ar2:
-                    store[0] = a
-                    mask &= (ar1 != store)
-            else:
-                for a in ar2:
-                    mask &= (ar1 != a)
+            for a in ar2:
+                mask &= (ar1 != a)
         else:
             mask = np.zeros(len(ar1), dtype=bool)
-            # If ar2.dtype is object, store is used to wrap the a value
-            # in an array to prevent tuples from being unpacked before the comparison
-            if ar2.dtype == object:
-                store = np.empty(shape=1, dtype=object)
-                for a in ar2:
-                    store[0] = a
-                    mask |= (ar1 == store)
-            else:
-                for a in ar2:
-                    mask |= (ar1 == a)
+            for a in ar2:
+                mask |= (ar1 == a)
         return mask
 
     # Otherwise use sorting
-- 
cgit v1.2.1


From 7dcd29aaafe1ab8be4be04d3c793e5bcaf17459f Mon Sep 17 00:00:00 2001
From: Filip Trojan <f.trojan@centrum.cz>
Date: Fri, 12 Feb 2021 17:47:55 +0100
Subject: BUG: Fix unique handling of nan entries. (#18070)

* benchmark bench_lib.Unique added

* extended test_unique_1d

* modify _unique1d

* extend test with return_index, return_inverse and return_counts parameters

* documentation updated

* Update numpy/lib/arraysetops.py

Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com>

* full coverage of nan types

Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com>

* added tests for the datetime like dtypes

* nan as vector of length 1

* use aux[-1] as nan, ..versionchanged, release note

* for complex arrays all NaN values are considered equivalent

Co-authored-by: filip_trojan <Tarantula2018>
Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com>
---
 numpy/lib/arraysetops.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'numpy/lib/arraysetops.py')

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index eb5c488e4..7600e17be 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -209,6 +209,16 @@ def unique(ar, return_index=False, return_inverse=False,
     flattened subarrays are sorted in lexicographic order starting with the
     first element.
 
+    .. versionchanged:: 1.21
+        If nan values are in the input array, a single nan is placed
+        at the end of the sorted unique values.
+
+        Also for complex arrays all NaN values are considered equivalent
+        (no matter whether the NaN is in the real or imaginary part).
+        As the representative for the returned array the smallest one in the
+        lexicographical order is chosen - see np.sort for how the lexicographical
+        order is defined for complex arrays.
+
     Examples
     --------
     >>> np.unique([1, 1, 2, 2, 3, 3])
@@ -324,7 +334,16 @@ def _unique1d(ar, return_index=False, return_inverse=False,
         aux = ar
     mask = np.empty(aux.shape, dtype=np.bool_)
     mask[:1] = True
-    mask[1:] = aux[1:] != aux[:-1]
+    if aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and np.isnan(aux[-1]):
+        if aux.dtype.kind == "c":  # for complex all NaNs are considered equivalent
+            aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
+        else:
+            aux_firstnan = np.searchsorted(aux, aux[-1], side='left')
+        mask[1:aux_firstnan] = (aux[1:aux_firstnan] != aux[:aux_firstnan - 1])
+        mask[aux_firstnan] = True
+        mask[aux_firstnan + 1:] = False
+    else:
+        mask[1:] = aux[1:] != aux[:-1]
 
     ret = (aux[mask],)
     if return_index:
-- 
cgit v1.2.1