BUG: Fix unique handling of nan entries. (#18070)

* benchmark bench_lib.Unique added
* extended test_unique_1d
* modify _unique1d
* extend test with return_index, return_inverse and return_counts parameters
* documentation updated
* Update numpy/lib/arraysetops.py
  Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com>
* full coverage of nan types
  Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com>
* added tests for the datetime like dtypes
* nan as vector of length 1
* use aux[-1] as nan, ..versionchanged, release note
* for complex arrays all NaN values are considered equivalent

Co-authored-by: filip_trojan <Tarantula2018>
Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com>
author: Filip Trojan <f.trojan@centrum.cz> 2021-02-12 17:47:55 +0100
committer: GitHub <noreply@github.com> 2021-02-12 09:47:55 -0700
commit: 7dcd29aaafe1ab8be4be04d3c793e5bcaf17459f (patch)
tree: d2dcbe5a6834d5f900e66ef50c660b412571d90c /numpy/lib/arraysetops.py
parent: a5dc2b5b917fc50575e10bbe139a0c78e43a1c1c (diff)
download: numpy-7dcd29aaafe1ab8be4be04d3c793e5bcaf17459f.tar.gz
1 files changed, 20 insertions, 1 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index eb5c488e4..7600e17be 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -209,6 +209,16 @@ def unique(ar, return_index=False, return_inverse=False,
     flattened subarrays are sorted in lexicographic order starting with the
     first element.
 
+    .. versionchanged:: 1.21
+        If nan values are in the input array, a single nan is placed
+        at the end of the sorted unique values.
+
+        Also for complex arrays all NaN values are considered equivalent
+        (no matter whether the NaN is in the real or imaginary part).
+        The NaN returned as the representative is the smallest one in
+        lexicographical order - see np.sort for how the lexicographical
+        order is defined for complex arrays.
+
     Examples
     --------
     >>> np.unique([1, 1, 2, 2, 3, 3])
@@ -324,7 +334,16 @@ def _unique1d(ar, return_index=False, return_inverse=False,
         aux = ar
     mask = np.empty(aux.shape, dtype=np.bool_)
     mask[:1] = True
-    mask[1:] = aux[1:] != aux[:-1]
+    if aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and np.isnan(aux[-1]):
+        if aux.dtype.kind == "c":  # for complex all NaNs are considered equivalent
+            aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
+        else:
+            aux_firstnan = np.searchsorted(aux, aux[-1], side='left')
+        mask[1:aux_firstnan] = (aux[1:aux_firstnan] != aux[:aux_firstnan - 1])
+        mask[aux_firstnan] = True
+        mask[aux_firstnan + 1:] = False
+    else:
+        mask[1:] = aux[1:] != aux[:-1]
 
     ret = (aux[mask],)
     if return_index:
author	Filip Trojan <f.trojan@centrum.cz>	2021-02-12 17:47:55 +0100
committer	GitHub <noreply@github.com>	2021-02-12 09:47:55 -0700
commit	7dcd29aaafe1ab8be4be04d3c793e5bcaf17459f (patch)
tree	d2dcbe5a6834d5f900e66ef50c660b412571d90c /numpy/lib/arraysetops.py
parent	a5dc2b5b917fc50575e10bbe139a0c78e43a1c1c (diff)
download	numpy-7dcd29aaafe1ab8be4be04d3c793e5bcaf17459f.tar.gz