From 5936386ff9b5674385ccee9154e320c686e4a28e Mon Sep 17 00:00:00 2001 From: AngelGris Date: Mon, 8 Feb 2021 18:38:06 +0100 Subject: BUG: np.in1d bug on the object array (issue 17923) --- numpy/lib/arraysetops.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 6c6c1ff80..e7f9add20 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -575,12 +575,28 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object: if invert: mask = np.ones(len(ar1), dtype=bool) - for a in ar2: - mask &= (ar1 != a) + # If ar2.dtype is object, store is used to wrap the a value + # in an array to prevent tuples from being unpacked before the comparison + if ar2.dtype == object: + store = np.empty(shape=1, dtype=object) + for a in ar2: + store[0] = a + mask &= (ar1 != store) + else: + for a in ar2: + mask &= (ar1 != a) else: mask = np.zeros(len(ar1), dtype=bool) - for a in ar2: - mask |= (ar1 == a) + # If ar2.dtype is object, store is used to wrap the a value + # in an array to prevent tuples from being unpacked before the comparison + if ar2.dtype == object: + store = np.empty(shape=1, dtype=object) + for a in ar2: + store[0] = a + mask |= (ar1 == store) + else: + for a in ar2: + mask |= (ar1 == a) return mask # Otherwise use sorting -- cgit v1.2.1 From 8fbd472e562237dd56ce251e266e2090d6c5003b Mon Sep 17 00:00:00 2001 From: AngelGris Date: Mon, 8 Feb 2021 21:55:01 +0100 Subject: Implement different approach to fix bug --- numpy/lib/arraysetops.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index e7f9add20..eb5c488e4 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -565,6 +565,10 @@ def in1d(ar1, ar2, 
assume_unique=False, invert=False): ar1 = np.asarray(ar1).ravel() ar2 = np.asarray(ar2).ravel() + # Ensure that iteration through object arrays yields size-1 arrays + if ar2.dtype == object: + ar2 = ar2.reshape(-1, 1) + # Check if one of the arrays may contain arbitrary objects contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject @@ -575,28 +579,12 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object: if invert: mask = np.ones(len(ar1), dtype=bool) - # If ar2.dtype is object, store is used to wrap the a value - # in an array to prevent tuples from being unpacked before the comparison - if ar2.dtype == object: - store = np.empty(shape=1, dtype=object) - for a in ar2: - store[0] = a - mask &= (ar1 != store) - else: - for a in ar2: - mask &= (ar1 != a) + for a in ar2: + mask &= (ar1 != a) else: mask = np.zeros(len(ar1), dtype=bool) - # If ar2.dtype is object, store is used to wrap the a value - # in an array to prevent tuples from being unpacked before the comparison - if ar2.dtype == object: - store = np.empty(shape=1, dtype=object) - for a in ar2: - store[0] = a - mask |= (ar1 == store) - else: - for a in ar2: - mask |= (ar1 == a) + for a in ar2: + mask |= (ar1 == a) return mask # Otherwise use sorting -- cgit v1.2.1 From 7dcd29aaafe1ab8be4be04d3c793e5bcaf17459f Mon Sep 17 00:00:00 2001 From: Filip Trojan Date: Fri, 12 Feb 2021 17:47:55 +0100 Subject: BUG: Fix unique handling of nan entries. 
(#18070) * benchmark bench_lib.Unique added * extended test_unique_1d * modify _unique1d * extend test with return_index, return_inverse and return_counts parameters * documentation updated * Update numpy/lib/arraysetops.py Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com> * full coverage of nan types Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com> * added tests for the datetime like dtypes * nan as vector of length 1 * use aux[-1] as nan, ..versionchanged, release note * for complex arrays all NaN values are considered equivalent Co-authored-by: filip_trojan Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com> --- numpy/lib/arraysetops.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index eb5c488e4..7600e17be 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -209,6 +209,16 @@ def unique(ar, return_index=False, return_inverse=False, flattened subarrays are sorted in lexicographic order starting with the first element. + .. versionchanged:: 1.21 + If nan values are in the input array, a single nan is put + to the end of the sorted unique values. + + Also for complex arrays all NaN values are considered equivalent + (no matter whether the NaN is in the real or imaginary part). + As the representative for the returned array the smallest one in the + lexicographical order is chosen - see np.sort for how the lexicographical + order is defined for complex arrays.
+ Examples -------- >>> np.unique([1, 1, 2, 2, 3, 3]) @@ -324,7 +334,16 @@ def _unique1d(ar, return_index=False, return_inverse=False, aux = ar mask = np.empty(aux.shape, dtype=np.bool_) mask[:1] = True - mask[1:] = aux[1:] != aux[:-1] + if aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and np.isnan(aux[-1]): + if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent + aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left') + else: + aux_firstnan = np.searchsorted(aux, aux[-1], side='left') + mask[1:aux_firstnan] = (aux[1:aux_firstnan] != aux[:aux_firstnan - 1]) + mask[aux_firstnan] = True + mask[aux_firstnan + 1:] = False + else: + mask[1:] = aux[1:] != aux[:-1] ret = (aux[mask],) if return_index: -- cgit v1.2.1