From 69b0c42bca27dd5d5522de306bcd7db7deccbfad Mon Sep 17 00:00:00 2001 From: B R S Recht Date: Thu, 4 May 2017 20:03:09 -0400 Subject: ENH: Add isin, genereralizing in1d to ND arrays (#8423) This fixes gh-8331 Also update the docs for arraysetops to remove the outdated "1D" from the description, which was already incorrect for np.unique. --- numpy/lib/arraysetops.py | 102 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 3 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index fae3e3cbc..9a1448991 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -1,9 +1,10 @@ """ -Set operations for 1D numeric arrays based on sorting. +Set operations for arrays based on sorting. :Contains: - ediff1d, unique, + isin, + ediff1d, intersect1d, setxor1d, in1d, @@ -31,7 +32,7 @@ import numpy as np __all__ = [ 'ediff1d', 'intersect1d', 'setxor1d', 'union1d', 'setdiff1d', 'unique', - 'in1d' + 'in1d', 'isin' ] @@ -380,6 +381,7 @@ def setxor1d(ar1, ar2, assume_unique=False): flag2 = flag[1:] == flag[:-1] return aux[flag2] + def in1d(ar1, ar2, assume_unique=False, invert=False): """ Test whether each element of a 1-D array is also present in a second array. @@ -387,6 +389,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): Returns a boolean array the same length as `ar1` that is True where an element of `ar1` is in `ar2` and False otherwise. + We recommend using :func:`isin` instead of `in1d` for new code. + Parameters ---------- ar1 : (M,) array_like @@ -411,6 +415,8 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): See Also -------- + isin : Version of this function that preserves the + shape of ar1. numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. @@ -481,6 +487,96 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): else: return ret[rev_idx] + +def isin(element, test_elements, assume_unique=False, invert=False): + """ + Calculates `element in test_elements`, broadcasting over `element` only. + Returns a boolean array of the same shape as `element` that is True + where an element of `element` is in `test_elements` and False otherwise. + + Parameters + ---------- + element : array_like + Input array. + test_elements : array_like + The values against which to test each value of `element`. + This argument is flattened if it is an array or array_like. + See notes for behavior with non-array-like parameters. + assume_unique : bool, optional + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. + invert : bool, optional + If True, the values in the returned array are inverted, as if + calculating `element not in test_elements`. Default is False. + ``np.isin(a, b, invert=True)`` is equivalent to (but faster + than) ``np.invert(np.isin(a, b))``. + + Returns + ------- + isin : ndarray, bool + Has the same shape as `element`. The values `element[isin]` + are in `test_elements`. + + See Also + -------- + in1d : Flattened version of this function. + numpy.lib.arraysetops : Module with a number of other functions for + performing set operations on arrays. + Notes + ----- + + `isin` is an element-wise function version of the python keyword `in`. + ``isin(a, b)`` is roughly equivalent to + ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences. + + `element` and `test_elements` are converted to arrays if they are not + already. If `test_elements` is a set (or other non-sequence collection) + it will be converted to an object array with one element, rather than an + array of the values contained in `test_elements`. This is a consequence + of the `array` constructor's way of handling non-sequence collections. + Converting the set to a list usually gives the desired behavior. + + .. versionadded:: 1.13.0 + + Examples + -------- + >>> element = 2*np.arange(4).reshape((2, 2)) + >>> element + array([[0, 2], + [4, 6]]) + >>> test_elements = [1, 2, 4, 8] + >>> mask = np.isin(element, test_elements) + >>> mask + array([[ False, True], + [ True, False]], dtype=bool) + >>> element[mask] + array([2, 4]) + >>> mask = np.isin(element, test_elements, invert=True) + >>> mask + array([[ True, False], + [ False, True]], dtype=bool) + >>> element[mask] + array([0, 6]) + + Because of how `array` handles sets, the following does not + work as expected: + + >>> test_set = {1, 2, 4, 8} + >>> np.isin(element, test_set) + array([[ False, False], + [ False, False]], dtype=bool) + + Casting the set to a list gives the expected result: + + >>> np.isin(element, list(test_set)) + array([[ False, True], + [ True, False]], dtype=bool) + """ + element = np.asarray(element) + return in1d(element, test_elements, assume_unique=assume_unique, + invert=invert).reshape(element.shape) + + def union1d(ar1, ar2): """ Find the union of two arrays. -- cgit v1.2.1 From 8fbf75e499196c05da46302f035909897c9ec272 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sat, 6 May 2017 15:31:52 -0700 Subject: DOC: fix docstring for np.isin --- numpy/lib/arraysetops.py | 1 + 1 file changed, 1 insertion(+) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 9a1448991..d29e555b8 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -522,6 +522,7 @@ def isin(element, test_elements, assume_unique=False, invert=False): in1d : Flattened version of this function. numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. + Notes ----- -- cgit v1.2.1 From e52efca5accf06016ca4c1b180216e66db881ce2 Mon Sep 17 00:00:00 2001 From: Konrad Kapp Date: Sat, 20 May 2017 12:50:15 +0200 Subject: BUG: set default type for empty index array to `numpy.intp` (#9142) Fixes #9137 --- numpy/lib/arraysetops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index d29e555b8..9b0a1193f 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -263,9 +263,9 @@ def _unique1d(ar, return_index=False, return_inverse=False, else: ret = (ar,) if return_index: - ret += (np.empty(0, np.bool),) + ret += (np.empty(0, np.intp),) if return_inverse: - ret += (np.empty(0, np.bool),) + ret += (np.empty(0, np.intp),) if return_counts: ret += (np.empty(0, np.intp),) return ret -- cgit v1.2.1 From cba15d5ec3472ac167bdfe06f32dc07d6bbabd4d Mon Sep 17 00:00:00 2001 From: wufangjie Date: Thu, 27 Jul 2017 11:21:05 +0800 Subject: make `setxor1d' a bit clear and speed up We need to find the index which is not the same with the left and right, I think np.logical_and's meaning is more clear and I test this got a speed up --- numpy/lib/arraysetops.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 9b0a1193f..a454b8725 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -378,8 +378,9 @@ def setxor1d(ar1, ar2, assume_unique=False): # flag = ediff1d( aux, to_end = 1, to_begin = 1 ) == 0 flag = np.concatenate(([True], aux[1:] != aux[:-1], [True])) # flag2 = ediff1d( flag ) == 0 - flag2 = flag[1:] == flag[:-1] - return aux[flag2] +# flag2 = flag[1:] == flag[:-1] +# return aux[flag2] + return aux[np.logical_and(flag[1:], flag[:-1])] def in1d(ar1, ar2, assume_unique=False, invert=False): -- cgit v1.2.1 From 5c213e98bf7721de20e6bc938eaa40349cba883e Mon Sep 17 00:00:00 2001 From: wufangjie Date: Fri, 28 Jul 2017 08:06:59 +0800 Subject: MAINT: Make `setxor1d' a bit clearer and speed it up --- numpy/lib/arraysetops.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index a454b8725..aa3a05e12 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -375,12 +375,8 @@ def setxor1d(ar1, ar2, assume_unique=False): return aux aux.sort() -# flag = ediff1d( aux, to_end = 1, to_begin = 1 ) == 0 flag = np.concatenate(([True], aux[1:] != aux[:-1], [True])) -# flag2 = ediff1d( flag ) == 0 -# flag2 = flag[1:] == flag[:-1] -# return aux[flag2] - return aux[np.logical_and(flag[1:], flag[:-1])] + return aux[flag[1:] & flag[:-1]] def in1d(ar1, ar2, assume_unique=False, invert=False): -- cgit v1.2.1 From 2b781f8967488dc007f8f0a1e6a7f49208788d12 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 1 Aug 2017 20:29:36 +0000 Subject: MAINT/DOC: Use builtin when np.{x} is builtins.{x}. This is the case for x in {int, bool, str, float, complex, object}. Using the np.{x} version is deceptive as it suggests that there is a difference. This change doesn't affect any external behaviour. The `long` type is missing in python 3, so np.long is still useful --- numpy/lib/arraysetops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index aa3a05e12..ededb9dd0 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -451,11 +451,11 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): # This code is significantly faster when the condition is satisfied. if len(ar2) < 10 * len(ar1) ** 0.145: if invert: - mask = np.ones(len(ar1), dtype=np.bool) + mask = np.ones(len(ar1), dtype=bool) for a in ar2: mask &= (ar1 != a) else: - mask = np.zeros(len(ar1), dtype=np.bool) + mask = np.zeros(len(ar1), dtype=bool) for a in ar2: mask |= (ar1 == a) return mask -- cgit v1.2.1 From ac6b1a902b99e340cf7eeeeb7392c91e38db9dd8 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 13 Nov 2017 23:45:45 -0800 Subject: ENH: don't show boolean dtype, as it is implied --- numpy/lib/arraysetops.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index ededb9dd0..59b54eb38 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -435,12 +435,12 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): >>> states = [0, 2] >>> mask = np.in1d(test, states) >>> mask - array([ True, False, True, False, True], dtype=bool) + array([ True, False, True, False, True]) >>> test[mask] array([0, 2, 0]) >>> mask = np.in1d(test, states, invert=True) >>> mask - array([False, True, False, True, False], dtype=bool) + array([False, True, False, True, False]) >>> test[mask] array([1, 5]) """ @@ -546,13 +546,13 @@ def isin(element, test_elements, assume_unique=False, invert=False): >>> mask = np.isin(element, test_elements) >>> mask array([[ False, True], - [ True, False]], dtype=bool) + [ True, False]]) >>> element[mask] array([2, 4]) >>> mask = np.isin(element, test_elements, invert=True) >>> mask array([[ True, False], - [ False, True]], dtype=bool) + [ False, True]]) >>> element[mask] array([0, 6]) @@ -562,13 +562,13 @@ def isin(element, test_elements, assume_unique=False, invert=False): >>> test_set = {1, 2, 4, 8} >>> np.isin(element, test_set) array([[ False, False], - [ False, False]], dtype=bool) + [ False, False]]) Casting the set to a list gives the expected result: >>> np.isin(element, list(test_set)) array([[ False, True], - [ True, False]], dtype=bool) + [ True, False]]) """ element = np.asarray(element) return in1d(element, test_elements, assume_unique=assume_unique, -- cgit v1.2.1 From 2d140f11857fc1353d6b2dcbb801e693128fd09a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20D=C3=B6pfert?= Date: Sat, 18 Nov 2017 19:21:23 +0100 Subject: ENH: Make `np.in1d()` work for unorderable object arrays (#9999) --- numpy/lib/arraysetops.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index ededb9dd0..f3301af92 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -448,8 +448,14 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): ar1 = np.asarray(ar1).ravel() ar2 = np.asarray(ar2).ravel() - # This code is significantly faster when the condition is satisfied. - if len(ar2) < 10 * len(ar1) ** 0.145: + # Check if one of the arrays may contain arbitrary objects + contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject + + # This code is run when + # a) the first condition is true, making the code significantly faster + # b) the second condition is true (i.e. `ar1` or `ar2` may contain + # arbitrary objects), since then sorting is not guaranteed to work + if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object: if invert: mask = np.ones(len(ar1), dtype=bool) for a in ar2: -- cgit v1.2.1 From 45df7011edaeb263a231003c1efcb5ebb902a349 Mon Sep 17 00:00:00 2001 From: Anas Khan Date: Mon, 8 Jan 2018 11:57:10 +0530 Subject: BUG: fixing flattening of arrays in `union1d` in arraysetops.py --- numpy/lib/arraysetops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index a9426cdf3..e00163941 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -614,7 +614,7 @@ def union1d(ar1, ar2): >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])) array([1, 2, 3, 4, 6]) """ - return unique(np.concatenate((ar1, ar2))) + return unique(np.concatenate((ar1.flatten(), ar2.flatten()))) def setdiff1d(ar1, ar2, assume_unique=False): """ -- cgit v1.2.1 From abb17d31e0f92f9e291a1a5b4f70742940287af8 Mon Sep 17 00:00:00 2001 From: Anas Khan Date: Tue, 9 Jan 2018 12:08:37 +0530 Subject: BUG: concatenation using axis=None in union1d --- numpy/lib/arraysetops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index e00163941..6fce1c047 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -614,7 +614,7 @@ def union1d(ar1, ar2): >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])) array([1, 2, 3, 4, 6]) """ - return unique(np.concatenate((ar1.flatten(), ar2.flatten()))) + return unique(np.concatenate((ar1, ar2), axis=None)) def setdiff1d(ar1, ar2, assume_unique=False): """ -- cgit v1.2.1 From dcd1f1b7c4cdb88baf11b483ecb37e923349b680 Mon Sep 17 00:00:00 2001 From: Samuel Jackson Date: Wed, 17 Jan 2018 09:46:58 +0000 Subject: DOC: Fix version added labels in numpy.unique --- numpy/lib/arraysetops.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index a9426cdf3..7e8bf2133 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -135,13 +135,16 @@ def unique(ar, return_index=False, return_inverse=False, return_counts : bool, optional If True, also return the number of times each unique item appears in `ar`. + .. versionadded:: 1.9.0 + axis : int or None, optional The axis to operate on. If None, `ar` will be flattened beforehand. Otherwise, duplicate items will be removed along the provided axis, with all the other axes belonging to the each of the unique elements. Object arrays or structured arrays that contain objects are not supported if the `axis` kwarg is used. + .. versionadded:: 1.13.0 @@ -159,6 +162,7 @@ def unique(ar, return_index=False, return_inverse=False, unique_counts : ndarray, optional The number of times each of the unique values comes up in the original array. Only provided if `return_counts` is True. + .. versionadded:: 1.9.0 See Also -- cgit v1.2.1 From 3c05221dd443323e7948eadf8105c0b52e760f70 Mon Sep 17 00:00:00 2001 From: Claudio Freire Date: Wed, 31 Jan 2018 00:50:57 -0800 Subject: MAINT: Remove messy handling of output tuple in np.unique Largely taken from gh-9531 --- numpy/lib/arraysetops.py | 64 +++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 31 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index b1e74dc74..78d4536c0 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -110,6 +110,14 @@ def ediff1d(ary, to_end=None, to_begin=None): return result +def _unpack_tuple(x): + """ Unpacks one-element tuples for use as return values """ + if len(x) == 1: + return x[0] + else: + return x + + def unique(ar, return_index=False, return_inverse=False, return_counts=False, axis=None): """ @@ -211,7 +219,9 @@ def unique(ar, return_index=False, return_inverse=False, """ ar = np.asanyarray(ar) if axis is None: - return _unique1d(ar, return_index, return_inverse, return_counts) + ret = _unique1d(ar, return_index, return_inverse, return_counts) + return _unpack_tuple(ret) + if not (-ar.ndim <= axis < ar.ndim): raise ValueError('Invalid axis kwarg specified for unique') @@ -245,11 +255,9 @@ def unique(ar, return_index=False, return_inverse=False, output = _unique1d(consolidated, return_index, return_inverse, return_counts) - if not (return_index or return_inverse or return_counts): - return reshape_uniq(output) - else: - uniq = reshape_uniq(output[0]) - return (uniq,) + output[1:] + output = (reshape_uniq(output[0]),) + output[1:] + return _unpack_tuple(output) + def _unique1d(ar, return_index=False, return_inverse=False, return_counts=False): @@ -259,19 +267,15 @@ def _unique1d(ar, return_index=False, return_inverse=False, ar = np.asanyarray(ar).flatten() optional_indices = return_index or return_inverse - optional_returns = optional_indices or return_counts if ar.size == 0: - if not optional_returns: - ret = ar - else: - ret = (ar,) - if return_index: - ret += (np.empty(0, np.intp),) - if return_inverse: - ret += (np.empty(0, np.intp),) - if return_counts: - ret += (np.empty(0, np.intp),) + ret = (ar,) + if return_index: + ret += (np.empty(0, np.intp),) + if return_inverse: + ret += (np.empty(0, np.intp),) + if return_counts: + ret += (np.empty(0, np.intp),) return ret if optional_indices: @@ -282,22 +286,20 @@ def _unique1d(ar, return_index=False, return_inverse=False, aux = ar flag = np.concatenate(([True], aux[1:] != aux[:-1])) - if not optional_returns: - ret = aux[flag] - else: - ret = (aux[flag],) - if return_index: - ret += (perm[flag],) - if return_inverse: - iflag = np.cumsum(flag) - 1 - inv_idx = np.empty(ar.shape, dtype=np.intp) - inv_idx[perm] = iflag - ret += (inv_idx,) - if return_counts: - idx = np.concatenate(np.nonzero(flag) + ([ar.size],)) - ret += (np.diff(idx),) + ret = (aux[flag],) + if return_index: + ret += (perm[flag],) + if return_inverse: + iflag = np.cumsum(flag) - 1 + inv_idx = np.empty(ar.shape, dtype=np.intp) + inv_idx[perm] = iflag + ret += (inv_idx,) + if return_counts: + idx = np.concatenate(np.nonzero(flag) + ([ar.size],)) + ret += (np.diff(idx),) return ret + def intersect1d(ar1, ar2, assume_unique=False): """ Find the intersection of two arrays. -- cgit v1.2.1 From 2b417df83202df9ea67f1eec76985a3da20cb86c Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Wed, 31 Jan 2018 00:56:36 -0800 Subject: MAINT: Remove special-casing of empty arrays in unique_1d --- numpy/lib/arraysetops.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 78d4536c0..e1c1c8803 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -268,34 +268,26 @@ def _unique1d(ar, return_index=False, return_inverse=False, optional_indices = return_index or return_inverse - if ar.size == 0: - ret = (ar,) - if return_index: - ret += (np.empty(0, np.intp),) - if return_inverse: - ret += (np.empty(0, np.intp),) - if return_counts: - ret += (np.empty(0, np.intp),) - return ret - if optional_indices: perm = ar.argsort(kind='mergesort' if return_index else 'quicksort') aux = ar[perm] else: ar.sort() aux = ar - flag = np.concatenate(([True], aux[1:] != aux[:-1])) + mask = np.empty(aux.shape, dtype=np.bool_) + mask[:1] = True + mask[1:] = aux[1:] != aux[:-1] - ret = (aux[flag],) + ret = (aux[mask],) if return_index: - ret += (perm[flag],) + ret += (perm[mask],) if return_inverse: - iflag = np.cumsum(flag) - 1 + imask = np.cumsum(mask) - 1 inv_idx = np.empty(ar.shape, dtype=np.intp) - inv_idx[perm] = iflag + inv_idx[perm] = imask ret += (inv_idx,) if return_counts: - idx = np.concatenate(np.nonzero(flag) + ([ar.size],)) + idx = np.concatenate(np.nonzero(mask) + ([ar.size],)) ret += (np.diff(idx),) return ret -- cgit v1.2.1 From 41872c1085b707a6020b0058dc063d2981520bc6 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Wed, 31 Jan 2018 01:12:06 -0800 Subject: DOC: Use a bulleted list to show the outputs of `unique`, for clarity --- numpy/lib/arraysetops.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index e1c1c8803..1ac1f3f03 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -124,10 +124,11 @@ def unique(ar, return_index=False, return_inverse=False, Find the unique elements of an array. Returns the sorted unique elements of an array. There are three optional - outputs in addition to the unique elements: the indices of the input array - that give the unique values, the indices of the unique array that - reconstruct the input array, and the number of times each unique value - comes up in the input array. + outputs in addition to the unique elements: + + * the indices of the input array that give the unique values + * the indices of the unique array that reconstruct the input array + * the number of times each unique value comes up in the input array Parameters ---------- -- cgit v1.2.1 From f80e20cfcf0728e068933cd5de9575db8ac3f803 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Wed, 31 Jan 2018 01:12:53 -0800 Subject: MAINT: Make it clear that counts and inverse depend only on the mask --- numpy/lib/arraysetops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 1ac1f3f03..e6ff5bf38 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -284,11 +284,11 @@ def _unique1d(ar, return_index=False, return_inverse=False, ret += (perm[mask],) if return_inverse: imask = np.cumsum(mask) - 1 - inv_idx = np.empty(ar.shape, dtype=np.intp) + inv_idx = np.empty(mask.shape, dtype=np.intp) inv_idx[perm] = imask ret += (inv_idx,) if return_counts: - idx = np.concatenate(np.nonzero(mask) + ([ar.size],)) + idx = np.concatenate(np.nonzero(mask) + ([mask.size],)) ret += (np.diff(idx),) return ret -- cgit v1.2.1 From ef70f13177a53266fd8547da6e00bc252a057893 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Wed, 31 Jan 2018 09:38:14 -0800 Subject: MAINT: Use AxisError in swapaxes --- numpy/lib/arraysetops.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index e6ff5bf38..7b103ef3e 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -223,10 +223,12 @@ def unique(ar, return_index=False, return_inverse=False, ret = _unique1d(ar, return_index, return_inverse, return_counts) return _unpack_tuple(ret) - if not (-ar.ndim <= axis < ar.ndim): - raise ValueError('Invalid axis kwarg specified for unique') + try: + ar = np.swapaxes(ar, axis, 0) + except np.AxisError: + # this removes the "axis1" or "axis2" prefix from the error message + raise np.AxisError(axis, ar.ndim) - ar = np.swapaxes(ar, axis, 0) orig_shape, orig_dtype = ar.shape, ar.dtype # Must reshape to a contiguous 2D array for this to work... ar = ar.reshape(orig_shape[0], -1) -- cgit v1.2.1 From f28615092d6cd7f849dd09d0dc57471af7def143 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Tue, 13 Feb 2018 21:47:51 -0700 Subject: BUG: Revert sort optimization in np.unique. The optimization was to sort integer subarrays by treating them as strings of unsigned bytes. That worked fine for finding the unique subarrays, but the sort order of the results could be unexpected. Closes #10495. --- numpy/lib/arraysetops.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 7b103ef3e..e8eda297f 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -148,16 +148,15 @@ def unique(ar, return_index=False, return_inverse=False, .. versionadded:: 1.9.0 axis : int or None, optional - The axis to operate on. If None, `ar` will be flattened beforehand. - Otherwise, duplicate items will be removed along the provided axis, - with all the other axes belonging to the each of the unique elements. - Object arrays or structured arrays that contain objects are not - supported if the `axis` kwarg is used. + The axis to operate on. If None, `ar` will be flattened. If an integer, + the subarrays indexed by the given axis will be flattened and treated + as the elements of a 1-D array with the dimension of the given axis, + see the notes for more details. Object arrays or structured arrays + that contain objects are not supported if the `axis` kwarg is used. The + default is None. .. versionadded:: 1.13.0 - - Returns ------- unique : ndarray @@ -179,6 +178,17 @@ def unique(ar, return_index=False, return_inverse=False, numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. + Notes + ----- + When an axis is specified the subarrays indexed by the axis are sorted. + This is done by making the specified axis the first dimension of the array + and then flattening the subarrays in C order. The flattened subarrays are + then viewed as a structured type with each element given a label, with the + effect that we end up with a 1-D array of structured types that can be + treated in the same way as any other 1-D array. The result is that the + flattened subarrays are sorted in lexicographic order starting with the + first element. + Examples -------- >>> np.unique([1, 1, 2, 2, 3, 3]) @@ -223,25 +233,18 @@ def unique(ar, return_index=False, return_inverse=False, ret = _unique1d(ar, return_index, return_inverse, return_counts) return _unpack_tuple(ret) + # axis was specified and not None try: ar = np.swapaxes(ar, axis, 0) except np.AxisError: # this removes the "axis1" or "axis2" prefix from the error message raise np.AxisError(axis, ar.ndim) - orig_shape, orig_dtype = ar.shape, ar.dtype # Must reshape to a contiguous 2D array for this to work... + orig_shape, orig_dtype = ar.shape, ar.dtype ar = ar.reshape(orig_shape[0], -1) ar = np.ascontiguousarray(ar) - - if ar.dtype.char in (np.typecodes['AllInteger'] + - np.typecodes['Datetime'] + 'S'): - # Optimization: Creating a view of your data with a np.void data type of - # size the number of bytes in a full row. Handles any type where items - # have a unique binary representation, i.e. 0 is only 0, not +0 and -0. - dtype = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1])) - else: - dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])] + dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])] try: consolidated = ar.view(dtype) -- cgit v1.2.1 From 0bd86db79b7a8000f9dbd401df722ffae9c2b33c Mon Sep 17 00:00:00 2001 From: Christopher Date: Fri, 25 May 2018 19:25:20 -0400 Subject: ENH: Modify intersect1d to return common indices (#10684) * added function commonpts1d * Update arraysetops.py * Update arraysetops.py * modified intersect1d to return common indices Proposed this idea in previous pull request (https://github.com/numpy/numpy/pull/10683) and made suggested changes for implementing this idea to have relevant common indices that correspond to the values in the intersection of the two arrays. * update intersect1d with suggested changes * implemented return_indices options for intersect1d I've tested out the above code and for the ~10 different test sets I've tried it's worked thus far. It's probably not the cleanest implementation but it works and is vectorized. * cleaned up structure for intersect1d * fixed copy-paste error, added second test, changed a conditional * Testing return_indices in intersect1d * formatting * created separate test function for intersect1d indices, added spaces after commas * fixed up example and some style * fixed style * style change * removed one example * removed extra space * added version number for return_indices * added 'return_indices' keyword for np.intersect1d * fixed formatting * updated return_indices entry * fixed some typos and style * added bit about first instance of a value being used * STY: Fix comment formats * DOC: missing space * DOC: correct parameter names in docstring * made suggested changes * fixed a mistake from previous update also added documentation for comm1, comm2 to match doc from np.unique * added in tests for 2d inputs * STY: Add missing spaces around commas * TST: Correct array to actually be unique * STY: Spaces at beginning of comments --- numpy/lib/arraysetops.py | 65 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 8 deletions(-) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index e8eda297f..4d3f35183 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -298,7 +298,7 @@ def _unique1d(ar, return_index=False, return_inverse=False, return ret -def intersect1d(ar1, ar2, assume_unique=False): +def intersect1d(ar1, ar2, assume_unique=False, return_indices=False): """ Find the intersection of two arrays. @@ -307,15 +307,28 @@ def intersect1d(ar1, ar2, assume_unique=False): Parameters ---------- ar1, ar2 : array_like - Input arrays. + Input arrays. Will be flattened if not already 1D. assume_unique : bool If True, the input arrays are both assumed to be unique, which can speed up the calculation. Default is False. - + return_indices : bool + If True, the indices which correspond to the intersection of the + two arrays are returned. The first instance of a value is used + if there are multiple. Default is False. + + .. versionadded:: 1.15.0 + Returns ------- intersect1d : ndarray Sorted 1D array of common and unique elements. + comm1 : ndarray + The indices of the first occurrences of the common values in `ar1`. + Only provided if `return_indices` is True. + comm2 : ndarray + The indices of the first occurrences of the common values in `ar2`. + Only provided if `return_indices` is True. + See Also -------- @@ -332,14 +345,49 @@ def intersect1d(ar1, ar2, assume_unique=False): >>> from functools import reduce >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])) array([3]) + + To return the indices of the values common to the input arrays + along with the intersected values: + >>> x = np.array([1, 1, 2, 3, 4]) + >>> y = np.array([2, 1, 4, 6]) + >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True) + >>> x_ind, y_ind + (array([0, 2, 4]), array([1, 0, 2])) + >>> xy, x[x_ind], y[y_ind] + (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4])) + """ if not assume_unique: - # Might be faster than unique( intersect1d( ar1, ar2 ) )? - ar1 = unique(ar1) - ar2 = unique(ar2) + if return_indices: + ar1, ind1 = unique(ar1, return_index=True) + ar2, ind2 = unique(ar2, return_index=True) + else: + ar1 = unique(ar1) + ar2 = unique(ar2) + else: + ar1 = ar1.ravel() + ar2 = ar2.ravel() + aux = np.concatenate((ar1, ar2)) - aux.sort() - return aux[:-1][aux[1:] == aux[:-1]] + if return_indices: + aux_sort_indices = np.argsort(aux, kind='mergesort') + aux = aux[aux_sort_indices] + else: + aux.sort() + + mask = aux[1:] == aux[:-1] + int1d = aux[:-1][mask] + + if return_indices: + ar1_indices = aux_sort_indices[:-1][mask] + ar2_indices = aux_sort_indices[1:][mask] - ar1.size + if not assume_unique: + ar1_indices = ind1[ar1_indices] + ar2_indices = ind2[ar2_indices] + + return int1d, ar1_indices, ar2_indices + else: + return int1d def setxor1d(ar1, ar2, assume_unique=False): """ @@ -660,3 +708,4 @@ def setdiff1d(ar1, ar2, assume_unique=False): ar1 = unique(ar1) ar2 = unique(ar2) return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)] + -- cgit v1.2.1 From 917b0794e8e68a443f94299a80c51491cdc1c6cb Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 25 Jun 2018 23:56:16 -0700 Subject: DOC: Clear up confusion between np.where(cond) and np.where(cond, x, y) Eliminates all mentions of `np.where(cond)`, instead pointing the reader to np.nonzero. Also changes some example numbers to avoid collisions, making them easier to follow. Some minor doc improvements for np.ma.where too. --- numpy/lib/arraysetops.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'numpy/lib/arraysetops.py') diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 4d3f35183..5880ea154 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -607,6 +607,14 @@ def isin(element, test_elements, assume_unique=False, invert=False): [ True, False]]) >>> element[mask] array([2, 4]) + + The indices of the matched values can be obtained with `nonzero`: + + >>> np.nonzero(mask) + (array([0, 1]), array([1, 0])) + + The test can also be inverted: + >>> mask = np.isin(element, test_elements, invert=True) >>> mask array([[ True, False], -- cgit v1.2.1