diff options
author | Robert Cimrman <cimrman3@ntc.zcu.cz> | 2009-07-08 15:46:09 +0000 |
---|---|---|
committer | Robert Cimrman <cimrman3@ntc.zcu.cz> | 2009-07-08 15:46:09 +0000 |
commit | c1c0533f20e349b5e638025ef173aa487b2422c7 (patch) | |
tree | e2fef41c590d17bbacae0ec36d447d13cf21afb6 /numpy/lib/arraysetops.py | |
parent | 434bc70004d63ed44dd5d793d5eb65b840f39362 (diff) | |
download | numpy-c1c0533f20e349b5e638025ef173aa487b2422c7.tar.gz |
Enhancements to arraysetops (ticket #1133, by Neil Crighton)
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r-- | numpy/lib/arraysetops.py | 360 |
1 files changed, 164 insertions, 196 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index b1a730efa..fe1326aa0 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -3,40 +3,36 @@ Set operations for 1D numeric arrays based on sorting. :Contains: ediff1d, - unique1d, + unique, intersect1d, - intersect1d_nu, setxor1d, - setmember1d, - setmember1d_nu, + in1d, union1d, setdiff1d +:Deprecated: + unique1d, + intersect1d_nu, + setmember1d + :Notes: -All functions work best with integer numerical arrays on input (e.g. indices). -For floating point arrays, innacurate results may appear due to usual round-off +For floating point arrays, inaccurate results may appear due to usual round-off and floating point comparison issues. -Except unique1d, union1d and intersect1d_nu, all functions expect inputs with -unique elements. Speed could be gained in some operations by an implementaion of -sort(), that can provide directly the permutation vectors, avoiding thus calls -to argsort(). - -Run _test_unique1d_speed() to compare performance of numpy.unique1d() and -numpy.unique() - it should be the same. +Speed could be gained in some operations by an implementation of +sort(), that can provide directly the permutation vectors, avoiding +thus calls to argsort(). -To do: Optionally return indices analogously to unique1d for all functions. - -created: 01.11.2005 -last revision: 07.01.2007 +To do: Optionally return indices analogously to unique for all functions. :Author: Robert Cimrman """ __all__ = ['ediff1d', 'unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d', - 'setmember1d', 'setmember1d_nu', 'union1d', 'setdiff1d'] + 'setmember1d', 'union1d', 'setdiff1d', 'unique', 'in1d'] import numpy as np +from numpy.lib.utils import deprecate_with_doc def ediff1d(ary, to_end=None, to_begin=None): """ @@ -46,45 +42,23 @@ def ediff1d(ary, to_end=None, to_begin=None): ---------- ary : array This array will be flattened before the difference is taken. - to_end : array_like, optional - If provided, this number will be appended to the end of the returned + to_end : number, optional + If provided, this number will be tacked onto the end of the returned differences. - to_begin : array_like, optional - If provided, this array will be appended to the beginning of the + to_begin : number, optional + If provided, this number will be tacked onto the beginning of the returned differences. Returns ------- ed : array - The differences. Loosely, this will be ``ary[1:] - ary[:-1]``. - - See Also - -------- - diff, gradient + The differences. Loosely, this will be (ary[1:] - ary[:-1]). Notes ----- When applied to masked arrays, this function drops the mask information if the `to_begin` and/or `to_end` parameters are used - Examples - -------- - >>> x = np.array([1, 2, 4, 7, 0]) - >>> np.ediff1d(x) - array([ 1, 2, 3, -7]) - - >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99])) - array([-99, 1, 2, 3, -7, 88, 99]) - - The returned array is always 1D. - - >>> y = np.array([[1, 2, 4], [1, 6, 24]]) - >>> y - array([[ 1, 2, 4], - [ 1, 6, 24]]) - >>> np.ediff1d(y) - array([ 1, 2, -3, 5, 18]) - """ ary = np.asanyarray(ary).flat ed = ary[1:] - ary[:-1] @@ -95,26 +69,26 @@ def ediff1d(ary, to_end=None, to_begin=None): arrays.append(to_end) if len(arrays) != 1: - # We'll save ourselves a copy of a potentially large array in the common - # case where neither to_begin or to_end was given. + # We'll save ourselves a copy of a potentially large array in + # the common case where neither to_begin or to_end was given. ed = np.hstack(arrays) return ed -def unique1d(ar1, return_index=False, return_inverse=False): +def unique(ar, return_index=False, return_inverse=False): """ Find the unique elements of an array. Parameters ---------- - ar1 : array_like + ar : array_like This array will be flattened if it is not already 1-D. return_index : bool, optional If True, also return the indices against `ar1` that result in the unique array. return_inverse : bool, optional If True, also return the indices against the unique array that - result in `ar1`. + result in `ar`. Returns ------- @@ -134,17 +108,17 @@ def unique1d(ar1, return_index=False, return_inverse=False): Examples -------- - >>> np.unique1d([1, 1, 2, 2, 3, 3]) + >>> np.unique([1, 1, 2, 2, 3, 3]) array([1, 2, 3]) >>> a = np.array([[1, 1], [2, 3]]) - >>> np.unique1d(a) + >>> np.unique(a) array([1, 2, 3]) Reconstruct the input from unique values: - >>> np.unique1d([1,2,6,4,2,3,2], return_index=True) + >>> np.unique([1,2,6,4,2,3,2], return_index=True) >>> x = [1,2,6,4,2,3,2] - >>> u, i = np.unique1d(x, return_inverse=True) + >>> u, i = np.unique(x, return_inverse=True) >>> u array([1, 2, 3, 4, 6]) >>> i @@ -153,14 +127,15 @@ def unique1d(ar1, return_index=False, return_inverse=False): [1, 2, 6, 4, 2, 3, 2] """ - if return_index: - import warnings - warnings.warn("The order of the output arguments for " - "`return_index` has changed. Before, " - "the output was (indices, unique_arr), but " - "has now been reversed to be more consistent.") + try: + ar = ar.flatten() + except AttributeError: + if not return_inverse and not return_index: + items = sorted(set(ar)) + return np.asarray(items) + else: + ar = np.asanyarray(ar).flatten() - ar = np.asanyarray(ar1).flatten() if ar.size == 0: if return_inverse and return_index: return ar, np.empty(0, np.bool), np.empty(0, np.bool) @@ -188,98 +163,54 @@ def unique1d(ar1, return_index=False, return_inverse=False): flag = np.concatenate(([True], ar[1:] != ar[:-1])) return ar[flag] -def intersect1d(ar1, ar2): - """ - Find elements that are common to two arrays. - - For speed, it is assumed the two input arrays do not have any - repeated elements. To find the intersection of two arrays that - have repeated elements, use `intersect1d_nu`. - - Parameters - ---------- - ar1,ar2 : array_like - Input arrays. These must be 1D and must not have repeated elements. - - Returns - ------- - out : ndarray, shape(N,) - Sorted array of common elements. - - See Also - -------- - intersect1d_nu : Find the intersection for input arrays with - repeated elements. - numpy.lib.arraysetops : Module with a number of other functions for - performing set operations on arrays. - Examples - -------- - >>> np.intersect1d([1, 2, 3], [2, 3, 4]) - array([2, 3]) - >>> np.intersect1d(['a','b','c'], ['b','c','d']) - array(['b', 'c'], - dtype='|S1') - - This function fails if the input arrays have repeated elements. - - >>> np.intersect1d([1, 1, 2, 3, 3, 4], [1, 4]) - array([1, 1, 3, 4]) - - """ - aux = np.concatenate((ar1,ar2)) - aux.sort() - return aux[aux[1:] == aux[:-1]] - -def intersect1d_nu(ar1, ar2): +def intersect1d(ar1, ar2, assume_unique=False): """ - Find elements common to two arrays. - - Returns an array of unique elements that represents the intersection - of the two input arrays. Unlike `intersect1d`, the input arrays can - have repeated elements. + Intersection returning unique elements common to both arrays. Parameters ---------- - ar1,ar2 : array_like + ar1, ar2 : array_like Input arrays. + assume_unique : bool + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. Returns ------- out : ndarray, shape(N,) - Sorted 1D array of common, unique elements. + Sorted 1D array of common and unique elements. See Also -------- - intersect1d : Faster version of `intersect1d_nu` for 1D input arrays - without repeated elements. numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. Examples -------- - >>> np.intersect1d_nu([1, 3 ,3], [3, 3, 1, 1]) + >>> np.intersect1d([1,3,3], [3,1,1]) array([1, 3]) """ - # Might be faster than unique1d( intersect1d( ar1, ar2 ) )? - aux = np.concatenate((unique1d(ar1), unique1d(ar2))) + if not assume_unique: + # Might be faster than unique( intersect1d( ar1, ar2 ) )? + ar1 = unique(ar1) + ar2 = unique(ar2) + aux = np.concatenate( (ar1, ar2) ) aux.sort() return aux[aux[1:] == aux[:-1]] -def setxor1d(ar1, ar2): +def setxor1d(ar1, ar2, assume_unique=False): """ - Set exclusive-or of 1D arrays with unique elements. - - Use unique1d() to generate arrays with only unique elements to use as - inputs to this function. + Set exclusive-or of two 1D arrays. Parameters ---------- - ar1 : array_like - Input array. - ar2 : array_like - Input array. + ar1, ar2 : array_like + Input arrays. + assume_unique : bool + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. Returns ------- @@ -292,7 +223,11 @@ def setxor1d(ar1, ar2): performing set operations on arrays. """ - aux = np.concatenate((ar1, ar2)) + if not assume_unique: + ar1 = unique(ar1) + ar2 = unique(ar2) + + aux = np.concatenate( (ar1, ar2) ) if aux.size == 0: return aux @@ -303,21 +238,20 @@ def setxor1d(ar1, ar2): flag2 = flag[1:] == flag[:-1] return aux[flag2] -def setmember1d(ar1, ar2): +def in1d(ar1, ar2, assume_unique=False): """ - Test whether elements of one array are also present in a second array. + Test whether each element of an array is also present in a second array. - Returns a boolean array the same length as `ar1` that is True where an - element of `ar1` is also in `ar2` and False otherwise. The input arrays - must not contain any repeated elements. If they do, unique arrays can - be created using `unique1d()` to use as inputs to this function. + Returns a boolean array the same length as `ar1` that is True + where an element of `ar1` is in `ar2` and False otherwise. Parameters ---------- - ar1 : array_like - Input array. It must not have any repeated elements - ar2 : array_like - Input array. Again, it must not have any repeated elements. + ar1, ar2 : array_like + Input arrays. + assume_unique : bool + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. Returns ------- @@ -326,14 +260,16 @@ def setmember1d(ar1, ar2): See Also -------- - setmember1d_nu : Works for arrays with non-unique elements. numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. - unique1d : Find unique elements in an array. + + Notes + ----- + .. versionadded:: 1.4.0 Examples -------- - >>> test = [0, 1, 2, 3, 4, 5] + >>> test = np.arange(5) >>> states = [0, 2] >>> mask = np.setmember1d(test, states) >>> mask @@ -341,66 +277,29 @@ def setmember1d(ar1, ar2): >>> test[mask] array([0, 2]) - This function fails if there are repeated elements in the input arrays: - - >>> test = [0, 1, 1, 2, 3, 3] - >>> states = [0, 2] - >>> np.setmember1d(test,states) - array([ True, True, False, True, True, False], dtype=bool) # Wrong! - """ - # We need this to be a stable sort, so always use 'mergesort' here. The - # values from the first array should always come before the values from the - # second array. - ar = np.concatenate( (ar1, ar2 ) ) + if not assume_unique: + ar1, rev_idx = np.unique(ar1, return_inverse=True) + ar2 = np.unique(ar2) + + ar = np.concatenate( (ar1, ar2) ) + # We need this to be a stable sort, so always use 'mergesort' + # here. The values from the first array should always come before + # the values from the second array. order = ar.argsort(kind='mergesort') sar = ar[order] equal_adj = (sar[1:] == sar[:-1]) flag = np.concatenate( (equal_adj, [False] ) ) - indx = order.argsort(kind='mergesort')[:len( ar1 )] - return flag[indx] - -def setmember1d_nu(ar1, ar2): - """ - Return a boolean array set True where first element is in second array. - - Boolean array is the shape of `ar1` containing True where the elements - of `ar1` are in `ar2` and False otherwise. - - Unlike setmember1d(), this version works also for arrays with duplicate - values. It uses setmember1d() internally. For arrays with unique - entries it is slower than calling setmember1d() directly. - - Parameters - ---------- - ar1 : array_like - Input array. - ar2 : array_like - Input array. - - Returns - ------- - mask : ndarray, bool - The values `ar1[mask]` are in `ar2`. - See Also - -------- - setmember1d : Faster for arrays with unique elements. - numpy.lib.arraysetops : Module with a number of other functions for - performing set operations on arrays. - - """ - unique_ar1, rev_idx = np.unique1d(ar1, return_inverse=True) - mask = np.setmember1d(unique_ar1, np.unique1d(ar2)) - return mask[rev_idx] + if assume_unique: + return flag[indx] + else: + return flag[indx][rev_idx] def union1d(ar1, ar2): """ - Union of 1D arrays with unique elements. - - Use unique1d() to generate arrays with only unique elements to use as - inputs to this function. + Union of two 1D arrays. Parameters ---------- @@ -420,14 +319,11 @@ def union1d(ar1, ar2): performing set operations on arrays. """ - return unique1d( np.concatenate( (ar1, ar2) ) ) + return unique( np.concatenate( (ar1, ar2) ) ) -def setdiff1d(ar1, ar2): +def setdiff1d(ar1, ar2, assume_unique=False): """ - Set difference of 1D arrays with unique elements. - - Use unique1d() to generate arrays with only unique elements to use as - inputs to this function. + Set difference of two 1D arrays. Parameters ---------- @@ -435,6 +331,9 @@ def setdiff1d(ar1, ar2): Input array. ar2 : array_like Input comparison array. + assume_unique : bool + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. Returns ------- @@ -447,8 +346,77 @@ def setdiff1d(ar1, ar2): performing set operations on arrays. """ - aux = setmember1d(ar1,ar2) + aux = in1d(ar1, ar2, assume_unique=assume_unique) if aux.size == 0: return aux else: return np.asarray(ar1)[aux == 0] + +@deprecate_with_doc('') +def unique1d(ar1, return_index=False, return_inverse=False): + """ + This function is deprecated. Use unique() instead. + """ + if return_index: + import warnings + warnings.warn("The order of the output arguments for " + "`return_index` has changed. Before, " + "the output was (indices, unique_arr), but " + "has now been reversed to be more consistent.") + + ar = np.asanyarray(ar1).flatten() + if ar.size == 0: + if return_inverse and return_index: + return ar, np.empty(0, np.bool), np.empty(0, np.bool) + elif return_inverse or return_index: + return ar, np.empty(0, np.bool) + else: + return ar + + if return_inverse or return_index: + perm = ar.argsort() + aux = ar[perm] + flag = np.concatenate(([True], aux[1:] != aux[:-1])) + if return_inverse: + iflag = np.cumsum(flag) - 1 + iperm = perm.argsort() + if return_index: + return aux[flag], perm[flag], iflag[iperm] + else: + return aux[flag], iflag[iperm] + else: + return aux[flag], perm[flag] + + else: + ar.sort() + flag = np.concatenate(([True], ar[1:] != ar[:-1])) + return ar[flag] + +@deprecate_with_doc('') +def intersect1d_nu(ar1, ar2): + """ + This function is deprecated. Use intersect1d() + instead. + """ + # Might be faster than unique1d( intersect1d( ar1, ar2 ) )? + aux = np.concatenate((unique1d(ar1), unique1d(ar2))) + aux.sort() + return aux[aux[1:] == aux[:-1]] + +@deprecate_with_doc('') +def setmember1d(ar1, ar2): + """ + This function is deprecated. Use in1d(assume_unique=True) + instead. + """ + # We need this to be a stable sort, so always use 'mergesort' here. The + # values from the first array should always come before the values from the + # second array. + ar = np.concatenate( (ar1, ar2 ) ) + order = ar.argsort(kind='mergesort') + sar = ar[order] + equal_adj = (sar[1:] == sar[:-1]) + flag = np.concatenate( (equal_adj, [False] ) ) + + indx = order.argsort(kind='mergesort')[:len( ar1 )] + return flag[indx] |