diff options
author | Travis Oliphant <oliphant@enthought.com> | 2009-08-28 15:36:42 +0000 |
---|---|---|
committer | Travis Oliphant <oliphant@enthought.com> | 2009-08-28 15:36:42 +0000 |
commit | 2b01ee6b966b9ca298247e6717e3a5be16a92970 (patch) | |
tree | 6bbb8ee8eebdfe2ef3eb26f13994193b313c6fe7 /numpy/lib/arraysetops.py | |
parent | c2191bc97da8a0879cec8d3e9a7a93fe9e66fcd8 (diff) | |
parent | fddd4b9c3b8f18ba7cf386f766b70ec3328b1c69 (diff) | |
download | numpy-2b01ee6b966b9ca298247e6717e3a5be16a92970.tar.gz |
Re-base the date-time branch back to the trunk.
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r-- | numpy/lib/arraysetops.py | 305 |
1 files changed, 161 insertions, 144 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index c5e7822f2..b8ae9a9f3 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -3,40 +3,36 @@ Set operations for 1D numeric arrays based on sorting. :Contains: ediff1d, - unique1d, + unique, intersect1d, - intersect1d_nu, setxor1d, - setmember1d, - setmember1d_nu, + in1d, union1d, setdiff1d +:Deprecated: + unique1d, + intersect1d_nu, + setmember1d + :Notes: -All functions work best with integer numerical arrays on input (e.g. indices). -For floating point arrays, innacurate results may appear due to usual round-off +For floating point arrays, inaccurate results may appear due to usual round-off and floating point comparison issues. -Except unique1d, union1d and intersect1d_nu, all functions expect inputs with -unique elements. Speed could be gained in some operations by an implementaion of -sort(), that can provide directly the permutation vectors, avoiding thus calls -to argsort(). +Speed could be gained in some operations by an implementation of +sort(), that can provide directly the permutation vectors, avoiding +thus calls to argsort(). -Run _test_unique1d_speed() to compare performance of numpy.unique1d() and -numpy.unique() - it should be the same. - -To do: Optionally return indices analogously to unique1d for all functions. - -created: 01.11.2005 -last revision: 07.01.2007 +To do: Optionally return indices analogously to unique for all functions. :Author: Robert Cimrman """ __all__ = ['ediff1d', 'unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d', - 'setmember1d', 'setmember1d_nu', 'union1d', 'setdiff1d'] + 'setmember1d', 'union1d', 'setdiff1d', 'unique', 'in1d'] import numpy as np +from numpy.lib.utils import deprecate_with_doc def ediff1d(ary, to_end=None, to_begin=None): """ @@ -50,7 +46,7 @@ def ediff1d(ary, to_end=None, to_begin=None): If provided, this number will be tacked onto the end of the returned differences. to_begin : number, optional - If provided, this number will be taked onto the beginning of the + If provided, this number will be tacked onto the beginning of the returned differences. Returns @@ -73,26 +69,26 @@ def ediff1d(ary, to_end=None, to_begin=None): arrays.append(to_end) if len(arrays) != 1: - # We'll save ourselves a copy of a potentially large array in the common - # case where neither to_begin or to_end was given. + # We'll save ourselves a copy of a potentially large array in + # the common case where neither to_begin or to_end was given. ed = np.hstack(arrays) return ed -def unique1d(ar1, return_index=False, return_inverse=False): +def unique(ar, return_index=False, return_inverse=False): """ Find the unique elements of an array. Parameters ---------- - ar1 : array_like + ar : array_like This array will be flattened if it is not already 1-D. return_index : bool, optional If True, also return the indices against `ar1` that result in the unique array. return_inverse : bool, optional If True, also return the indices against the unique array that - result in `ar1`. + result in `ar`. Returns ------- @@ -112,17 +108,17 @@ def unique1d(ar1, return_index=False, return_inverse=False): Examples -------- - >>> np.unique1d([1, 1, 2, 2, 3, 3]) + >>> np.unique([1, 1, 2, 2, 3, 3]) array([1, 2, 3]) >>> a = np.array([[1, 1], [2, 3]]) - >>> np.unique1d(a) + >>> np.unique(a) array([1, 2, 3]) Reconstruct the input from unique values: - >>> np.unique1d([1,2,6,4,2,3,2], return_index=True) + >>> np.unique([1,2,6,4,2,3,2], return_index=True) >>> x = [1,2,6,4,2,3,2] - >>> u, i = np.unique1d(x, return_inverse=True) + >>> u, i = np.unique(x, return_inverse=True) >>> u array([1, 2, 3, 4, 6]) >>> i @@ -131,14 +127,15 @@ def unique1d(ar1, return_index=False, return_inverse=False): [1, 2, 6, 4, 2, 3, 2] """ - if return_index: - import warnings - warnings.warn("The order of the output arguments for " - "`return_index` has changed. Before, " - "the output was (indices, unique_arr), but " - "has now been reversed to be more consistent.") + try: + ar = ar.flatten() + except AttributeError: + if not return_inverse and not return_index: + items = sorted(set(ar)) + return np.asarray(items) + else: + ar = np.asanyarray(ar).flatten() - ar = np.asanyarray(ar1).flatten() if ar.size == 0: if return_inverse and return_index: return ar, np.empty(0, np.bool), np.empty(0, np.bool) @@ -166,44 +163,18 @@ def unique1d(ar1, return_index=False, return_inverse=False): flag = np.concatenate(([True], ar[1:] != ar[:-1])) return ar[flag] -def intersect1d(ar1, ar2): - """ - Intersection returning repeated or unique elements common to both arrays. - - Parameters - ---------- - ar1,ar2 : array_like - Input arrays. - - Returns - ------- - out : ndarray, shape(N,) - Sorted 1D array of common elements with repeating elements. - - See Also - -------- - intersect1d_nu : Returns only unique common elements. - numpy.lib.arraysetops : Module with a number of other functions for - performing set operations on arrays. - - Examples - -------- - >>> np.intersect1d([1,3,3],[3,1,1]) - array([1, 1, 3, 3]) - - """ - aux = np.concatenate((ar1,ar2)) - aux.sort() - return aux[aux[1:] == aux[:-1]] -def intersect1d_nu(ar1, ar2): +def intersect1d(ar1, ar2, assume_unique=False): """ Intersection returning unique elements common to both arrays. Parameters ---------- - ar1,ar2 : array_like + ar1, ar2 : array_like Input arrays. + assume_unique : bool + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. Returns ------- @@ -212,34 +183,34 @@ def intersect1d_nu(ar1, ar2): See Also -------- - intersect1d : Returns repeated or unique common elements. numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. Examples -------- - >>> np.intersect1d_nu([1,3,3],[3,1,1]) + >>> np.intersect1d([1,3,3], [3,1,1]) array([1, 3]) """ - # Might be faster than unique1d( intersect1d( ar1, ar2 ) )? - aux = np.concatenate((unique1d(ar1), unique1d(ar2))) + if not assume_unique: + # Might be faster than unique( intersect1d( ar1, ar2 ) )? + ar1 = unique(ar1) + ar2 = unique(ar2) + aux = np.concatenate( (ar1, ar2) ) aux.sort() return aux[aux[1:] == aux[:-1]] -def setxor1d(ar1, ar2): +def setxor1d(ar1, ar2, assume_unique=False): """ - Set exclusive-or of 1D arrays with unique elements. - - Use unique1d() to generate arrays with only unique elements to use as - inputs to this function. + Set exclusive-or of two 1D arrays. Parameters ---------- - ar1 : array_like - Input array. - ar2 : array_like - Input array. + ar1, ar2 : array_like + Input arrays. + assume_unique : bool + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. Returns ------- @@ -252,7 +223,11 @@ def setxor1d(ar1, ar2): performing set operations on arrays. """ - aux = np.concatenate((ar1, ar2)) + if not assume_unique: + ar1 = unique(ar1) + ar2 = unique(ar2) + + aux = np.concatenate( (ar1, ar2) ) if aux.size == 0: return aux @@ -263,98 +238,68 @@ def setxor1d(ar1, ar2): flag2 = flag[1:] == flag[:-1] return aux[flag2] -def setmember1d(ar1, ar2): +def in1d(ar1, ar2, assume_unique=False): """ - Return a boolean array set True where first element is in second array. - - Boolean array is the shape of `ar1` containing True where the elements - of `ar1` are in `ar2` and False otherwise. + Test whether each element of an array is also present in a second array. - Use unique1d() to generate arrays with only unique elements to use as - inputs to this function. + Returns a boolean array the same length as `ar1` that is True + where an element of `ar1` is in `ar2` and False otherwise. Parameters ---------- - ar1 : array_like - Input array. - ar2 : array_like - Input array. + ar1, ar2 : array_like + Input arrays. + assume_unique : bool + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. Returns ------- mask : ndarray, bool The values `ar1[mask]` are in `ar2`. - See Also -------- - setmember1d_nu : Works for arrays with non-unique elements. numpy.lib.arraysetops : Module with a number of other functions for performing set operations on arrays. + Notes + ----- + .. versionadded:: 1.4.0 + Examples -------- >>> test = np.arange(5) >>> states = [0, 2] - >>> mask = np.setmember1d(test,states) + >>> mask = np.setmember1d(test, states) >>> mask array([ True, False, True, False, False], dtype=bool) >>> test[mask] array([0, 2]) """ - # We need this to be a stable sort, so always use 'mergesort' here. The - # values from the first array should always come before the values from the - # second array. - ar = np.concatenate( (ar1, ar2 ) ) + if not assume_unique: + ar1, rev_idx = np.unique(ar1, return_inverse=True) + ar2 = np.unique(ar2) + + ar = np.concatenate( (ar1, ar2) ) + # We need this to be a stable sort, so always use 'mergesort' + # here. The values from the first array should always come before + # the values from the second array. order = ar.argsort(kind='mergesort') sar = ar[order] equal_adj = (sar[1:] == sar[:-1]) flag = np.concatenate( (equal_adj, [False] ) ) - indx = order.argsort(kind='mergesort')[:len( ar1 )] - return flag[indx] - -def setmember1d_nu(ar1, ar2): - """ - Return a boolean array set True where first element is in second array. - - Boolean array is the shape of `ar1` containing True where the elements - of `ar1` are in `ar2` and False otherwise. - - Unlike setmember1d(), this version works also for arrays with duplicate - values. It uses setmember1d() internally. For arrays with unique - entries it is slower than calling setmember1d() directly. - - Parameters - ---------- - ar1 : array_like - Input array. - ar2 : array_like - Input array. - Returns - ------- - mask : ndarray, bool - The values `ar1[mask]` are in `ar2`. - - See Also - -------- - setmember1d : Faster for arrays with unique elements. - numpy.lib.arraysetops : Module with a number of other functions for - performing set operations on arrays. - - """ - unique_ar1, rev_idx = np.unique1d(ar1, return_inverse=True) - mask = np.setmember1d(unique_ar1, np.unique1d(ar2)) - return mask[rev_idx] + if assume_unique: + return flag[indx] + else: + return flag[indx][rev_idx] def union1d(ar1, ar2): """ - Union of 1D arrays with unique elements. - - Use unique1d() to generate arrays with only unique elements to use as - inputs to this function. + Union of two 1D arrays. Parameters ---------- @@ -374,14 +319,11 @@ def union1d(ar1, ar2): performing set operations on arrays. """ - return unique1d( np.concatenate( (ar1, ar2) ) ) + return unique( np.concatenate( (ar1, ar2) ) ) -def setdiff1d(ar1, ar2): +def setdiff1d(ar1, ar2, assume_unique=False): """ - Set difference of 1D arrays with unique elements. - - Use unique1d() to generate arrays with only unique elements to use as - inputs to this function. + Set difference of two 1D arrays. Parameters ---------- @@ -389,6 +331,9 @@ def setdiff1d(ar1, ar2): Input array. ar2 : array_like Input comparison array. + assume_unique : bool + If True, the input arrays are both assumed to be unique, which + can speed up the calculation. Default is False. Returns ------- @@ -401,8 +346,80 @@ def setdiff1d(ar1, ar2): performing set operations on arrays. """ - aux = setmember1d(ar1,ar2) + if not assume_unique: + ar1 = unique(ar1) + ar2 = unique(ar2) + aux = in1d(ar1, ar2, assume_unique=True) if aux.size == 0: return aux else: return np.asarray(ar1)[aux == 0] + +@deprecate_with_doc('') +def unique1d(ar1, return_index=False, return_inverse=False): + """ + This function is deprecated. Use unique() instead. + """ + if return_index: + import warnings + warnings.warn("The order of the output arguments for " + "`return_index` has changed. Before, " + "the output was (indices, unique_arr), but " + "has now been reversed to be more consistent.") + + ar = np.asanyarray(ar1).flatten() + if ar.size == 0: + if return_inverse and return_index: + return ar, np.empty(0, np.bool), np.empty(0, np.bool) + elif return_inverse or return_index: + return ar, np.empty(0, np.bool) + else: + return ar + + if return_inverse or return_index: + perm = ar.argsort() + aux = ar[perm] + flag = np.concatenate(([True], aux[1:] != aux[:-1])) + if return_inverse: + iflag = np.cumsum(flag) - 1 + iperm = perm.argsort() + if return_index: + return aux[flag], perm[flag], iflag[iperm] + else: + return aux[flag], iflag[iperm] + else: + return aux[flag], perm[flag] + + else: + ar.sort() + flag = np.concatenate(([True], ar[1:] != ar[:-1])) + return ar[flag] + +@deprecate_with_doc('') +def intersect1d_nu(ar1, ar2): + """ + This function is deprecated. Use intersect1d() + instead. + """ + # Might be faster than unique1d( intersect1d( ar1, ar2 ) )? + aux = np.concatenate((unique1d(ar1), unique1d(ar2))) + aux.sort() + return aux[aux[1:] == aux[:-1]] + +@deprecate_with_doc('') +def setmember1d(ar1, ar2): + """ + This function is deprecated. Use in1d(assume_unique=True) + instead. + """ + # We need this to be a stable sort, so always use 'mergesort' here. The + # values from the first array should always come before the values from the + # second array. + ar = np.concatenate( (ar1, ar2 ) ) + order = ar.argsort(kind='mergesort') + sar = ar[order] + equal_adj = (sar[1:] == sar[:-1]) + flag = np.concatenate( (equal_adj, [False] ) ) + + indx = order.argsort(kind='mergesort')[:len( ar1 )] + return flag[indx] |