diff options
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r-- | numpy/lib/arraysetops.py | 192 |
1 files changed, 147 insertions, 45 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 1673ecf93..ca0269772 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -1,5 +1,5 @@ """ -Set operations for 1D numeric arrays based on sort() function. +Set operations for 1D numeric arrays based on sorting. Contains: ediff1d, @@ -11,16 +11,16 @@ Contains: union1d, setdiff1d -All functions work best with integer numerical arrays on input -(e.g. indices). For floating point arrays, innacurate results may appear due to -usual round-off and floating point comparison issues. +All functions work best with integer numerical arrays on input (e.g. indices). +For floating point arrays, innacurate results may appear due to usual round-off +and floating point comparison issues. Except unique1d, union1d and intersect1d_nu, all functions expect inputs with -unique elements. Speed could be gained in some operations by an implementaion -of sort(), that can provide directly the permutation vectors, avoiding thus -calls to argsort(). +unique elements. Speed could be gained in some operations by an implementaion of +sort(), that can provide directly the permutation vectors, avoiding thus calls +to argsort(). -Run test_unique1d_speed() to compare performance of numpy.unique1d() and +Run _test_unique1d_speed() to compare performance of numpy.unique1d() and numpy.unique() - it should be the same. To do: Optionally return indices analogously to unique1d for all functions. @@ -28,7 +28,7 @@ To do: Optionally return indices analogously to unique1d for all functions. Author: Robert Cimrman created: 01.11.2005 -last revision: 12.10.2006 +last revision: 07.01.2007 """ __all__ = ['ediff1d', 'unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d', 'setmember1d', 'union1d', 'setdiff1d'] @@ -37,30 +37,60 @@ import time import numpy as nm def ediff1d(ary, to_end = None, to_begin = None): - """Array difference with prefixed and/or appended value. - - See also: unique1d, intersect1d, intersect1d_nu, setxor1d, - setmember1d, union1d, setdiff1d + """The differences between consecutive elements of an array, possibly with + prefixed and/or appended values. + + :Parameters: + - `ary` : array + This array will be flattened before the difference is taken. + - `to_end` : number, optional + If provided, this number will be tacked onto the end of the returned + differences. + - `to_begin` : number, optional + If provided, this number will be taked onto the beginning of the + returned differences. + + :Returns: + - `ed` : array + The differences. Loosely, this will be (ary[1:] - ary[:-1]). """ ary = nm.asarray(ary).flat ed = ary[1:] - ary[:-1] + arrays = [ed] if to_begin is not None: - if to_end is not None: - ed = nm.r_[to_begin, ed, to_end] - else: - ed = nm.insert(ed, 0, to_begin) - elif to_end is not None: - ed = nm.append(ed, to_end) - + arrays.insert(0, to_begin) + if to_end is not None: + arrays.append(to_end) + + if len(arrays) != 1: + # We'll save ourselves a copy of a potentially large array in the common + # case where neither to_begin or to_end was given. + ed = nm.hstack(arrays) + return ed def unique1d(ar1, return_index=False): - """Unique elements of 1D array. When return_index is True, return - also the indices indx such that ar1.flat[indx] is the resulting - array of unique elements. - - See also: ediff1d, intersect1d, intersect1d_nu, setxor1d, - setmember1d, union1d, setdiff1d + """Find the unique elements of 1D array. + + Most of the other array set operations operate on the unique arrays + generated by this function. + + :Parameters: + - `ar1` : array + This array will be flattened if it is not already 1D. + - `return_index` : bool, optional + If True, also return the indices against ar1 that result in the unique + array. + + :Returns: + - `unique` : array + The unique values. + - `unique_indices` : int array, optional + The indices of the unique values. Only provided if return_index is True. + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. """ ar = nm.asarray(ar1).flatten() if ar.size == 0: @@ -81,8 +111,20 @@ def unique1d(ar1, return_index=False): def intersect1d( ar1, ar2 ): """Intersection of 1D arrays with unique elements. - See also: ediff1d, unique1d, intersect1d_nu, setxor1d, - setmember1d, union1d, setdiff1d + Use unique1d() to generate arrays with only unique elements to use as inputs + to this function. Alternatively, use intersect1d_nu() which will find the + unique values for you. + + :Parameters: + - `ar1` : array + - `ar2` : array + + :Returns: + - `intersection` : array + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. """ aux = nm.concatenate((ar1,ar2)) aux.sort() @@ -91,10 +133,20 @@ def intersect1d( ar1, ar2 ): def intersect1d_nu( ar1, ar2 ): """Intersection of 1D arrays with any elements. - See also: ediff1d, unique1d, intersect1d, setxor1d, - setmember1d, union1d, setdiff1d + The input arrays do not have unique elements like intersect1d() requires. + + :Parameters: + - `ar1` : array + - `ar2` : array + + :Returns: + - `intersection` : array + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. """ - # Might be faster then unique1d( intersect1d( ar1, ar2 ) )? + # Might be faster than unique1d( intersect1d( ar1, ar2 ) )? aux = nm.concatenate((unique1d(ar1), unique1d(ar2))) aux.sort() return aux[aux[1:] == aux[:-1]] @@ -102,8 +154,20 @@ def intersect1d_nu( ar1, ar2 ): def setxor1d( ar1, ar2 ): """Set exclusive-or of 1D arrays with unique elements. - See also: ediff1d, unique1d, intersect1d, intersect1d_nu, - setmember1d, union1d, setdiff1d + Use unique1d() to generate arrays with only unique elements to use as inputs + to this function. + + :Parameters: + - `ar1` : array + - `ar2` : array + + :Returns: + - `xor` : array + The values that are only in one, but not both, of the input arrays. + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. """ aux = nm.concatenate((ar1, ar2)) if aux.size == 0: @@ -117,16 +181,31 @@ def setxor1d( ar1, ar2 ): return aux[flag2] def setmember1d( ar1, ar2 ): - """Return an array of shape of ar1 containing 1 where the elements of - ar1 are in ar2 and 0 otherwise. + """Return a boolean array of shape of ar1 containing True where the elements + of ar1 are in ar2 and False otherwise. + + Use unique1d() to generate arrays with only unique elements to use as inputs + to this function. + + :Parameters: + - `ar1` : array + - `ar2` : array - See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d, - union1d, setdiff1d + :Returns: + - `mask` : bool array + The values ar1[mask] are in ar2. + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. """ zlike = nm.zeros_like ar = nm.concatenate( (ar1, ar2 ) ) tt = nm.concatenate( (zlike( ar1 ), zlike( ar2 ) + 1) ) - perm = ar.argsort() + # We need this to be a stable sort, so always use 'mergesort' here. The + # values from the first array should always come before the values from the + # second array. + perm = ar.argsort(kind='mergesort') aux = ar[perm] aux2 = tt[perm] # flag = ediff1d( aux, 1 ) == 0 @@ -137,23 +216,46 @@ def setmember1d( ar1, ar2 ): perm[ii+1] = perm[ii] perm[ii] = aux - indx = perm.argsort()[:len( ar1 )] + indx = perm.argsort(kind='mergesort')[:len( ar1 )] return flag[indx] def union1d( ar1, ar2 ): """Union of 1D arrays with unique elements. - See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d, - setmember1d, setdiff1d + Use unique1d() to generate arrays with only unique elements to use as inputs + to this function. + + :Parameters: + - `ar1` : array + - `ar2` : array + + :Returns: + - `union` : array + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. """ return unique1d( nm.concatenate( (ar1, ar2) ) ) def setdiff1d( ar1, ar2 ): """Set difference of 1D arrays with unique elements. - See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d, - setmember1d, union1d + Use unique1d() to generate arrays with only unique elements to use as inputs + to this function. + + :Parameters: + - `ar1` : array + - `ar2` : array + + :Returns: + - `difference` : array + The values in ar1 that are not in ar2. + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. """ aux = setmember1d(ar1,ar2) if aux.size == 0: @@ -161,7 +263,7 @@ def setdiff1d( ar1, ar2 ): else: return nm.asarray(ar1)[aux == 0] -def test_unique1d_speed( plot_results = False ): +def _test_unique1d_speed( plot_results = False ): # exponents = nm.linspace( 2, 7, 9 ) exponents = nm.linspace( 2, 7, 9 ) ratios = [] @@ -222,4 +324,4 @@ def test_unique1d_speed( plot_results = False ): pylab.show() if (__name__ == '__main__'): - test_unique1d_speed( plot_results = True ) + _test_unique1d_speed( plot_results = True ) |