diff options
author | Travis Oliphant <oliphant@enthought.com> | 2007-12-15 18:54:52 +0000 |
---|---|---|
committer | Travis Oliphant <oliphant@enthought.com> | 2007-12-15 18:54:52 +0000 |
commit | e76b5fa6896c09257181675bbf4cf47789d32927 (patch) | |
tree | 7174e22c68fc47df61e745ee18625ee9f4f5b88c /arraysetops.py | |
parent | 02ee35a7e1c722a1cdac8f3a60fe9ef7aa079a37 (diff) | |
download | numpy-e76b5fa6896c09257181675bbf4cf47789d32927.tar.gz |
Create a branch for io work in NumPy
Diffstat (limited to 'arraysetops.py')
-rw-r--r-- | arraysetops.py | 329 |
1 files changed, 329 insertions, 0 deletions
diff --git a/arraysetops.py b/arraysetops.py new file mode 100644 index 000000000..6693fa81c --- /dev/null +++ b/arraysetops.py @@ -0,0 +1,329 @@ +""" +Set operations for 1D numeric arrays based on sorting. + +:Contains: + ediff1d, + unique1d, + intersect1d, + intersect1d_nu, + setxor1d, + setmember1d, + union1d, + setdiff1d + +:Notes: + +All functions work best with integer numerical arrays on input (e.g. indices). +For floating point arrays, innacurate results may appear due to usual round-off +and floating point comparison issues. + +Except unique1d, union1d and intersect1d_nu, all functions expect inputs with +unique elements. Speed could be gained in some operations by an implementaion of +sort(), that can provide directly the permutation vectors, avoiding thus calls +to argsort(). + +Run _test_unique1d_speed() to compare performance of numpy.unique1d() and +numpy.unique() - it should be the same. + +To do: Optionally return indices analogously to unique1d for all functions. + +created: 01.11.2005 +last revision: 07.01.2007 + +:Author: Robert Cimrman +""" +__all__ = ['ediff1d', 'unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d', + 'setmember1d', 'union1d', 'setdiff1d'] + +import time +import numpy as nm + +def ediff1d(ary, to_end = None, to_begin = None): + """The differences between consecutive elements of an array, possibly with + prefixed and/or appended values. + + :Parameters: + - `ary` : array + This array will be flattened before the difference is taken. + - `to_end` : number, optional + If provided, this number will be tacked onto the end of the returned + differences. + - `to_begin` : number, optional + If provided, this number will be taked onto the beginning of the + returned differences. + + :Returns: + - `ed` : array + The differences. Loosely, this will be (ary[1:] - ary[:-1]). + """ + ary = nm.asarray(ary).flat + ed = ary[1:] - ary[:-1] + arrays = [ed] + if to_begin is not None: + arrays.insert(0, to_begin) + if to_end is not None: + arrays.append(to_end) + + if len(arrays) != 1: + # We'll save ourselves a copy of a potentially large array in the common + # case where neither to_begin or to_end was given. + ed = nm.hstack(arrays) + + return ed + +def unique1d(ar1, return_index=False): + """Find the unique elements of 1D array. + + Most of the other array set operations operate on the unique arrays + generated by this function. + + :Parameters: + - `ar1` : array + This array will be flattened if it is not already 1D. + - `return_index` : bool, optional + If True, also return the indices against ar1 that result in the unique + array. + + :Returns: + - `unique` : array + The unique values. + - `unique_indices` : int array, optional + The indices of the unique values. Only provided if return_index is True. + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. + """ + ar = nm.asarray(ar1).flatten() + if ar.size == 0: + if return_index: return nm.empty(0, nm.bool), ar + else: return ar + + if return_index: + perm = ar.argsort() + aux = ar[perm] + flag = nm.concatenate( ([True], aux[1:] != aux[:-1]) ) + return perm[flag], aux[flag] + + else: + ar.sort() + flag = nm.concatenate( ([True], ar[1:] != ar[:-1]) ) + return ar[flag] + +def intersect1d( ar1, ar2 ): + """Intersection of 1D arrays with unique elements. + + Use unique1d() to generate arrays with only unique elements to use as inputs + to this function. Alternatively, use intersect1d_nu() which will find the + unique values for you. + + :Parameters: + - `ar1` : array + - `ar2` : array + + :Returns: + - `intersection` : array + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. + """ + aux = nm.concatenate((ar1,ar2)) + aux.sort() + return aux[aux[1:] == aux[:-1]] + +def intersect1d_nu( ar1, ar2 ): + """Intersection of 1D arrays with any elements. + + The input arrays do not have unique elements like intersect1d() requires. + + :Parameters: + - `ar1` : array + - `ar2` : array + + :Returns: + - `intersection` : array + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. + """ + # Might be faster than unique1d( intersect1d( ar1, ar2 ) )? + aux = nm.concatenate((unique1d(ar1), unique1d(ar2))) + aux.sort() + return aux[aux[1:] == aux[:-1]] + +def setxor1d( ar1, ar2 ): + """Set exclusive-or of 1D arrays with unique elements. + + Use unique1d() to generate arrays with only unique elements to use as inputs + to this function. + + :Parameters: + - `ar1` : array + - `ar2` : array + + :Returns: + - `xor` : array + The values that are only in one, but not both, of the input arrays. + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. + """ + aux = nm.concatenate((ar1, ar2)) + if aux.size == 0: + return aux + + aux.sort() +# flag = ediff1d( aux, to_end = 1, to_begin = 1 ) == 0 + flag = nm.concatenate( ([True], aux[1:] != aux[:-1], [True] ) ) +# flag2 = ediff1d( flag ) == 0 + flag2 = flag[1:] == flag[:-1] + return aux[flag2] + +def setmember1d( ar1, ar2 ): + """Return a boolean array of shape of ar1 containing True where the elements + of ar1 are in ar2 and False otherwise. + + Use unique1d() to generate arrays with only unique elements to use as inputs + to this function. + + :Parameters: + - `ar1` : array + - `ar2` : array + + :Returns: + - `mask` : bool array + The values ar1[mask] are in ar2. + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. + """ + zlike = nm.zeros_like + ar = nm.concatenate( (ar1, ar2 ) ) + tt = nm.concatenate( (zlike( ar1 ), zlike( ar2 ) + 1) ) + # We need this to be a stable sort, so always use 'mergesort' here. The + # values from the first array should always come before the values from the + # second array. + perm = ar.argsort(kind='mergesort') + aux = ar[perm] + aux2 = tt[perm] +# flag = ediff1d( aux, 1 ) == 0 + flag = nm.concatenate( (aux[1:] == aux[:-1], [False] ) ) + + ii = nm.where( flag * aux2 )[0] + aux = perm[ii+1] + perm[ii+1] = perm[ii] + perm[ii] = aux + + indx = perm.argsort(kind='mergesort')[:len( ar1 )] + + return flag[indx] + +def union1d( ar1, ar2 ): + """Union of 1D arrays with unique elements. + + Use unique1d() to generate arrays with only unique elements to use as inputs + to this function. + + :Parameters: + - `ar1` : array + - `ar2` : array + + :Returns: + - `union` : array + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. + """ + return unique1d( nm.concatenate( (ar1, ar2) ) ) + +def setdiff1d( ar1, ar2 ): + """Set difference of 1D arrays with unique elements. + + Use unique1d() to generate arrays with only unique elements to use as inputs + to this function. + + :Parameters: + - `ar1` : array + - `ar2` : array + + :Returns: + - `difference` : array + The values in ar1 that are not in ar2. + + :See also: + numpy.lib.arraysetops has a number of other functions for performing set + operations on arrays. + """ + aux = setmember1d(ar1,ar2) + if aux.size == 0: + return aux + else: + return nm.asarray(ar1)[aux == 0] + +def _test_unique1d_speed( plot_results = False ): +# exponents = nm.linspace( 2, 7, 9 ) + exponents = nm.linspace( 2, 7, 9 ) + ratios = [] + nItems = [] + dt1s = [] + dt2s = [] + for ii in exponents: + + nItem = 10 ** ii + print 'using %d items:' % nItem + a = nm.fix( nItem / 10 * nm.random.random( nItem ) ) + + print 'unique:' + tt = time.clock() + b = nm.unique( a ) + dt1 = time.clock() - tt + print dt1 + + print 'unique1d:' + tt = time.clock() + c = unique1d( a ) + dt2 = time.clock() - tt + print dt2 + + + if dt1 < 1e-8: + ratio = 'ND' + else: + ratio = dt2 / dt1 + print 'ratio:', ratio + print 'nUnique: %d == %d\n' % (len( b ), len( c )) + + nItems.append( nItem ) + ratios.append( ratio ) + dt1s.append( dt1 ) + dt2s.append( dt2 ) + + assert nm.alltrue( b == c ) + + print nItems + print dt1s + print dt2s + print ratios + + if plot_results: + import pylab + + def plotMe( fig, fun, nItems, dt1s, dt2s ): + pylab.figure( fig ) + fun( nItems, dt1s, 'g-o', linewidth = 2, markersize = 8 ) + fun( nItems, dt2s, 'b-x', linewidth = 2, markersize = 8 ) + pylab.legend( ('unique', 'unique1d' ) ) + pylab.xlabel( 'nItem' ) + pylab.ylabel( 'time [s]' ) + + plotMe( 1, pylab.loglog, nItems, dt1s, dt2s ) + plotMe( 2, pylab.plot, nItems, dt1s, dt2s ) + pylab.show() + +if (__name__ == '__main__'): + _test_unique1d_speed( plot_results = True ) |