diff options
author | Travis Oliphant <oliphant@enthought.com> | 2007-12-15 18:54:52 +0000 |
---|---|---|
committer | Travis Oliphant <oliphant@enthought.com> | 2007-12-15 18:54:52 +0000 |
commit | e76b5fa6896c09257181675bbf4cf47789d32927 (patch) | |
tree | 7174e22c68fc47df61e745ee18625ee9f4f5b88c /numpy/lib/arraysetops.py | |
parent | 02ee35a7e1c722a1cdac8f3a60fe9ef7aa079a37 (diff) | |
download | numpy-e76b5fa6896c09257181675bbf4cf47789d32927.tar.gz |
Create a branch for io work in NumPy
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r-- | numpy/lib/arraysetops.py | 329 |
1 files changed, 0 insertions, 329 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py deleted file mode 100644 index 6693fa81c..000000000 --- a/numpy/lib/arraysetops.py +++ /dev/null @@ -1,329 +0,0 @@ -""" -Set operations for 1D numeric arrays based on sorting. - -:Contains: - ediff1d, - unique1d, - intersect1d, - intersect1d_nu, - setxor1d, - setmember1d, - union1d, - setdiff1d - -:Notes: - -All functions work best with integer numerical arrays on input (e.g. indices). -For floating point arrays, innacurate results may appear due to usual round-off -and floating point comparison issues. - -Except unique1d, union1d and intersect1d_nu, all functions expect inputs with -unique elements. Speed could be gained in some operations by an implementaion of -sort(), that can provide directly the permutation vectors, avoiding thus calls -to argsort(). - -Run _test_unique1d_speed() to compare performance of numpy.unique1d() and -numpy.unique() - it should be the same. - -To do: Optionally return indices analogously to unique1d for all functions. - -created: 01.11.2005 -last revision: 07.01.2007 - -:Author: Robert Cimrman -""" -__all__ = ['ediff1d', 'unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d', - 'setmember1d', 'union1d', 'setdiff1d'] - -import time -import numpy as nm - -def ediff1d(ary, to_end = None, to_begin = None): - """The differences between consecutive elements of an array, possibly with - prefixed and/or appended values. - - :Parameters: - - `ary` : array - This array will be flattened before the difference is taken. - - `to_end` : number, optional - If provided, this number will be tacked onto the end of the returned - differences. - - `to_begin` : number, optional - If provided, this number will be taked onto the beginning of the - returned differences. - - :Returns: - - `ed` : array - The differences. Loosely, this will be (ary[1:] - ary[:-1]). - """ - ary = nm.asarray(ary).flat - ed = ary[1:] - ary[:-1] - arrays = [ed] - if to_begin is not None: - arrays.insert(0, to_begin) - if to_end is not None: - arrays.append(to_end) - - if len(arrays) != 1: - # We'll save ourselves a copy of a potentially large array in the common - # case where neither to_begin or to_end was given. - ed = nm.hstack(arrays) - - return ed - -def unique1d(ar1, return_index=False): - """Find the unique elements of 1D array. - - Most of the other array set operations operate on the unique arrays - generated by this function. - - :Parameters: - - `ar1` : array - This array will be flattened if it is not already 1D. - - `return_index` : bool, optional - If True, also return the indices against ar1 that result in the unique - array. - - :Returns: - - `unique` : array - The unique values. - - `unique_indices` : int array, optional - The indices of the unique values. Only provided if return_index is True. - - :See also: - numpy.lib.arraysetops has a number of other functions for performing set - operations on arrays. - """ - ar = nm.asarray(ar1).flatten() - if ar.size == 0: - if return_index: return nm.empty(0, nm.bool), ar - else: return ar - - if return_index: - perm = ar.argsort() - aux = ar[perm] - flag = nm.concatenate( ([True], aux[1:] != aux[:-1]) ) - return perm[flag], aux[flag] - - else: - ar.sort() - flag = nm.concatenate( ([True], ar[1:] != ar[:-1]) ) - return ar[flag] - -def intersect1d( ar1, ar2 ): - """Intersection of 1D arrays with unique elements. - - Use unique1d() to generate arrays with only unique elements to use as inputs - to this function. Alternatively, use intersect1d_nu() which will find the - unique values for you. - - :Parameters: - - `ar1` : array - - `ar2` : array - - :Returns: - - `intersection` : array - - :See also: - numpy.lib.arraysetops has a number of other functions for performing set - operations on arrays. - """ - aux = nm.concatenate((ar1,ar2)) - aux.sort() - return aux[aux[1:] == aux[:-1]] - -def intersect1d_nu( ar1, ar2 ): - """Intersection of 1D arrays with any elements. - - The input arrays do not have unique elements like intersect1d() requires. - - :Parameters: - - `ar1` : array - - `ar2` : array - - :Returns: - - `intersection` : array - - :See also: - numpy.lib.arraysetops has a number of other functions for performing set - operations on arrays. - """ - # Might be faster than unique1d( intersect1d( ar1, ar2 ) )? - aux = nm.concatenate((unique1d(ar1), unique1d(ar2))) - aux.sort() - return aux[aux[1:] == aux[:-1]] - -def setxor1d( ar1, ar2 ): - """Set exclusive-or of 1D arrays with unique elements. - - Use unique1d() to generate arrays with only unique elements to use as inputs - to this function. - - :Parameters: - - `ar1` : array - - `ar2` : array - - :Returns: - - `xor` : array - The values that are only in one, but not both, of the input arrays. - - :See also: - numpy.lib.arraysetops has a number of other functions for performing set - operations on arrays. - """ - aux = nm.concatenate((ar1, ar2)) - if aux.size == 0: - return aux - - aux.sort() -# flag = ediff1d( aux, to_end = 1, to_begin = 1 ) == 0 - flag = nm.concatenate( ([True], aux[1:] != aux[:-1], [True] ) ) -# flag2 = ediff1d( flag ) == 0 - flag2 = flag[1:] == flag[:-1] - return aux[flag2] - -def setmember1d( ar1, ar2 ): - """Return a boolean array of shape of ar1 containing True where the elements - of ar1 are in ar2 and False otherwise. - - Use unique1d() to generate arrays with only unique elements to use as inputs - to this function. - - :Parameters: - - `ar1` : array - - `ar2` : array - - :Returns: - - `mask` : bool array - The values ar1[mask] are in ar2. - - :See also: - numpy.lib.arraysetops has a number of other functions for performing set - operations on arrays. - """ - zlike = nm.zeros_like - ar = nm.concatenate( (ar1, ar2 ) ) - tt = nm.concatenate( (zlike( ar1 ), zlike( ar2 ) + 1) ) - # We need this to be a stable sort, so always use 'mergesort' here. The - # values from the first array should always come before the values from the - # second array. - perm = ar.argsort(kind='mergesort') - aux = ar[perm] - aux2 = tt[perm] -# flag = ediff1d( aux, 1 ) == 0 - flag = nm.concatenate( (aux[1:] == aux[:-1], [False] ) ) - - ii = nm.where( flag * aux2 )[0] - aux = perm[ii+1] - perm[ii+1] = perm[ii] - perm[ii] = aux - - indx = perm.argsort(kind='mergesort')[:len( ar1 )] - - return flag[indx] - -def union1d( ar1, ar2 ): - """Union of 1D arrays with unique elements. - - Use unique1d() to generate arrays with only unique elements to use as inputs - to this function. - - :Parameters: - - `ar1` : array - - `ar2` : array - - :Returns: - - `union` : array - - :See also: - numpy.lib.arraysetops has a number of other functions for performing set - operations on arrays. - """ - return unique1d( nm.concatenate( (ar1, ar2) ) ) - -def setdiff1d( ar1, ar2 ): - """Set difference of 1D arrays with unique elements. - - Use unique1d() to generate arrays with only unique elements to use as inputs - to this function. - - :Parameters: - - `ar1` : array - - `ar2` : array - - :Returns: - - `difference` : array - The values in ar1 that are not in ar2. - - :See also: - numpy.lib.arraysetops has a number of other functions for performing set - operations on arrays. - """ - aux = setmember1d(ar1,ar2) - if aux.size == 0: - return aux - else: - return nm.asarray(ar1)[aux == 0] - -def _test_unique1d_speed( plot_results = False ): -# exponents = nm.linspace( 2, 7, 9 ) - exponents = nm.linspace( 2, 7, 9 ) - ratios = [] - nItems = [] - dt1s = [] - dt2s = [] - for ii in exponents: - - nItem = 10 ** ii - print 'using %d items:' % nItem - a = nm.fix( nItem / 10 * nm.random.random( nItem ) ) - - print 'unique:' - tt = time.clock() - b = nm.unique( a ) - dt1 = time.clock() - tt - print dt1 - - print 'unique1d:' - tt = time.clock() - c = unique1d( a ) - dt2 = time.clock() - tt - print dt2 - - - if dt1 < 1e-8: - ratio = 'ND' - else: - ratio = dt2 / dt1 - print 'ratio:', ratio - print 'nUnique: %d == %d\n' % (len( b ), len( c )) - - nItems.append( nItem ) - ratios.append( ratio ) - dt1s.append( dt1 ) - dt2s.append( dt2 ) - - assert nm.alltrue( b == c ) - - print nItems - print dt1s - print dt2s - print ratios - - if plot_results: - import pylab - - def plotMe( fig, fun, nItems, dt1s, dt2s ): - pylab.figure( fig ) - fun( nItems, dt1s, 'g-o', linewidth = 2, markersize = 8 ) - fun( nItems, dt2s, 'b-x', linewidth = 2, markersize = 8 ) - pylab.legend( ('unique', 'unique1d' ) ) - pylab.xlabel( 'nItem' ) - pylab.ylabel( 'time [s]' ) - - plotMe( 1, pylab.loglog, nItems, dt1s, dt2s ) - plotMe( 2, pylab.plot, nItems, dt1s, dt2s ) - pylab.show() - -if (__name__ == '__main__'): - _test_unique1d_speed( plot_results = True ) |