summaryrefslogtreecommitdiff
path: root/arraysetops.py
diff options
context:
space:
mode:
authorTravis Oliphant <oliphant@enthought.com>2007-12-15 18:54:52 +0000
committerTravis Oliphant <oliphant@enthought.com>2007-12-15 18:54:52 +0000
commite76b5fa6896c09257181675bbf4cf47789d32927 (patch)
tree7174e22c68fc47df61e745ee18625ee9f4f5b88c /arraysetops.py
parent02ee35a7e1c722a1cdac8f3a60fe9ef7aa079a37 (diff)
downloadnumpy-e76b5fa6896c09257181675bbf4cf47789d32927.tar.gz
Create a branch for io work in NumPy
Diffstat (limited to 'arraysetops.py')
-rw-r--r--arraysetops.py329
1 files changed, 329 insertions, 0 deletions
diff --git a/arraysetops.py b/arraysetops.py
new file mode 100644
index 000000000..6693fa81c
--- /dev/null
+++ b/arraysetops.py
@@ -0,0 +1,329 @@
+"""
+Set operations for 1D numeric arrays based on sorting.
+
+:Contains:
+ ediff1d,
+ unique1d,
+ intersect1d,
+ intersect1d_nu,
+ setxor1d,
+ setmember1d,
+ union1d,
+ setdiff1d
+
+:Notes:
+
+All functions work best with integer numerical arrays on input (e.g. indices).
+For floating point arrays, innacurate results may appear due to usual round-off
+and floating point comparison issues.
+
+Except unique1d, union1d and intersect1d_nu, all functions expect inputs with
+unique elements. Speed could be gained in some operations by an implementaion of
+sort(), that can provide directly the permutation vectors, avoiding thus calls
+to argsort().
+
+Run _test_unique1d_speed() to compare performance of numpy.unique1d() and
+numpy.unique() - it should be the same.
+
+To do: Optionally return indices analogously to unique1d for all functions.
+
+created: 01.11.2005
+last revision: 07.01.2007
+
+:Author: Robert Cimrman
+"""
+__all__ = ['ediff1d', 'unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d',
+ 'setmember1d', 'union1d', 'setdiff1d']
+
+import time
+import numpy as nm
+
+def ediff1d(ary, to_end = None, to_begin = None):
+ """The differences between consecutive elements of an array, possibly with
+ prefixed and/or appended values.
+
+ :Parameters:
+ - `ary` : array
+ This array will be flattened before the difference is taken.
+ - `to_end` : number, optional
+ If provided, this number will be tacked onto the end of the returned
+ differences.
+ - `to_begin` : number, optional
+ If provided, this number will be taked onto the beginning of the
+ returned differences.
+
+ :Returns:
+ - `ed` : array
+ The differences. Loosely, this will be (ary[1:] - ary[:-1]).
+ """
+ ary = nm.asarray(ary).flat
+ ed = ary[1:] - ary[:-1]
+ arrays = [ed]
+ if to_begin is not None:
+ arrays.insert(0, to_begin)
+ if to_end is not None:
+ arrays.append(to_end)
+
+ if len(arrays) != 1:
+ # We'll save ourselves a copy of a potentially large array in the common
+ # case where neither to_begin or to_end was given.
+ ed = nm.hstack(arrays)
+
+ return ed
+
+def unique1d(ar1, return_index=False):
+ """Find the unique elements of 1D array.
+
+ Most of the other array set operations operate on the unique arrays
+ generated by this function.
+
+ :Parameters:
+ - `ar1` : array
+ This array will be flattened if it is not already 1D.
+ - `return_index` : bool, optional
+ If True, also return the indices against ar1 that result in the unique
+ array.
+
+ :Returns:
+ - `unique` : array
+ The unique values.
+ - `unique_indices` : int array, optional
+ The indices of the unique values. Only provided if return_index is True.
+
+ :See also:
+ numpy.lib.arraysetops has a number of other functions for performing set
+ operations on arrays.
+ """
+ ar = nm.asarray(ar1).flatten()
+ if ar.size == 0:
+ if return_index: return nm.empty(0, nm.bool), ar
+ else: return ar
+
+ if return_index:
+ perm = ar.argsort()
+ aux = ar[perm]
+ flag = nm.concatenate( ([True], aux[1:] != aux[:-1]) )
+ return perm[flag], aux[flag]
+
+ else:
+ ar.sort()
+ flag = nm.concatenate( ([True], ar[1:] != ar[:-1]) )
+ return ar[flag]
+
+def intersect1d( ar1, ar2 ):
+ """Intersection of 1D arrays with unique elements.
+
+ Use unique1d() to generate arrays with only unique elements to use as inputs
+ to this function. Alternatively, use intersect1d_nu() which will find the
+ unique values for you.
+
+ :Parameters:
+ - `ar1` : array
+ - `ar2` : array
+
+ :Returns:
+ - `intersection` : array
+
+ :See also:
+ numpy.lib.arraysetops has a number of other functions for performing set
+ operations on arrays.
+ """
+ aux = nm.concatenate((ar1,ar2))
+ aux.sort()
+ return aux[aux[1:] == aux[:-1]]
+
+def intersect1d_nu( ar1, ar2 ):
+ """Intersection of 1D arrays with any elements.
+
+ The input arrays do not have unique elements like intersect1d() requires.
+
+ :Parameters:
+ - `ar1` : array
+ - `ar2` : array
+
+ :Returns:
+ - `intersection` : array
+
+ :See also:
+ numpy.lib.arraysetops has a number of other functions for performing set
+ operations on arrays.
+ """
+ # Might be faster than unique1d( intersect1d( ar1, ar2 ) )?
+ aux = nm.concatenate((unique1d(ar1), unique1d(ar2)))
+ aux.sort()
+ return aux[aux[1:] == aux[:-1]]
+
+def setxor1d( ar1, ar2 ):
+ """Set exclusive-or of 1D arrays with unique elements.
+
+ Use unique1d() to generate arrays with only unique elements to use as inputs
+ to this function.
+
+ :Parameters:
+ - `ar1` : array
+ - `ar2` : array
+
+ :Returns:
+ - `xor` : array
+ The values that are only in one, but not both, of the input arrays.
+
+ :See also:
+ numpy.lib.arraysetops has a number of other functions for performing set
+ operations on arrays.
+ """
+ aux = nm.concatenate((ar1, ar2))
+ if aux.size == 0:
+ return aux
+
+ aux.sort()
+# flag = ediff1d( aux, to_end = 1, to_begin = 1 ) == 0
+ flag = nm.concatenate( ([True], aux[1:] != aux[:-1], [True] ) )
+# flag2 = ediff1d( flag ) == 0
+ flag2 = flag[1:] == flag[:-1]
+ return aux[flag2]
+
+def setmember1d( ar1, ar2 ):
+ """Return a boolean array of shape of ar1 containing True where the elements
+ of ar1 are in ar2 and False otherwise.
+
+ Use unique1d() to generate arrays with only unique elements to use as inputs
+ to this function.
+
+ :Parameters:
+ - `ar1` : array
+ - `ar2` : array
+
+ :Returns:
+ - `mask` : bool array
+ The values ar1[mask] are in ar2.
+
+ :See also:
+ numpy.lib.arraysetops has a number of other functions for performing set
+ operations on arrays.
+ """
+ zlike = nm.zeros_like
+ ar = nm.concatenate( (ar1, ar2 ) )
+ tt = nm.concatenate( (zlike( ar1 ), zlike( ar2 ) + 1) )
+ # We need this to be a stable sort, so always use 'mergesort' here. The
+ # values from the first array should always come before the values from the
+ # second array.
+ perm = ar.argsort(kind='mergesort')
+ aux = ar[perm]
+ aux2 = tt[perm]
+# flag = ediff1d( aux, 1 ) == 0
+ flag = nm.concatenate( (aux[1:] == aux[:-1], [False] ) )
+
+ ii = nm.where( flag * aux2 )[0]
+ aux = perm[ii+1]
+ perm[ii+1] = perm[ii]
+ perm[ii] = aux
+
+ indx = perm.argsort(kind='mergesort')[:len( ar1 )]
+
+ return flag[indx]
+
+def union1d( ar1, ar2 ):
+ """Union of 1D arrays with unique elements.
+
+ Use unique1d() to generate arrays with only unique elements to use as inputs
+ to this function.
+
+ :Parameters:
+ - `ar1` : array
+ - `ar2` : array
+
+ :Returns:
+ - `union` : array
+
+ :See also:
+ numpy.lib.arraysetops has a number of other functions for performing set
+ operations on arrays.
+ """
+ return unique1d( nm.concatenate( (ar1, ar2) ) )
+
+def setdiff1d( ar1, ar2 ):
+ """Set difference of 1D arrays with unique elements.
+
+ Use unique1d() to generate arrays with only unique elements to use as inputs
+ to this function.
+
+ :Parameters:
+ - `ar1` : array
+ - `ar2` : array
+
+ :Returns:
+ - `difference` : array
+ The values in ar1 that are not in ar2.
+
+ :See also:
+ numpy.lib.arraysetops has a number of other functions for performing set
+ operations on arrays.
+ """
+ aux = setmember1d(ar1,ar2)
+ if aux.size == 0:
+ return aux
+ else:
+ return nm.asarray(ar1)[aux == 0]
+
+def _test_unique1d_speed( plot_results = False ):
+# exponents = nm.linspace( 2, 7, 9 )
+ exponents = nm.linspace( 2, 7, 9 )
+ ratios = []
+ nItems = []
+ dt1s = []
+ dt2s = []
+ for ii in exponents:
+
+ nItem = 10 ** ii
+ print 'using %d items:' % nItem
+ a = nm.fix( nItem / 10 * nm.random.random( nItem ) )
+
+ print 'unique:'
+ tt = time.clock()
+ b = nm.unique( a )
+ dt1 = time.clock() - tt
+ print dt1
+
+ print 'unique1d:'
+ tt = time.clock()
+ c = unique1d( a )
+ dt2 = time.clock() - tt
+ print dt2
+
+
+ if dt1 < 1e-8:
+ ratio = 'ND'
+ else:
+ ratio = dt2 / dt1
+ print 'ratio:', ratio
+ print 'nUnique: %d == %d\n' % (len( b ), len( c ))
+
+ nItems.append( nItem )
+ ratios.append( ratio )
+ dt1s.append( dt1 )
+ dt2s.append( dt2 )
+
+ assert nm.alltrue( b == c )
+
+ print nItems
+ print dt1s
+ print dt2s
+ print ratios
+
+ if plot_results:
+ import pylab
+
+ def plotMe( fig, fun, nItems, dt1s, dt2s ):
+ pylab.figure( fig )
+ fun( nItems, dt1s, 'g-o', linewidth = 2, markersize = 8 )
+ fun( nItems, dt2s, 'b-x', linewidth = 2, markersize = 8 )
+ pylab.legend( ('unique', 'unique1d' ) )
+ pylab.xlabel( 'nItem' )
+ pylab.ylabel( 'time [s]' )
+
+ plotMe( 1, pylab.loglog, nItems, dt1s, dt2s )
+ plotMe( 2, pylab.plot, nItems, dt1s, dt2s )
+ pylab.show()
+
+if (__name__ == '__main__'):
+ _test_unique1d_speed( plot_results = True )