diff options
Diffstat (limited to 'numpy/lib/arraysetops.py')
-rw-r--r-- | numpy/lib/arraysetops.py | 170 |
1 files changed, 95 insertions, 75 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index a5786ca52..e4ed1a276 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -11,140 +11,160 @@ Contains: union1d, setdiff1d -Concerning the speed, test_unique1d_speed() reveals that up to 10000000 -elements unique1d() is about 10 times faster than the standard dictionary-based -numpy.unique(). +All functions work best with integer numerical arrays on input. For floating +point arrays, innacurate results may appear due to usual round-off and +floating point comparison issues. -Limitations: Except unique1d, union1d and intersect1d_nu, all functions expect -inputs with unique elements. Speed could be gained in some operations by an -implementaion of sort(), that can provide directly the permutation vectors, -avoiding thus calls to argsort(). +Except unique1d, union1d and intersect1d_nu, all functions expect inputs with +unique elements. Speed could be gained in some operations by an implementaion +of sort(), that can provide directly the permutation vectors, avoiding thus +calls to argsort(). + +Run test_unique1d_speed() to compare numpy.unique1d() with numpy.unique() - it +should be the same. To do: Optionally return indices analogously to unique1d for all functions. Author: Robert Cimrman + +created: 01.11.2005 +last revision: 12.10.2006 """ -__all__ = ['unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d', +__all__ = ['ediff1d', 'unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d', 'setmember1d', 'union1d', 'setdiff1d'] -# 02.11.2005, c import time -import numpy +import numpy as nm -## -# 03.11.2005, c def ediff1d(ary, to_end = None, to_begin = None): - """Array difference with prefixed and/or appended value.""" - ary = numpy.asarray(ary) + """Array difference with prefixed and/or appended value. + + See also: unique1d, intersect1d, intersect1d_nu, setxor1d, + setmember1d, union1d, setdiff1d + """ + ary = nm.asarray(ary).flat ed = ary[1:] - ary[:-1] if to_begin is not None: - ed = numpy.insert(ed, 0, to_begin) - if to_end is not None: - ed = numpy.append(ed, to_end) + if to_end is not None: + ed = nm.r_[to_begin, ed, to_end] + else: + ed = nm.insert(ed, 0, to_begin) + elif to_end is not None: + ed = nm.append(ed, to_end) return ed -## -# 01.11.2005, c -# 02.11.2005 def unique1d(ar1, return_index=False): """Unique elements of 1D array. When return_index is True, return also the indices indx such that ar1.flat[indx] is the resulting array of unique elements. + See also: ediff1d, intersect1d, intersect1d_nu, setxor1d, + setmember1d, union1d, setdiff1d """ - ar = numpy.asarray(ar1).ravel() + ar = nm.asarray(ar1).flatten() if ar.size == 0: - if return_index: return numpy.empty(0, numpy.bool), ar + if return_index: return nm.empty(0, nm.bool), ar else: return ar if return_index: perm = ar.argsort() - aux = ar.take(perm) - flag = ediff1d(aux, 1) != 0 - return perm.compress(flag), aux.compress(flag) + aux = ar[perm] + flag = nm.concatenate( ([True], aux[1:] != aux[:-1]) ) + return perm[flag], aux[flag] else: ar.sort() - return ar.compress(ediff1d(ar, 1) != 0) + flag = nm.concatenate( ([True], ar[1:] != ar[:-1]) ) + return ar[flag] -## -# 01.11.2005, c def intersect1d( ar1, ar2 ): - """Intersection of 1D arrays with unique elements.""" - aux = numpy.concatenate((ar1,ar2)) + """Intersection of 1D arrays with unique elements. + + See also: ediff1d, unique1d, intersect1d_nu, setxor1d, + setmember1d, union1d, setdiff1d + """ + aux = nm.concatenate((ar1,ar2)) aux.sort() - return aux.compress( (aux[1:] - aux[:-1]) == 0) + return aux[aux[1:] == aux[:-1]] -## -# 01.11.2005, c def intersect1d_nu( ar1, ar2 ): - """Intersection of 1D arrays with any elements.""" + """Intersection of 1D arrays with any elements. + + See also: ediff1d, unique1d, intersect1d, setxor1d, + setmember1d, union1d, setdiff1d + """ # Might be faster then unique1d( intersect1d( ar1, ar2 ) )? - aux = numpy.concatenate((unique1d(ar1), unique1d(ar2))) + aux = nm.concatenate((unique1d(ar1), unique1d(ar2))) aux.sort() - return aux.compress( (aux[1:] - aux[:-1]) == 0) + return aux[aux[1:] == aux[:-1]] -## -# 01.11.2005, c def setxor1d( ar1, ar2 ): - """Set exclusive-or of 1D arrays with unique elements.""" - aux = numpy.concatenate((ar1, ar2)) + """Set exclusive-or of 1D arrays with unique elements. + + See also: ediff1d, unique1d, intersect1d, intersect1d_nu, + setmember1d, union1d, setdiff1d + """ + aux = nm.concatenate((ar1, ar2)) if aux.size == 0: return aux aux.sort() - flag = ediff1d(aux, to_end = 1, to_begin = 1) == 0 - flag2 = ediff1d(flag) == 0 - return aux.compress(flag2) +# flag = ediff1d( aux, to_end = 1, to_begin = 1 ) == 0 + flag = nm.concatenate( ([True], aux[1:] != aux[:-1], [True] ) ) +# flag2 = ediff1d( flag ) == 0 + flag2 = flag[1:] == flag[:-1] + return aux[flag2] -## -# 03.11.2005, c -# 05.01.2006 def setmember1d( ar1, ar2 ): """Return an array of shape of ar1 containing 1 where the elements of - ar1 are in ar2 and 0 otherwise.""" - concat = numpy.concatenate - zlike = numpy.zeros_like - ar = concat( (ar1, ar2 ) ) - tt = concat( (zlike( ar1 ), - zlike( ar2 ) + 1) ) + ar1 are in ar2 and 0 otherwise. + + See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d, + union1d, setdiff1d + """ + zlike = nm.zeros_like + ar = nm.concatenate( (ar1, ar2 ) ) + tt = nm.concatenate( (zlike( ar1 ), zlike( ar2 ) + 1) ) perm = ar.argsort() - aux = ar.take(perm) - aux2 = tt.take(perm) - flag = ediff1d( aux, 1 ) == 0 + aux = ar[perm] + aux2 = tt[perm] +# flag = ediff1d( aux, 1 ) == 0 + flag = nm.concatenate( (aux[1:] == aux[:-1], [False] ) ) - ii = numpy.where( flag * aux2 )[0] + ii = nm.where( flag * aux2 )[0] aux = perm[ii+1] perm[ii+1] = perm[ii] perm[ii] = aux indx = perm.argsort()[:len( ar1 )] - return flag.take( indx ) + return flag[indx] -## -# 03.11.2005, c def union1d( ar1, ar2 ): - """Union of 1D arrays with unique elements.""" - return unique1d( numpy.concatenate( (ar1, ar2) ) ) + """Union of 1D arrays with unique elements. + + See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d, + setmember1d, setdiff1d + """ + return unique1d( nm.concatenate( (ar1, ar2) ) ) -## -# 03.11.2005, c def setdiff1d( ar1, ar2 ): - """Set difference of 1D arrays with unique elements.""" + """Set difference of 1D arrays with unique elements. + + See also: ediff1d, unique1d, intersect1d, intersect1d_nu, setxor1d, + setmember1d, union1d + """ aux = setmember1d(ar1,ar2) if aux.size == 0: return aux else: - return numpy.asarray(ar1).compress(aux == 0) + return nm.asarray(ar1)[aux == 0] -## -# 02.11.2005, c def test_unique1d_speed( plot_results = False ): -# exponents = numpy.linspace( 2, 7, 9 ) - exponents = numpy.linspace( 2, 6, 9 ) +# exponents = nm.linspace( 2, 7, 9 ) + exponents = nm.linspace( 2, 7, 9 ) ratios = [] nItems = [] dt1s = [] @@ -153,15 +173,15 @@ def test_unique1d_speed( plot_results = False ): nItem = 10 ** ii print 'using %d items:' % nItem - a = numpy.fix( nItem / 10 * numpy.random.random( nItem ) ) + a = nm.fix( nItem / 10 * nm.random.random( nItem ) ) - print 'dictionary:' + print 'unique:' tt = time.clock() - b = numpy.unique( a ) + b = nm.unique( a ) dt1 = time.clock() - tt print dt1 - print 'array:' + print 'unique1d:' tt = time.clock() c = unique1d( a ) dt2 = time.clock() - tt @@ -180,7 +200,7 @@ def test_unique1d_speed( plot_results = False ): dt1s.append( dt1 ) dt2s.append( dt2 ) - assert numpy.alltrue( b == c ) + assert nm.alltrue( b == c ) print nItems @@ -195,7 +215,7 @@ def test_unique1d_speed( plot_results = False ): pylab.figure( fig ) fun( nItems, dt1s, 'g-o', linewidth = 2, markersize = 8 ) fun( nItems, dt2s, 'b-x', linewidth = 2, markersize = 8 ) - pylab.legend( ('dictionary', 'array' ) ) + pylab.legend( ('unique', 'unique1d' ) ) pylab.xlabel( 'nItem' ) pylab.ylabel( 'time [s]' ) |