Arraysetops added to numpy.lib

author: Robert Cimrman <cimrman3@ntc.zcu.cz> 2006-01-09 12:09:24 +0000
committer: Robert Cimrman <cimrman3@ntc.zcu.cz> 2006-01-09 12:09:24 +0000
commit: 669969980843dc207db170d99fa0884594c6bc7e (patch)
tree: 9d3f6d1d3fd4647c0df9bfe3efa4a6ab055a124a /numpy/lib/arraysetops.py
parent: cb69a9f6ce005671cd7b39ebb16d7469d958fa5a (diff)
download: numpy-669969980843dc207db170d99fa0884594c6bc7e.tar.gz
1 files changed, 201 insertions, 0 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
new file mode 100644
index 000000000..e767a9a15
--- /dev/null
+++ b/numpy/lib/arraysetops.py
@@ -0,0 +1,201 @@
+"""
+Set operations for 1D numeric arrays based on sort() function.
+
+Contains:
+  ediff1d,
+  unique1d,
+  intersect1d,
+  intersect1d_nu,
+  setxor1d,
+  setmember1d,
+  union1d,
+  setdiff1d
+
+Concerning the speed, test_unique1d_speed() reveals that up to 10000000
+elements unique1d() is about 10 times faster than the standard dictionary-based
+numpy.unique().
+
+Limitations: Except unique1d, union1d and intersect1d_nu, all functions expect
+inputs with unique elements. Speed could be gained in some operations by an
+implementaion of sort(), that can provide directly the permutation vectors,
+avoiding thus calls to argsort().
+
+To do: Optionally return indices analogously to unique1d for all functions.
+
+Author: Robert Cimrman
+"""
+__all__ = ['unique1d', 'intersect1d', 'intersect1d_nu', 'setxor1d',
+           'setmember1d', 'union1d', 'setdiff1d']
+
+
+# 02.11.2005, c
+import time
+import numpy
+
+##
+# 03.11.2005, c
+def ediff1d( ar1, toEnd = None, toBegin = None ):
+    """Array difference with prefixed and/or appended value."""
+    dar1 = ar1[1:] - ar1[:-1]
+    if toEnd and toBegin:
+        shape = (ar1.shape[0] + 1,) + ar1.shape[1:]
+        ed = numpy.empty( shape, dtype = ar1.dtype )
+        ed[0], ed[-1] = toBegin, toEnd
+        ed[1:-1] = dar1
+    elif toEnd:
+        ed = numpy.empty( ar1.shape, dtype = ar1.dtype )
+        ed[-1] = toEnd
+        ed[:-1] = dar1
+    elif toBegin:
+        ed = numpy.empty( ar1.shape, dtype = ar1.dtype )
+        ed[0] = toBegin
+        ed[1:] = dar1
+    else:
+        ed = dar1
+
+    return ed
+
+
+##
+# 01.11.2005, c
+# 02.11.2005
+def unique1d( ar1, retIndx = False ):
+    """Unique elements of 1D array. When retIndx is True, return also the
+    indices indx such that ar1[indx] is the resulting array of unique
+    elements."""
+    ar = numpy.array( ar1 ).ravel()
+    if retIndx:
+        perm = numpy.argsort( ar )
+        aux = numpy.take( ar, perm )
+        flag = ediff1d( aux, 1 ) != 0
+        return numpy.compress( flag, perm ), numpy.compress( flag, aux )
+    else:
+        aux = numpy.sort( ar )
+        return numpy.compress( ediff1d( aux, 1 ) != 0, aux ) 
+
+##
+# 01.11.2005, c
+def intersect1d( ar1, ar2 ):
+    """Intersection of 1D arrays with unique elements."""
+    aux = numpy.sort( numpy.concatenate( (ar1, ar2 ) ) )
+    return numpy.compress( (aux[1:] - aux[:-1]) == 0, aux )
+
+##
+# 01.11.2005, c
+def intersect1d_nu( ar1, ar2 ):
+    """Intersection of 1D arrays with any elements."""
+    # Might be faster then unique1d( intersect1d( ar1, ar2 ) )?
+    aux = numpy.sort( numpy.concatenate( (unique1d( ar1 ),
+                                          unique1d( ar2  )) ) )
+    return numpy.compress( (aux[1:] - aux[:-1]) == 0, aux )
+
+##
+# 01.11.2005, c
+def setxor1d( ar1, ar2 ):
+    """Set exclusive-or of 1D arrays with unique elements."""
+    aux = numpy.sort( numpy.concatenate( (ar1, ar2 ) ) )
+    flag = ediff1d( aux, toEnd = 1, toBegin = 1 ) == 0
+    flag2 = ediff1d( flag, 0 ) == 0
+    return numpy.compress( flag2, aux )
+
+##
+# 03.11.2005, c
+# 05.01.2006
+def setmember1d( ar1, ar2 ):
+    """Return an array of shape of ar1 containing 1 where the elements of
+    ar1 are in ar2 and 0 otherwise."""
+    ar = numpy.concatenate( (ar1, ar2 ) )
+    tt = numpy.concatenate( (numpy.zeros_like( ar1 ),
+                             numpy.zeros_like( ar2 ) + 1) )
+    perm = numpy.argsort( ar )
+    aux = numpy.take( ar, perm )
+    aux2 = numpy.take( tt, perm )
+    flag = ediff1d( aux, 1 ) == 0
+
+    ii = numpy.where( flag * aux2 )
+    aux = perm[ii+1]
+    perm[ii+1] = perm[ii]
+    perm[ii] = aux
+
+    indx = numpy.argsort( perm )[:len( ar1 )]
+
+    return numpy.take( flag, indx )
+
+##
+# 03.11.2005, c
+def union1d( ar1, ar2 ):
+    """Union of 1D arrays with unique elements."""
+    return unique1d( numpy.concatenate( (ar1, ar2) ) )
+
+##
+# 03.11.2005, c
+def setdiff1d( ar1, ar2 ):
+    """Set difference of 1D arrays with unique elements."""
+    aux = setmember1d( ar1, ar2 )
+    return numpy.compress( aux == 0, ar1 )
+
+##
+# 02.11.2005, c
+def test_unique1d_speed( plotResults = False ):
+#    exponents = numpy.linspace( 2, 7, 9 )
+    exponents = numpy.linspace( 2, 6, 9 )
+    ratios = []
+    nItems = []
+    dt1s = []
+    dt2s = []
+    for ii in exponents:
+
+        nItem = 10 ** ii
+        print 'using %d items:' % nItem
+        a = numpy.fix( nItem / 10 * numpy.random.random( nItem ) )
+
+        print 'dictionary:'
+        tt = time.clock() 
+        b = numpy.unique( a )
+        dt1 = time.clock() - tt
+        print dt1
+
+        print 'array:'
+        tt = time.clock() 
+        c = unique1d( a )
+        dt2 = time.clock() - tt
+        print dt2
+
+
+        if dt1 < 1e-8:
+            ratio = 'ND'
+        else:
+            ratio = dt2 / dt1
+        print 'ratio:', ratio
+        print 'nUnique: %d == %d\n' % (len( b ), len( c ))
+
+        nItems.append( nItem )
+        ratios.append( ratio )
+        dt1s.append( dt1 )
+        dt2s.append( dt2 )
+
+        assert numpy.alltrue( b == c )
+
+
+    print nItems
+    print dt1s
+    print dt2s
+    print ratios
+
+    if plotResults:
+        import pylab
+
+        def plotMe( fig, fun, nItems, dt1s, dt2s ):
+            pylab.figure( fig )
+            fun( nItems, dt1s, 'g-o', linewidth = 2, markersize = 8 )
+            fun( nItems, dt2s, 'b-x', linewidth = 2, markersize = 8 )
+            pylab.legend( ('dictionary', 'array' ) )
+            pylab.xlabel( 'nItem' )
+            pylab.ylabel( 'time [s]' )
+
+        plotMe( 1, pylab.loglog, nItems, dt1s, dt2s )
+        plotMe( 2, pylab.plot, nItems, dt1s, dt2s )
+        pylab.show()
+
+if (__name__ == '__main__'):
+    test_unique1d_speed( plotResults = True )
author	Robert Cimrman <cimrman3@ntc.zcu.cz>	2006-01-09 12:09:24 +0000
committer	Robert Cimrman <cimrman3@ntc.zcu.cz>	2006-01-09 12:09:24 +0000
commit	669969980843dc207db170d99fa0884594c6bc7e (patch)
tree	9d3f6d1d3fd4647c0df9bfe3efa4a6ab055a124a /numpy/lib/arraysetops.py
parent	cb69a9f6ce005671cd7b39ebb16d7469d958fa5a (diff)
download	numpy-669969980843dc207db170d99fa0884594c6bc7e.tar.gz