diff options
Diffstat (limited to 'scipy/weave/examples/cast_copy_transpose.py')
-rw-r--r-- | scipy/weave/examples/cast_copy_transpose.py | 182 |
1 files changed, 182 insertions, 0 deletions
diff --git a/scipy/weave/examples/cast_copy_transpose.py b/scipy/weave/examples/cast_copy_transpose.py new file mode 100644 index 000000000..207e104b4 --- /dev/null +++ b/scipy/weave/examples/cast_copy_transpose.py @@ -0,0 +1,182 @@ +""" Cast Copy Tranpose is used in scipy_base.numerix's LinearAlgebra.py to convert + C ordered arrays to Fortran order arrays before calling Fortran + functions. A couple of C implementations are provided here that + show modest speed improvements. One is an "inplace" transpose that + does an in memory transpose of an arrays elements. This is the + fastest approach and is beneficial if you don't need to keep the + original array. +""" +# C:\home\ej\wrk\scipy\compiler\examples>python cast_copy_transpose.py +# Cast/Copy/Transposing (150,150)array 1 times +# speed in python: 0.870999932289 +# speed in c: 0.25 +# speed up: 3.48 +# inplace transpose c: 0.129999995232 +# speed up: 6.70 + +import scipy_base.numerix +from scipy_base.numerix import * +import sys +sys.path.insert(0,'..') +import inline_tools +import c_spec +from converters import blitz as cblitz + +def _cast_copy_transpose(type,a_2d): + assert(len(shape(a_2d)) == 2) + new_array = zeros(shape(a_2d),type) + code = """ + for(int i = 0; i < Na_2d[0]; i++) + for(int j = 0; j < Na_2d[1]; j++) + new_array(i,j) = a_2d(j,i); + """ + inline_tools.inline(code,['new_array','a_2d'], + type_converters = cblitz, + compiler='gcc', + verbose = 1) + return new_array + +def _cast_copy_transpose2(type,a_2d): + assert(len(shape(a_2d)) == 2) + new_array = zeros(shape(a_2d),type) + code = """ + const int I = Na_2d[0]; + const int J = Na_2d[1]; + for(int i = 0; i < I; i++) + { + int new_off = i*J; + int old_off = i; + for(int j = 0; j < J; j++) + { + new_array[new_off++] = a_2d[old_off]; + old_off += I; + } + } + """ + inline_tools.inline(code,['new_array','a_2d'],compiler='gcc',verbose=1) + return new_array + +def _inplace_transpose(a_2d): + assert(len(shape(a_2d)) == 2) + numeric_type = c_spec.num_to_c_types[a_2d.typecode()] + code = """ + %s temp; + for(int i = 0; i < Na_2d[0]; i++) + for(int j = 0; j < Na_2d[1]; j++) + { + temp = a_2d(i,j); + a_2d(i,j) = a_2d(j,i); + a_2d(j,i) = temp; + } + """ % numeric_type + inline_tools.inline(code,['a_2d'], + type_converters = cblitz, + compiler='gcc', + extra_compile_args = ['-funroll-all-loops'], + verbose =2 ) + return a_2d + #assert(len(shape(a_2d)) == 2) + #type = a_2d.typecode() + #new_array = zeros(shape(a_2d),type) + ##trans_a_2d = transpose(a_2d) + #numeric_type = c_spec.num_to_c_types[type] + #code = """ + # for(int i = 0; i < Na_2d[0]; i++) + # for(int j = 0; j < Na_2d[1]; j++) + # new_array(i,j) = (%s) a_2d(j,i); + # """ % numeric_type + #inline_tools.inline(code,['new_array','a_2d'], + # type_converters = cblitz, + # compiler='gcc', + # verbose = 1) + #return new_array + +def cast_copy_transpose(type,*arrays): + results = [] + for a in arrays: + results.append(_cast_copy_transpose(type,a)) + if len(results) == 1: + return results[0] + else: + return results + +def cast_copy_transpose2(type,*arrays): + results = [] + for a in arrays: + results.append(_cast_copy_transpose2(type,a)) + if len(results) == 1: + return results[0] + else: + return results + +def inplace_cast_copy_transpose(*arrays): + results = [] + for a in arrays: + results.append(_inplace_transpose(a)) + if len(results) == 1: + return results[0] + else: + return results + +def _castCopyAndTranspose(type, *arrays): + cast_arrays = () + for a in arrays: + if a.typecode() == type: + cast_arrays = cast_arrays + (copy.copy(scipy_base.numerix.transpose(a)),) + else: + cast_arrays = cast_arrays + (copy.copy( + scipy_base.numerix.transpose(a).astype(type)),) + if len(cast_arrays) == 1: + return cast_arrays[0] + else: + return cast_arrays + +import time + + +def compare(m,n): + a = ones((n,n),Float64) + type = Float32 + print 'Cast/Copy/Transposing (%d,%d)array %d times' % (n,n,m) + t1 = time.time() + for i in range(m): + for i in range(n): + b = _castCopyAndTranspose(type,a) + t2 = time.time() + py = (t2-t1) + print ' speed in python:', (t2 - t1)/m + + + # load into cache + b = cast_copy_transpose(type,a) + t1 = time.time() + for i in range(m): + for i in range(n): + b = cast_copy_transpose(type,a) + t2 = time.time() + print ' speed in c (blitz):',(t2 - t1)/ m + print ' speed up (blitz): %3.2f' % (py/(t2-t1)) + + # load into cache + b = cast_copy_transpose2(type,a) + t1 = time.time() + for i in range(m): + for i in range(n): + b = cast_copy_transpose2(type,a) + t2 = time.time() + print ' speed in c (pointers):',(t2 - t1)/ m + print ' speed up (pointers): %3.2f' % (py/(t2-t1)) + + # inplace tranpose + b = _inplace_transpose(a) + t1 = time.time() + for i in range(m): + for i in range(n): + b = _inplace_transpose(a) + t2 = time.time() + print ' inplace transpose c:',(t2 - t1)/ m + print ' speed up: %3.2f' % (py/(t2-t1)) + +if __name__ == "__main__": + m,n = 1,500 + compare(m,n) |