diff options
Diffstat (limited to 'numpy/f2py/doc/multiarray')
-rw-r--r-- | numpy/f2py/doc/multiarray/array_from_pyobj.c | 323 | ||||
-rw-r--r-- | numpy/f2py/doc/multiarray/bar.c | 15 | ||||
-rw-r--r-- | numpy/f2py/doc/multiarray/foo.f | 13 | ||||
-rw-r--r-- | numpy/f2py/doc/multiarray/fortran_array_from_pyobj.txt | 284 | ||||
-rw-r--r-- | numpy/f2py/doc/multiarray/fun.pyf | 89 | ||||
-rw-r--r-- | numpy/f2py/doc/multiarray/run.pyf | 91 | ||||
-rw-r--r-- | numpy/f2py/doc/multiarray/transpose.txt | 1127 |
7 files changed, 1942 insertions, 0 deletions
diff --git a/numpy/f2py/doc/multiarray/array_from_pyobj.c b/numpy/f2py/doc/multiarray/array_from_pyobj.c new file mode 100644 index 000000000..7e0de9a74 --- /dev/null +++ b/numpy/f2py/doc/multiarray/array_from_pyobj.c @@ -0,0 +1,323 @@ +/* + * File: array_from_pyobj.c + * + * Description: + * ------------ + * Provides array_from_pyobj function that returns a contigious array + * object with the given dimensions and required storage order, either + * in row-major (C) or column-major (Fortran) order. The function + * array_from_pyobj is very flexible about its Python object argument + * that can be any number, list, tuple, or array. + * + * array_from_pyobj is used in f2py generated Python extension + * modules. + * + * Author: Pearu Peterson <pearu@cens.ioc.ee> + * Created: 13-16 January 2002 + * $Id: array_from_pyobj.c,v 1.1 2002/01/16 18:57:33 pearu Exp $ + */ + + +#define ARR_IS_NULL(arr,mess) \ +if (arr==NULL) { \ + fprintf(stderr,"array_from_pyobj:" mess); \ + return NULL; \ +} + +#define CHECK_DIMS_DEFINED(rank,dims,mess) \ +if (count_nonpos(rank,dims)) { \ + fprintf(stderr,"array_from_pyobj:" mess); \ + return NULL; \ +} + +#define HAS_PROPER_ELSIZE(arr,type_num) \ + ((PyArray_DescrFromType(type_num)->elsize) == (arr)->descr->elsize) + +/* static */ +/* void f2py_show_args(const int type_num, */ +/* const int *dims, */ +/* const int rank, */ +/* const int intent) { */ +/* int i; */ +/* fprintf(stderr,"array_from_pyobj:\n\ttype_num=%d\n\trank=%d\n\tintent=%d\n",\ */ +/* type_num,rank,intent); */ +/* for (i=0;i<rank;++i) */ +/* fprintf(stderr,"\tdims[%d]=%d\n",i,dims[i]); */ +/* } */ + +static +int count_nonpos(const int rank, + const int *dims) { + int i=0,r=0; + while (i<rank) { + if (dims[i] <= 0) ++r; + ++i; + } + return r; +} + +static void lazy_transpose(PyArrayObject* arr); +static int check_and_fix_dimensions(const PyArrayObject* arr, + const int rank, + int *dims); +static +int array_has_column_major_storage(const PyArrayObject *ap); + +static +PyArrayObject* array_from_pyobj(const int type_num, + int *dims, + const int rank, + const int intent, + PyObject *obj) { + /* Note about reference counting + ----------------------------- + If the caller returns the array to Python, it must be done with + Py_BuildValue("N",arr). + Otherwise, if obj!=arr then the caller must call Py_DECREF(arr). + */ + +/* f2py_show_args(type_num,dims,rank,intent); */ + + if (intent & F2PY_INTENT_CACHE) { + /* Don't expect correct storage order or anything reasonable when + returning cache array. */ + if ((intent & F2PY_INTENT_HIDE) + || (obj==Py_None)) { + PyArrayObject *arr = NULL; + CHECK_DIMS_DEFINED(rank,dims,"optional,intent(cache) must" + " have defined dimensions.\n"); + arr = (PyArrayObject *)PyArray_FromDims(rank,dims,type_num); + ARR_IS_NULL(arr,"FromDims failed: optional,intent(cache)\n"); + if (intent & F2PY_INTENT_OUT) + Py_INCREF(arr); + return arr; + } + if (PyArray_Check(obj) + && ISCONTIGUOUS((PyArrayObject *)obj) + && HAS_PROPER_ELSIZE((PyArrayObject *)obj,type_num) + ) { + if (check_and_fix_dimensions((PyArrayObject *)obj,rank,dims)) + return NULL; /*XXX: set exception */ + if (intent & F2PY_INTENT_OUT) + Py_INCREF(obj); + return (PyArrayObject *)obj; + } + ARR_IS_NULL(NULL,"intent(cache) must be contiguous array with a proper elsize.\n"); + } + + if (intent & F2PY_INTENT_HIDE) { + PyArrayObject *arr = NULL; + CHECK_DIMS_DEFINED(rank,dims,"intent(hide) must have defined dimensions.\n"); + arr = (PyArrayObject *)PyArray_FromDims(rank,dims,type_num); + ARR_IS_NULL(arr,"FromDims failed: intent(hide)\n"); + if (intent & F2PY_INTENT_OUT) { + if ((!(intent & F2PY_INTENT_C)) && (rank>1)) { + lazy_transpose(arr); + arr->flags &= ~CONTIGUOUS; + } + Py_INCREF(arr); + } + return arr; + } + + if (PyArray_Check(obj)) { /* here we have always intent(in) or + intent(inout) */ + + PyArrayObject *arr = (PyArrayObject *)obj; + int is_cont = (intent & F2PY_INTENT_C) ? + (ISCONTIGUOUS(arr)) : (array_has_column_major_storage(arr)); + + if (check_and_fix_dimensions(arr,rank,dims)) + return NULL; /*XXX: set exception */ + + if ((intent & F2PY_INTENT_COPY) + || (! (is_cont + && HAS_PROPER_ELSIZE(arr,type_num) + && PyArray_CanCastSafely(arr->descr->type_num,type_num)))) { + PyArrayObject *tmp_arr = NULL; + if (intent & F2PY_INTENT_INOUT) { + ARR_IS_NULL(NULL,"intent(inout) array must be contiguous and" + " with a proper type and size.\n") + } + if ((rank>1) && (! (intent & F2PY_INTENT_C))) + lazy_transpose(arr); + if (PyArray_CanCastSafely(arr->descr->type_num,type_num)) { + tmp_arr = (PyArrayObject *)PyArray_CopyFromObject(obj,type_num,0,0); + ARR_IS_NULL(arr,"CopyFromObject failed: array.\n"); + } else { + tmp_arr = (PyArrayObject *)PyArray_FromDims(arr->nd, + arr->dimensions, + type_num); + ARR_IS_NULL(tmp_arr,"FromDims failed: array with unsafe cast.\n"); + if (copy_ND_array(arr,tmp_arr)) + ARR_IS_NULL(NULL,"copy_ND_array failed: array with unsafe cast.\n"); + } + if ((rank>1) && (! (intent & F2PY_INTENT_C))) { + lazy_transpose(arr); + lazy_transpose(tmp_arr); + tmp_arr->flags &= ~CONTIGUOUS; + } + arr = tmp_arr; + } + if (intent & F2PY_INTENT_OUT) + Py_INCREF(arr); + return arr; + } + + if ((obj==Py_None) && (intent & F2PY_OPTIONAL)) { + PyArrayObject *arr = NULL; + CHECK_DIMS_DEFINED(rank,dims,"optional must have defined dimensions.\n"); + arr = (PyArrayObject *)PyArray_FromDims(rank,dims,type_num); + ARR_IS_NULL(arr,"FromDims failed: optional.\n"); + if (intent & F2PY_INTENT_OUT) { + if ((!(intent & F2PY_INTENT_C)) && (rank>1)) { + lazy_transpose(arr); + arr->flags &= ~CONTIGUOUS; + } + Py_INCREF(arr); + } + return arr; + } + + if (intent & F2PY_INTENT_INOUT) { + ARR_IS_NULL(NULL,"intent(inout) argument must be an array.\n"); + } + + { + PyArrayObject *arr = (PyArrayObject *) \ + PyArray_ContiguousFromObject(obj,type_num,0,0); + ARR_IS_NULL(arr,"ContiguousFromObject failed: not a sequence.\n"); + if (check_and_fix_dimensions(arr,rank,dims)) + return NULL; /*XXX: set exception */ + if ((rank>1) && (! (intent & F2PY_INTENT_C))) { + PyArrayObject *tmp_arr = NULL; + lazy_transpose(arr); + arr->flags &= ~CONTIGUOUS; + tmp_arr = (PyArrayObject *) PyArray_CopyFromObject((PyObject *)arr,type_num,0,0); + Py_DECREF(arr); + arr = tmp_arr; + ARR_IS_NULL(arr,"CopyFromObject(Array) failed: intent(fortran)\n"); + lazy_transpose(arr); + arr->flags &= ~CONTIGUOUS; + } + if (intent & F2PY_INTENT_OUT) + Py_INCREF(arr); + return arr; + } + +} + + /*****************************************/ + /* Helper functions for array_from_pyobj */ + /*****************************************/ + +static +int array_has_column_major_storage(const PyArrayObject *ap) { + /* array_has_column_major_storage(a) is equivalent to + transpose(a).iscontiguous() but more efficient. + + This function can be used in order to decide whether to use a + Fortran or C version of a wrapped function. This is relevant, for + example, in choosing a clapack or flapack function depending on + the storage order of array arguments. + */ + int sd; + int i; + sd = ap->descr->elsize; + for (i=0;i<ap->nd;++i) { + if (ap->dimensions[i] == 0) return 1; + if (ap->strides[i] != sd) return 0; + sd *= ap->dimensions[i]; + } + return 1; +} + +static +void lazy_transpose(PyArrayObject* arr) { + /* + Changes the order of array strides and dimensions. This + corresponds to the lazy transpose of a Numeric array in-situ. + Note that this function is assumed to be used even times for a + given array. Otherwise, the caller should set flags &= ~CONTIGUOUS. + */ + int rank,i,s,j; + rank = arr->nd; + if (rank < 2) return; + + for(i=0,j=rank-1;i<rank/2;++i,--j) { + s = arr->strides[i]; + arr->strides[i] = arr->strides[j]; + arr->strides[j] = s; + s = arr->dimensions[i]; + arr->dimensions[i] = arr->dimensions[j]; + arr->dimensions[j] = s; + } +} + +static +int check_and_fix_dimensions(const PyArrayObject* arr,const int rank,int *dims) { + /* + This function fills in blanks (that are -1's) in dims list using + the dimensions from arr. It also checks that non-blank dims will + match with the corresponding values in arr dimensions. + */ + const int arr_size = (arr->nd)?PyArray_Size((PyObject *)arr):1; + + if (rank > arr->nd) { /* [1,2] -> [[1],[2]]; 1 -> [[1]] */ + int new_size = 1; + int free_axe = -1; + int i; + /* Fill dims where -1 or 0; check dimensions; calc new_size; */ + for(i=0;i<arr->nd;++i) { + if (dims[i] >= 0) { + if (dims[i]!=arr->dimensions[i]) { + fprintf(stderr,"%d-th dimension must be fixed to %d but got %d\n", + i,dims[i],arr->dimensions[i]); + return 1; + } + if (!dims[i]) dims[i] = 1; + } else { + dims[i] = arr->dimensions[i] ? arr->dimensions[i] : 1; + } + new_size *= dims[i]; + } + for(i=arr->nd;i<rank;++i) + if (dims[i]>1) { + fprintf(stderr,"%d-th dimension must be %d but got 0 (not defined).\n", + i,dims[i]); + return 1; + } else if (free_axe<0) + free_axe = i; + else + dims[i] = 1; + if (free_axe>=0) { + dims[free_axe] = arr_size/new_size; + new_size *= dims[free_axe]; + } + if (new_size != arr_size) { + fprintf(stderr,"confused: new_size=%d, arr_size=%d (maybe too many free" + " indices)\n",new_size,arr_size); + return 1; + } + } else { + int i; + for (i=rank;i<arr->nd;++i) + if (arr->dimensions[i]>1) { + fprintf(stderr,"too many axes: %d, expected rank=%d\n",arr->nd,rank); + return 1; + } + for (i=0;i<rank;++i) + if (dims[i]>=0) { + if (arr->dimensions[i]!=dims[i]) { + fprintf(stderr,"%d-th dimension must be fixed to %d but got %d\n", + i,dims[i],arr->dimensions[i]); + return 1; + } + if (!dims[i]) dims[i] = 1; + } else + dims[i] = arr->dimensions[i]; + } + return 0; +} + +/* End of file: array_from_pyobj.c */ diff --git a/numpy/f2py/doc/multiarray/bar.c b/numpy/f2py/doc/multiarray/bar.c new file mode 100644 index 000000000..350636ea6 --- /dev/null +++ b/numpy/f2py/doc/multiarray/bar.c @@ -0,0 +1,15 @@ + +#include <stdio.h> + +void bar(int *a,int m,int n) { + int i,j; + printf("C:"); + printf("m=%d, n=%d\n",m,n); + for (i=0;i<m;++i) { + printf("Row %d:\n",i+1); + for (j=0;j<n;++j) + printf("a(i=%d,j=%d)=%d\n",i,j,a[n*i+j]); + } + if (m*n) + a[0] = 7777; +} diff --git a/numpy/f2py/doc/multiarray/foo.f b/numpy/f2py/doc/multiarray/foo.f new file mode 100644 index 000000000..f8c39c4d1 --- /dev/null +++ b/numpy/f2py/doc/multiarray/foo.f @@ -0,0 +1,13 @@ + subroutine foo(a,m,n) + integer a(m,n), m,n,i,j + print*, "F77:" + print*, "m=",m,", n=",n + do 100,i=1,m + print*, "Row ",i,":" + do 50,j=1,n + print*, "a(i=",i,",j=",j,") = ",a(i,j) + 50 continue + 100 continue + if (m*n.gt.0) a(1,1) = 77777 + end + diff --git a/numpy/f2py/doc/multiarray/fortran_array_from_pyobj.txt b/numpy/f2py/doc/multiarray/fortran_array_from_pyobj.txt new file mode 100644 index 000000000..c7b945c84 --- /dev/null +++ b/numpy/f2py/doc/multiarray/fortran_array_from_pyobj.txt @@ -0,0 +1,284 @@ + + _____________________________________________________________ + / Proposed internal structure for f2py generated extension \ + < modules regarding the issues with different storage-orders > + \ of multi-dimensional matrices in Fortran and C. / + ============================================================= + +Author: Pearu Peterson +Date: 14 January, 2001 + +Definitions: +============ + +In the following I will use the following definitions: + +1) A matrix is a mathematical object that represents a collection of + objects (elements), usually visualized in a table form, and one can + define a set of various (algebraic,etc) operations for matrices. + One can think of a matrix as a defintion of a certain mapping: + (i) |--> A(i) + where i belongs to the set of indices (an index itself can be a + sequence of objects, for example, a sequence of integers) and A(i) + is an element from a specified set, for example, a set of fruits. + Symbol A then denotes a matrix of fruits. + +2) An array is a storage object that represents a collection of + objects stored in a certain systematic way, for example, as an + ordered sequence in computer memory. + +In order to manipulate matrices using computers, one must store matrix +elements in computer memory. In the following, I will assume that the +elements of a matrix is stored as an array. There is no unique way in +which order one should save matrix elements in the array. However, in +C and Fortran programming languages, two, unfortunately different, +conventions are used. + +Aim: +==== + +The purpose of this writing is to work out an interface for Python +language so that C and Fortran routines can be called without +bothering about how multi-dimensional matrices are stored in memory. +For example, accessing a matrix element A[i,j] in Python will be +equivalent to accessing the same matrix in C, using A[i][j], or in +Fortran, using A(i,j). + +External conditions: +==================== + +In C programming language, it is custom to think that matrices are +stored in the so-called row-major order, that is, a matrix is stored +row by row, each row is as a contiguous array in computer memory. + +In Fortran programming language, matrices are stored in the +column-major order: each column is a contiguous array in computer +memory. + +In Python programming language, matrices can be stored using Python +Numeric array() function that uses internally C approach, that is, +elements of matrices are stored in row-major order. For example, +A = array([[1,2,3],[4,5,6]]) represents a 2-by-3 matrix + + / 1 2 3 \ + | | + \ 4 5 6 / + +and its elements are stored in computer memory as the following array: + + 1 2 3 4 5 6 + +The same matrix, if used in Fortran, would be stored in computer +memory as the following array: + + 1 4 2 5 3 6 + +Problem and solution: +===================== + +A problem arises if one wants to use the same matrix both in C and in +Fortran functions. Then the difference in storage order of a matrix +elements must be taken into account. This technical detail can be very +confusing even for an experienced programmer. This is because when +passing a matrix to a Fortran subroutine, you must (mentally or +programmically) transpose the matrix and when the subroutine returns, +you must transpose it back. + +As will be discussed below, there is a way to overcome these +difficulties in Python by creating an interface between Python and +Fortran code layers that takes care of this transition internally. So +that if you will read the manual pages of the Fortran codes, then you +need not to think about how matrices are actually stored, the storage +order will be the same, seemingly. + +Python / C / Fortran interface: +=============================== + +The interface between Python and Fortran codes will use the following +Python Numeric feature: transposing a Numeric array does not involve +copying of its data but just permuting the dimensions and strides of +the array (the so-called lazy transpose). + +However, when passing a Numeric array data pointer to Fortran or C +function, the data must be contiguous in memory. If it is not, then +data is rearranged inplace. I don't think that it can be avoided. +This is certainly a penalty hit to performance. However, one can +easily avoid it by creating a Numeric array with the right storage +order, so that after transposing, the array data will be contiguous in +memory and the data pointer can safely passed on to the Fortran +subroutine. This lazy-transpose operation will be done within the +interface and users need not to bother about this detail anymore (that +is, after they initialize Numeric array with matrix elements using the +proper order. Of course, the proper order depends on the target +function: C or Fortran). The interface should be smart enough to +minimize the need of real-transpose operations and the need to +additional memory storage as well. + +Statement of the problem: +========================= + +Consider a M-by-N matrix A of integers, where M and N are the number A +rows and columns, respectively. + +In Fortran language, the storing array of this matrix can be defined +as follows: + + integer A(M,N) + +in C: + + int A[M][N]; + +and in Python: + + A = Numeric.zeros((M,N),'i') + +Consider also the corresponding Fortran and C functions that +that use matrix arguments: + +Fortran: + subroutine FUN(A,M,N) + integer A(M,N) + ... + end +C: + void cun(int *a,int m,int n) { + ... + } + +and the corresponding Python interface signatures: + + def py_fun(a): + ... + def py_cun(a): + ... + +Main goal: +========== + +Our goal is to generate Python C/API functions py_fun and py_cun such +that their usage in Python would be identical. The cruical part of +their implementation are in functions that take a PyObject and +return a PyArrayObject such that it is contiguous and its data pointer +is suitable for passing on to the arguments of C or Fortran functions. +The prototypes of these functions are: + +PyArrayObject* fortran_array_from_pyobj( + int typecode, + int *dims, + int rank, + int intent, + PyObject *obj); + +and + +PyArrayObject* c_array_from_pyobj( + int typecode, + int *dims, + int rank, + int intent, + PyObject *obj); + +for wrapping Fortran and C functions, respectively. + +Pseudo-code for fortran_array_from_pyobj: +========================================= + +if type(obj) is ArrayType: + #raise not check(len(ravel(obj)) >= dims[0]*dims[1]*...*dims[rank-1]) + if obj.typecode is typecode: + if is_contiguous(obj): + transpose_data_inplace(obj) # real-transpose + set_transpose_strides(obj) # lazy-transpose + Py_INCREF(obj); + return obj + set_transpose_strides(obj) + if is_contiguous(obj): + set_transpose_strides(obj) + Py_INCREF(obj); + return obj + else: + tmp_obj = PyArray_ContiguousFromObject(obj,typecode,0,0) + swap_datapointer_and_typeinfo(obj,tmp_obj) + Py_DECREF(tmp_obj); + set_transpose_strides(obj) + Py_INCREF(obj); + return obj + else: + tmp_obj = PyArray_FromDims(rank,dims,typecode) + set_transpose_strides(tmp_obj) + if intent in [in,inout]: + copy_ND_array(obj,tmp_obj) + swap_datapointer_and_typeinfo(obj,tmp_obj) + Py_DECREF(tmp_obj); + Py_INCREF(obj); + return obj +elif obj is None: # happens when only intent is 'hide' + tmp_obj = PyArray_FromDims(rank,dims,typecode) + if intent is out: + set_transpose_strides(tmp_obj) + # otherwise tmp_obj->data is used as a work array + Py_INCREF(tmp_obj) + return tmp_obj +else: + tmp_obj = PyArray_ContiguousFromObject(obj,typecode,0,0) + #raise not check(len(ravel(obj)) >= dims[0]*dims[1]*...*dims[rank-1]) + set_transpose_strides(tmp_obj) + transpose_data_inplace(tmp_obj) + Py_INCREF(tmp_obj) + return tmp_obj + +Notes: + 1) CPU expensive tasks are in transpose_data_inplace and + copy_ND_array, PyArray_ContiguousFromObject. + 2) Memory expensive tasks are in PyArray_FromDims, + PyArray_ContiguousFromObject + 3) Side-effects are expected when set_transpose_strides and + transpose_data_inplace are used. For example: + >>> a = Numeric([[1,2,3],[4,5,6]],'d') + >>> a.is_contiguous() + 1 + >>> py_fun(a) + >>> a.typecode() + 'i' + >>> a.is_contiguous() + 0 + >>> transpose(a).is_contiguous() + 1 + +Pseudo-code for c_array_from_pyobj: +=================================== + +if type(obj) is ArrayType: + #raise not check(len(ravel(obj)) >= dims[0]*dims[1]*...*dims[rank-1]) + if obj.typecode is typecode: + if is_contiguous(obj): + Py_INCREF(obj); + return obj + else: + tmp_obj = PyArray_ContiguousFromObject(obj,typecode,0,0) + swap_datapointer_and_typeinfo(obj,tmp_obj) + Py_DECREF(tmp_obj); + Py_INCREF(obj); + return obj + else: + tmp_obj = PyArray_FromDims(rank,dims,typecode) + if intent in [in,inout]: + copy_ND_array(obj,tmp_obj) + swap_datapointer_and_typeinfo(obj,tmp_obj) + Py_DECREF(tmp_obj); + Py_INCREF(obj); + return obj +elif obj is None: # happens when only intent is 'hide' + tmp_obj = PyArray_FromDims(rank,dims,typecode) + Py_INCREF(tmp_obj) + return tmp_obj +else: + tmp_obj = PyArray_ContiguousFromObject(obj,typecode,0,0) + #raise not check(len(ravel(obj)) >= dims[0]*dims[1]*...*dims[rank-1]) + Py_INCREF(tmp_obj) + return tmp_obj + + +14 January, 2002 +Pearu Peterson <pearu@cens.ioc.ee>
\ No newline at end of file diff --git a/numpy/f2py/doc/multiarray/fun.pyf b/numpy/f2py/doc/multiarray/fun.pyf new file mode 100644 index 000000000..ed5d1923f --- /dev/null +++ b/numpy/f2py/doc/multiarray/fun.pyf @@ -0,0 +1,89 @@ +!%f90 -*- f90 -*- + +! Example: +! Using f2py for wrapping multi-dimensional Fortran and C arrays +! [NEW APPROACH, use it with f2py higher than 2.8.x] +! $Id: fun.pyf,v 1.3 2002/01/18 10:06:50 pearu Exp $ + +! Usage (with gcc compiler): +! f2py -c fun.pyf foo.f bar.c + +python module fun ! in + interface ! in :fun + +! >>> from Numeric import * +! >>> import fun +! >>> a=array([[1,2,3],[4,5,6]]) + + subroutine foo(a,m,n) ! in :fun:foo.f + integer dimension(m,n) :: a + intent(in,out,copy) :: a + integer optional,check(shape(a,0)==m),depend(a) :: m=shape(a,0) + integer optional,check(shape(a,1)==n),depend(a) :: n=shape(a,1) + end subroutine foo + +! >>> print fun.foo.__doc__ +! foo - Function signature: +! a = foo(a,[m,n]) +! Required arguments: +! a : input rank-2 array('i') with bounds (m,n) +! Optional arguments: +! m := shape(a,0) input int +! n := shape(a,1) input int +! Return objects: +! a : rank-2 array('i') with bounds (m,n) + +! >>> print fun.foo(a) +! F77: +! m= 2, n= 3 +! Row 1: +! a(i= 1,j= 1) = 1 +! a(i= 1,j= 2) = 2 +! a(i= 1,j= 3) = 3 +! Row 2: +! a(i= 2,j= 1) = 4 +! a(i= 2,j= 2) = 5 +! a(i= 2,j= 3) = 6 +! [[77777 2 3] +! [ 4 5 6]] + + + subroutine bar(a,m,n) + intent(c) + intent(c) bar + integer dimension(m,n) :: a + intent(in,out) :: a + integer optional,check(shape(a,0)==m),depend(a) :: m=shape(a,0) + integer optional,check(shape(a,1)==n),depend(a) :: n=shape(a,1) + intent(in) m,n + end subroutine bar + +! >>> print fun.bar.__doc__ +! bar - Function signature: +! a = bar(a,[m,n]) +! Required arguments: +! a : input rank-2 array('i') with bounds (m,n) +! Optional arguments: +! m := shape(a,0) input int +! n := shape(a,1) input int +! Return objects: +! a : rank-2 array('i') with bounds (m,n) + +! >>> print fun.bar(a) +! C:m=2, n=3 +! Row 1: +! a(i=0,j=0)=1 +! a(i=0,j=1)=2 +! a(i=0,j=2)=3 +! Row 2: +! a(i=1,j=0)=4 +! a(i=1,j=1)=5 +! a(i=1,j=2)=6 +! [[7777 2 3] +! [ 4 5 6]] + + end interface +end python module fun + +! This file was auto-generated with f2py (version:2.9.166). +! See http://cens.ioc.ee/projects/f2py2e/ diff --git a/numpy/f2py/doc/multiarray/run.pyf b/numpy/f2py/doc/multiarray/run.pyf new file mode 100644 index 000000000..bb12a439b --- /dev/null +++ b/numpy/f2py/doc/multiarray/run.pyf @@ -0,0 +1,91 @@ +!%f90 -*- f90 -*- + +! Example: +! Using f2py for wrapping multi-dimensional Fortran and C arrays +! [OLD APPROACH, do not use it with f2py higher than 2.8.x] +! $Id: run.pyf,v 1.1 2002/01/14 15:49:46 pearu Exp $ + +! Usage (with gcc compiler): +! f2py -c run.pyf foo.f bar.c -DNO_APPEND_FORTRAN + +python module run ! in + interface ! in :run + +! >>> from Numeric import * +! >>> import run +! >>> a=array([[1,2,3],[4,5,6]],'i') + + subroutine foo(a,m,n) + fortranname foo_ + integer dimension(m,n) :: a + integer optional,check(shape(a,1)==m),depend(a) :: m=shape(a,1) + integer optional,check(shape(a,0)==n),depend(a) :: n=shape(a,0) + end subroutine foo + +! >>> print run.foo.__doc__ +! foo - Function signature: +! foo(a,[m,n]) +! Required arguments: +! a : input rank-2 array('i') with bounds (n,m) +! Optional arguments: +! m := shape(a,1) input int +! n := shape(a,0) input int + +! >>> run.foo(a) +! F77: +! m= 3, n= 2 +! Row 1: +! a(i= 1,j= 1) = 1 +! a(i= 1,j= 2) = 4 +! Row 2: +! a(i= 2,j= 1) = 2 +! a(i= 2,j= 2) = 5 +! Row 3: +! a(i= 3,j= 1) = 3 +! a(i= 3,j= 2) = 6 + +! >>> run.foo(transpose(a)) +! F77: +! m= 2, n= 3 +! Row 1: +! a(i= 1,j= 1) = 1 +! a(i= 1,j= 2) = 2 +! a(i= 1,j= 3) = 3 +! Row 2: +! a(i= 2,j= 1) = 4 +! a(i= 2,j= 2) = 5 +! a(i= 2,j= 3) = 6 + + subroutine bar(a,m,n) + intent(c) + integer dimension(m,n) :: a + integer optional,check(shape(a,0)==m),depend(a) :: m=shape(a,0) + integer optional,check(shape(a,1)==n),depend(a) :: n=shape(a,1) + end subroutine bar + +! >>> print run.bar.__doc__ +! bar - Function signature: +! bar(a,[m,n]) +! Required arguments: +! a : rank-2 array('i') with bounds (m,n) +! Optional arguments: +! m := shape(a,0) int +! n := shape(a,1) int + +! >>> run.bar(a) +! C:m=2, n=3 +! Row 1: +! a(i=0,j=0)=1 +! a(i=0,j=1)=2 +! a(i=0,j=2)=3 +! Row 2: +! a(i=1,j=0)=4 +! a(i=1,j=1)=5 +! a(i=1,j=2)=6 + + + end interface +end python module run + +! This file was auto-generated with f2py (version:2.8.172). +! See http://cens.ioc.ee/projects/f2py2e/ diff --git a/numpy/f2py/doc/multiarray/transpose.txt b/numpy/f2py/doc/multiarray/transpose.txt new file mode 100644 index 000000000..a8d41e6df --- /dev/null +++ b/numpy/f2py/doc/multiarray/transpose.txt @@ -0,0 +1,1127 @@ +From: Phil Garner (garner@signal.dra.hmg.gb) + Subject: In place matrix transpose + Newsgroups: sci.math.num-analysis + Date: 1993-08-05 06:35:06 PST + + +Someone was talking about matrix transposes earlier on. It's a +curious subject. I found that an in-place transpose is about 12 times +slower than the trivial copying method. + +Here's somthing I nicked from netlib and translated into C to do the +in-place one for those that are interested: (matrix must be in one +block) + + +typedef float scalar; /* float -> double for double precision */ + +/* + * In Place Matrix Transpose + * From: Algorithm 380 collected algorithms from ACM. + * Converted to C by Phil Garner + * + * Algorithm appeared in comm. ACM, vol. 13, no. 05, + * p. 324. + */ +int trans(scalar *a, unsigned m, unsigned n, int *move, int iwrk) +{ + scalar b; + int i, j, k, i1, i2, ia, ib, ncount, kmi, Max, mn; + + /* + * a is a one-dimensional array of length mn=m*n, which + * contains the m by n matrix to be transposed. + * move is a one-dimensional array of length iwrk + * used to store information to speed up the process. the + * value iwrk=(m+n)/2 is recommended. Return val indicates the + * success or failure of the routine. + * normal return = 0 + * errors + * -2, iwrk negative or zero. + * ret > 0, (should never occur). in this case + * we set ret equal to the final value of i when the search + * is completed but some loops have not been moved. + * check arguments and initialise + */ + + /* Function Body */ + if (n < 2 || m < 2) + return 0; + if (iwrk < 1) + return -2; + + /* If matrix is square, exchange elements a(i,j) and a(j,i). */ + if (n == m) + { + for (i = 0; i < m - 1; ++i) + for (j = i + 1; j < m; ++j) + { + i1 = i + j * m; + i2 = j + i * m; + b = a[i1]; + a[i1] = a[i2]; + a[i2] = b; + } return 0; + } + + /* Non square matrix */ + ncount = 2; + for (i = 0; i < iwrk; ++i) + move[i] = 0; + + if (n > 2) + /* Count number,ncount, of single points. */ + for (ia = 1; ia < n - 1; ++ia) + { + ib = ia * (m - 1) / (n - 1); + if (ia * (m - 1) != ib * (n - 1)) + continue; + ++ncount; + i = ia * m + ib; + if (i > iwrk) + continue; + move[i] = 1; + } + + /* Set initial values for search. */ + mn = m * n; + k = mn - 1; + kmi = k - 1; + Max = mn; + i = 1; + + while (1) + { + /* Rearrange elements of a loop. */ + /* At least one loop must be re-arranged. */ + i1 = i; + while (1) + { + b = a[i1]; + while (1) + { + i2 = n * i1 - k * (i1 / m); + if (i1 <= iwrk) + move[i1 - 1] = 2; + ++ncount; + if (i2 == i || i2 >= kmi) + { + if (Max == kmi || i2 == i) + break; + Max = kmi; + } + a[i1] = a[i2]; + i1 = i2; + } + + /* Test for symmetric pair of loops. */ + a[i1] = b; + if (ncount >= mn) + return 0; + if (i2 == Max || Max == kmi) + break; + Max = kmi; + i1 = Max; + } + + /* Search for loops to be rearranged. */ + while (1) + { + Max = k - i; + ++i; + kmi = k - i; + if (i > Max) + return i; + if (i <= iwrk) + { + if (move[i-1] < 1) + break; + continue; + } + if (i == n * i - k * (i / m)) + continue; + i1 = i; + while (1) + { + i2 = n * i1 - k * (i1 / m); + if (i2 <= i || i2 >= Max) + break; + i1 = i2; + } + if (i2 == i) + break; + } + } /* End never reached */ +} + +-- + ,----------------------------- ______ + ____ | Phil Garner. \___| |/ \ \ ____ +/__/ `--, _L__L\_ | garner@signal.dra.hmg.gb | _|`---' \_/__/ `--, +`-0---0-' `-0--0-' `--OO-------------------O-----' `---0---' `-0---0-' + + From: Murray Dow (mld900@anusf.anu.edu.au) + Subject: Re: In place matrix transpose + Newsgroups: sci.math.num-analysis + Date: 1993-08-09 19:45:57 PST + + +In article <23qmp3INN3gl@mentor.dra.hmg.gb>, garner@signal.dra.hmg.gb (Phil Garner) writes: +|> Someone was talking about matrix transposes earlier on. It's a +|> curious subject. I found that an in-place transpose is about 12 times +|> slower than the trivial copying method. +|> + +Algorithm 380 from CACM is sloweer than ALG 467. Here are my times +from a VP2200 vector computer. Note that the CACM algorithms are scalar. +Times are in seconds, for a 900*904 matrix: + +380 NAG 467 disc copy +1.03 1.14 .391 .177 + +Compare two vector algortihms, one I wrote and the second a matrix +copy: + +My Alg Matrix copy +.0095 .0097 + +Conclusions: dont use Alg 380 from Netlib. If you have the available memory, +do a matrix copy. If you don't have the memory, I will send you my algorithm +when I have published it. +-- +Murray Dow GPO Box 4 Canberra ACT 2601 Australia +Supercomputer Facility Phone: +61 6 2495028 +Australian National University Fax: +61 6 2473425 +mld900@anusf.anu.edu.au + +============================================================================= + +From: Mark Smotherman (mark@hubcap.clemson.edu) + Subject: Matrix transpose benchmark [was Re: MIPS R8000 == TFP?] + Newsgroups: comp.arch, comp.benchmarks, comp.sys.super + Date: 1994-07-01 06:35:51 PST + + +mccalpin@perelandra.cms.udel.edu (John D. McCalpin) writes: + +> +>Of course, these results are all for the naive algorithm. I would be +>interested to see what an efficient blocked algorithm looks like. +>Anyone care to offer one? There is clearly a lot of performance +>to be gained by the effort.... + +Here is a matrix transpose benchmark generator. Enter something like + + 10d10eij; + +and you get a benchmark program with tiles of size 10 for the i and j +inner loops. Please email code improvements and flames. + +Enjoy! + + +/*--------------------------------------------------------------------------- + + Matrix Transpose Generator + + Copyright 1993, Dept. of Computer Science, Clemson University + + Permission to use, copy, modify, and distribute this software and + its documentation for any purpose and without fee is hereby granted, + provided that the above copyright notice appears in all copies. + + Clemson University and its Dept. of Computer Science make no + representations about the suitability of this software for any + purpose. It is provided "as is" without express or implied warranty. + + Original author: Mark Smotherman + + -------------------------------------------------------------------------*/ + + +/* tpgen.c version 1.0 + * + * generate a matrix transpose loop nest, with tiling and unrolling + * (timing code using getrusage is included in the generated program) + * + * mark smotherman + * mark@cs.clemson.edu + * clemson university + * 9 july 1993 + * + * a loop nest can be described by the order of its loop indices, so + * this program takes as input a simple language describing these indices: + * <number>d ==> generate tiling loop for index i with step size of <number> + * <number>e ==> generate tiling loop for index j with step size of <number> + * <number>i ==> generate loop for index i with unrolling factor of <number> + * <number>j ==> generate loop for index j with unrolling factor of <number> + * ; ==> input terminator (required) + * rules are: + * i,j tokens must appear + * if d appears, it must appear before i + * if e appears, it must appear before j + * ; must appear + * matrix size is controlled by #define N in this program. + * + * this code was adapted from mmgen.c v1.2 and extended to generate pre- + * condition loops for unrolling factors that do not evenly divide the + * matrix size (or the tiling step size for loop nests with a tiling loop). + * note that this program only provides a preconditioning loop for the + * innermost loop. unrolling factors for non-innermost loops that do not + * evenly divide the matrix size (or step size) are not supported. + * + * my interest in this program generator is to hook it to a sentence + * generator and a minimum execution time finder, that is + * while((sentence=sgen())!=NULL){ + * genprogram=tpgen(sentence); + * system("cc -O4 genprogram.c"); + * system("a.out >> tpresults"); + * } + * findmintime(tpresults); + * this will find the optimum algorithm for the host system via an + * exhaustive search. + * + * please report bugs and suggestions for enhancements to me. + */ + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#define N 500 + +#define ALLOC1 temp1=(struct line *)malloc(sizeof(struct line));\ +temp1->indentcnt=indentcnt; + +#define LINK1 temp1->next=insertbefore;\ +insertafter->next=temp1;\ +insertafter=temp1; + +#define INSERT1 temp1->next=start;\ +start=temp1; + +#define ALLOC2 temp1=(struct line *)malloc(sizeof(struct line));\ +temp2=(struct line *)malloc(sizeof(struct line));\ +temp1->indentcnt=indentcnt;\ +temp2->indentcnt=indentcnt++; + +#define LINK2 temp1->next=temp2;\ +temp2->next=insertbefore;\ +insertafter->next=temp1;\ +insertafter=temp1;\ +insertbefore=temp2; + +struct line{ int indentcnt; char line[256]; struct line *next; }; + +int indentcnt; +int iflag,jflag; +int ijflag,jiflag; +int dflag,eflag; +int counter; +int iistep,jjstep; +int iunroll,junroll; +int precond; + +char c; +int i,ttp,nt; +char *p0; +char tptype[80]; +char number[10]; + +struct line *start,*head,*insertafter,*insertbefore,*temp1,*temp2; + +void processloop(); +void processstmt(); + +main(){ + + indentcnt=0; + iflag=jflag=0; + ijflag=jiflag=0; + dflag=eflag=0; + iunroll=junroll=0; + counter=1; + precond=0; + ttp=0; + + start=NULL; + ALLOC2 + sprintf(temp1->line,"/* begin */\nt_start=second();\n"); + sprintf(temp2->line,"/* end */\nt_end = second();\n"); + head=temp1; temp1->next=temp2; temp2->next=NULL; + insertafter=temp1; insertbefore=temp2; + + while((c=getchar())!=';'){ + tptype[ttp++]=c; + if(isdigit(c)){ + nt=0; + while(isdigit(c)){ + number[nt++]=c; + c=getchar(); + if(c==';'){ fprintf(stderr,"unexpected ;!\n"); exit(1); } + tptype[ttp++]=c; + } + number[nt]='\0'; + sscanf(number,"%d",&counter); + } + switch(c){ + case 'd': + if(iflag){ fprintf(stderr,"d cannot appear after i!\n"); exit(1); } + dflag++; + ALLOC1 + sprintf(temp1->line,"#define IISTEP %d\n",counter); + INSERT1 + iistep=counter; + counter=1; + ALLOC2 + sprintf(temp1->line,"for(ii=0;ii<%d;ii+=IISTEP){\n",N); + sprintf(temp2->line,"}\n",N); + LINK2 + ALLOC1 + sprintf(temp1->line,"it=min(ii+IISTEP,%d);\n",N); + LINK1 + break; + case 'e': + if(jflag){ fprintf(stderr,"e cannot appear after j!\n"); exit(1); } + eflag++; + ALLOC1 + sprintf(temp1->line,"#define JJSTEP %d\n",counter); + INSERT1 + jjstep=counter; + counter=1; + ALLOC2 + sprintf(temp1->line,"for(jj=0;jj<%d;jj+=JJSTEP){\n",N); + sprintf(temp2->line,"}\n",N); + LINK2 + ALLOC1 + sprintf(temp1->line,"jt=min(jj+JJSTEP,%d);\n",N); + LINK1 + break; + case 'i': + iunroll=counter; + counter=1; + iflag++; if(jflag) jiflag++; + if(dflag) precond=iistep%iunroll; else precond=N%iunroll; + if(precond&&(jiflag==0)){ + fprintf(stderr,"unrolling factor for outer loop i\n"); + fprintf(stderr," does not evenly divide matrix/step size!\n"); + exit(1); + } + if(dflag&&(iunroll>1)&&(N%iistep)){ + fprintf(stderr,"with unrolling of i, step size for tiled loop ii\n"); + fprintf(stderr," does not evenly divide matrix size!\n"); + exit(1); + } + processloop('i',dflag,iunroll,precond,junroll); + break; + case 'j': + junroll=counter; + counter=1; + jflag++; if(iflag) ijflag++; + if(eflag) precond=jjstep%junroll; else precond=N%junroll; + if(precond&&(ijflag==0)){ + fprintf(stderr,"unrolling factor for outer loop j\n"); + fprintf(stderr," does not evenly divide matrix/step size!\n"); + exit(1); + } + if(eflag&&(junroll>1)&&(N%jjstep)){ + fprintf(stderr,"with unrolling of j, step size for tiled loop jj\n"); + fprintf(stderr," does not evenly divide matrix size!\n"); + exit(1); + } + processloop('j',eflag,junroll,precond,iunroll); + break; + default: break; + } + } + processstmt(); + + tptype[ttp++]=c; + + if((iflag==0)||(jflag==0)){ + fprintf(stderr, + "one of the loops (i,j) was not specified!\n"); + exit(1); + } + + temp1=start; + while(temp1!=NULL){ + printf("%s",temp1->line); + temp1=temp1->next; + } + printf("#include <stdio.h>\n"); + printf("#include <sys/time.h>\n"); + printf("#include <sys/resource.h>\n"); + if(dflag|eflag) printf("#define min(a,b) ((a)<=(b)?(a):(b))\n"); + printf("double second();\n"); + printf("double t_start,t_end,t_total;\n"); + printf("int times;\n"); + printf("\ndouble b[%d][%d],dummy[10000],bt[%d][%d];\n\nmain(){\n" + ,N,N,N,N); + if(precond) printf(" int i,j,n;\n"); else printf(" int i,j;\n"); + if(dflag) printf(" int ii,it;\n"); + if(eflag) printf(" int jj,jt;\n"); + printf("/* set coefficients so that result matrix should have \n"); + printf(" * column entries equal to column index\n"); + printf(" */\n"); + printf(" for (i=0;i<%d;i++){\n",N); + printf(" for (j=0;j<%d;j++){\n",N); + printf(" b[i][j] = (double) i;\n"); + printf(" }\n"); + printf(" }\n"); + printf("\n t_total=0.0;\n for(times=0;times<10;times++){\n\n",N); + printf("/* try to flush cache */\n"); + printf(" for(i=0;i<10000;i++){\n",N); + printf(" dummy[i] = 0.0;\n"); + printf(" }\n"); + printf("%s",head->line); + temp1=head->next; + while(temp1!=NULL){ + for(i=0;i<temp1->indentcnt;i++) printf(" "); + while((p0=strstr(temp1->line,"+0"))!=NULL){ + *p0++=' '; *p0=' '; + } + printf("%s",temp1->line); + temp1=temp1->next; + } + printf("\n t_total+=t_end-t_start;\n }\n"); + printf("/* check result */\n"); + printf(" for (j=0;j<%d;j++){\n",N); + printf(" for (i=0;i<%d;i++){\n",N); + printf(" if (bt[i][j]!=((double)j)){\n"); + printf(" fprintf(stderr,\"error in bt[%cd][%cd]",'%','%'); + printf("\\n\",i,j);\n"); + printf(" fprintf(stderr,\" for %s\\n\");\n",tptype); + printf(" exit(1);\n"); + printf(" }\n"); + printf(" }\n"); + printf(" }\n"); + tptype[ttp]='\0'; + printf(" printf(\"%c10.2f secs\",t_total);\n",'%'); + printf(" printf(\" for 10 runs of %s\\n\");\n",tptype); + printf("}\n"); + printf("double second(){\n"); + printf(" void getrusage();\n"); + printf(" struct rusage ru;\n"); + printf(" double t;\n"); + printf(" getrusage(RUSAGE_SELF,&ru);\n"); + printf(" t = ((double)ru.ru_utime.tv_sec) +\n"); + printf(" ((double)ru.ru_utime.tv_usec)/1.0e6;\n"); + printf(" return t;\n"); + printf("}\n"); + +} + +void processloop(index,flag,unroll,precond,unroll2) +char index; +int flag,unroll,precond,unroll2; +{ + char build[80],temp[40]; + int n; + if(precond){ + ALLOC1 + sprintf(temp1->line,"/* preconditioning loop for unrolling factor */\n"); + LINK1 + if(unroll2==1){ + build[0]='\0'; + if(flag){ + if(index='i') + sprintf(temp,"n=IISTEP%c%d; ",'%',unroll); + else + sprintf(temp,"n=JJSTEP%c%d; ",'%',unroll); + strcat(build,temp); + sprintf(temp,"for(%c=%c%c;%c<%c%c+n;%c++) ",index,index,index, + index,index,index,index); + strcat(build,temp); + }else{ + sprintf(temp,"n=%d%c%d; ",N,'%',unroll); + strcat(build,temp); + sprintf(temp,"for(%c=0;%c<n;%c++) ",index,index,index); + strcat(build,temp); + } + sprintf(temp,"bt[i][j]=b[j][i];\n"); + strcat(build,temp); + ALLOC1 + sprintf(temp1->line,"%s\n",build); + LINK1 + }else{ + if(flag){ + ALLOC1 + if(index=='i') + sprintf(temp1->line,"n=IISTEP%c%d;\n",'%',unroll); + else + sprintf(temp1->line,"n=JJSTEP%c%d;\n",'%',unroll); + LINK1 + ALLOC1 + sprintf(temp1->line,"for(%c=%c%c;%c<%c%c+n;%c++){\n",index,index,index, + index,index,index,index); + LINK1 + }else{ + ALLOC1 + sprintf(temp1->line,"n=%d%c%d;\n",N,'%',unroll); + LINK1 + ALLOC1 + sprintf(temp1->line,"for(%c=0;%c<n;%c++){\n",index,index,index); + LINK1 + } + if(index=='i'){ + for(n=0;n<unroll2;n++){ + ALLOC1 + sprintf(temp1->line," bt[i][j+%d]=b[j+%d][i];\n",n,n); + LINK1 + } + }else{ + for(n=0;n<unroll2;n++){ + ALLOC1 + sprintf(temp1->line," bt[i+%d][j]=b[j][i+%d];\n",n,n); + LINK1 + } + } + ALLOC1 + sprintf(temp1->line,"}\n"); + LINK1 + } + ALLOC2 + if(flag){ + sprintf(temp1->line,"for(%c=%c%c+n;%c<%ct;%c+=%d){\n",index,index,index, + index,index,index,unroll); + }else{ + sprintf(temp1->line,"for(%c=n;%c<%d;%c+=%d){\n",index,index,N,index, + unroll); + } + sprintf(temp2->line,"}\n",N); + LINK2 + }else{ + ALLOC2 + if(unroll==1){ + if(flag){ + sprintf(temp1->line,"for(%c=%c%c;%c<%ct;%c++){\n",index,index,index, + index,index,index); + }else{ + sprintf(temp1->line,"for(%c=0;%c<%d;%c++){\n",index,index,N,index); + } + }else{ + if(flag){ + sprintf(temp1->line,"for(%c=%c%c;%c<%ct;%c+=%d){\n",index,index,index, + index,index,index,unroll); + }else{ + sprintf(temp1->line,"for(%c=0;%c<%d;%c+=%d){\n",index,index,N,index, + unroll); + } + } + sprintf(temp2->line,"}\n",N); + LINK2 + } +} + +void processstmt() +{ + int i,j; + for(i=0;i<iunroll;i++){ + for(j=0;j<junroll;j++){ + ALLOC1 + sprintf(temp1->line,"bt[i+%d][j+%d]=b[j+%d][i+%d];\n",i,j,j,i); + LINK1 + } + } +} +-- +Mark Smotherman, Computer Science Dept., Clemson University, Clemson, SC + +======================================================================= +From: has (h.genceli@bre.com) + Subject: transpose of a nxm matrix stored in a vector !!! + Newsgroups: sci.math.num-analysis + Date: 2000/07/25 + + +If I have a matrix nrows x ncols, I can store it in a vector. +so A(i,j) is really a[i*ncols+j]. So really TRANS of A +(say B) is really is also a vector B where + +0<=i b[j*nrows+i] <nrows, 0<=j<ncols +b[j*nrows+i] = a[i*ncols+j]. + +Fine but I want to use only one array a to do this transformation. + +i.e a[j*nrows+i] = a[i*ncols+j]. this will itself +erase some elements so each time a swap is necessary in a loop. + +temp = a[j*nrows+i] +a[j*nrows+i] = a[i*ncols+j] +a[i*ncols+j] = temp + +but still this will lose some info as it is, so indexing +should have more intelligence in it ???? anybody +can give me a lead here, thanks. + +Has + + From: wei-choon ng (wng@ux8.cso.uiuc.edu) + Subject: Re: transpose of a nxm matrix stored in a vector !!! + Newsgroups: sci.math.num-analysis + Date: 2000/07/25 + + +has <h.genceli@bre.com> wrote: +> If I have a matrix nrows x ncols, I can store it in a vector. +> so A(i,j) is really a[i*ncols+j]. So really TRANS of A +> (say B) is really is also a vector B where + +[snip] + +Hey, if you just want to do a transpose-matrix vector multiply, there is +no need to explicitly store the transpose matrix in another array and +doubling the storage! + +W.C. +-- + + From: Robin Becker (robin@jessikat.fsnet.co.uk) + Subject: Re: transpose of a nxm matrix stored in a vector !!! + Newsgroups: sci.math.num-analysis + Date: 2000/07/25 + + +In article <snr532fo3j1180@corp.supernews.com>, has <h.genceli@bre.com> +writes +>If I have a matrix nrows x ncols, I can store it in a vector. +>so A(i,j) is really a[i*ncols+j]. So really TRANS of A +>(say B) is really is also a vector B where +> +>0<=i b[j*nrows+i] <nrows, 0<=j<ncols +>b[j*nrows+i] = a[i*ncols+j]. +> +>Fine but I want to use only one array a to do this transformation. +> +>i.e a[j*nrows+i] = a[i*ncols+j]. this will itself +>erase some elements so each time a swap is necessary in a loop. +> +>temp = a[j*nrows+i] +>a[j*nrows+i] = a[i*ncols+j] +>a[i*ncols+j] = temp +> +>but still this will lose some info as it is, so indexing +>should have more intelligence in it ???? anybody +>can give me a lead here, thanks. +> +>Has +> +> +> + +void dmx_transpose(unsigned n, unsigned m, double* a, double* b) +{ + unsigned size = m*n; + if(b!=a){ + real *bmn, *aij, *anm; + bmn = b + size; /*b+n*m*/ + anm = a + size; + while(b<bmn) for(aij=a++;aij<anm; aij+=n ) *b++ = *aij; + } + else if(size>3){ + unsigned i,row,column,current; + for(i=1, size -= 2;i<size;i++){ + current = i; + do { + /*current = row+n*column*/ + column = current/m; + row = current%m; + current = n*row + column; + } while(current < i); + + if (current >i) { + real temp = a[i]; + a[i] = a[current]; + a[current] = temp; + } + } + } +} +-- +Robin Becker + + From: E. Robert Tisdale (edwin@netwood.net) + Subject: Re: transpose of a nxm matrix stored in a vector !!! + Newsgroups: sci.math.num-analysis + Date: 2000/07/25 + + +Take a look at +The C++ Scalar, Vector, Matrix and Tensor class library + + http://www.netwood.net/~edwin/svmt/ + +<Type><System>SubVector& + <Type><System>SubVector::transpose(Extent p, Extent q) { + <Type><System>SubVector& + v = *this; + if (1 < p && 1 < q) { + // A vector v of extent n = qp is viewed as a q by p matrix U and + // a p by q matrix V where U_{ij} = v_{p*i+j} and V_{ij} = v_{q*i+j}. + // The vector v is modified in-place so that V is the transpose of U. + // The algorithm searches for every sequence k_s of S indices + // such that a circular shift of elements v_{k_s} <-- v_{k_{s+1}} + // and v_{k_{S-1}} <-- v_{k_0} effects an in-place transpose. + Extent n = q*p; + Extent m = 0; // count up to n-2 + Offset l = 0; // 1 <= l <= n-2 + while (++l < n-1 && m < n-2) { + Offset k = l; + Offset j = k; + while (l < (k = (j%p)*q + j/p)) { // Search backward for k < l. + j = k; + } + // If a sequence of indices beginning with l has any index k < l, + // it has already been transposed. The sequence length S = 1 + // and diagonal element v_k is its own transpose if k = j. + // Skip every index sequence that has already been transposed. + if (k == l) { // a new sequence + if (k < j) { // with 1 < S + TYPE x = v[k]; // save v_{k_0} + do { + v[k] = v[j]; // v_{k_{s}} <-- v_{k_{s+1}} + k = j; + ++m; + } while (l < (j = (k%q)*p + k/q)); + v[k] = x; // v_{k_{S-1}} <-- v_{k_0} + } + ++m; + } + } + } return v; + } + + + +<Type><System>SubVector& + +Read the rest of this message... (50 more lines) + + From: Victor Eijkhout (eijkhout@disco.cs.utk.edu) + Subject: Re: transpose of a nxm matrix stored in a vector !!! + Newsgroups: sci.math.num-analysis + Date: 2000/07/25 + + +"Alan Miller" <amiller @ vic.bigpond.net.au> writes: + +> The attached routine does an in situ transpose. +> begin 666 Dtip.f90 +> M4U5"4D]55$E.12!D=&EP("AA+"!N,2P@;C(L(&YD:6TI#0HA("TM+2TM+2TM + +Hm. F90? You're not silently allocating a temporary I hope? + +(Why did you have to encode this? Now I have to save, this decode, ... +and all for plain ascii?) + +-- +Victor Eijkhout +"When I was coming up, [..] we knew exactly who the they were. It was us +versus them, and it was clear who the them was were. Today, we are not +so sure who the they are, but we know they're there." [G.W. Bush] + + From: Alan Miller (amiller_@_vic.bigpond.net.au) + Subject: Re: transpose of a nxm matrix stored in a vector !!! + Newsgroups: sci.math.num-analysis + Date: 2000/07/25 + + +Victor Eijkhout wrote in message ... +>"Alan Miller" <amiller @ vic.bigpond.net.au> writes: +> +>> The attached routine does an in situ transpose. +>> begin 666 Dtip.f90 +>> M4U5"4D]55$E.12!D=&EP("AA+"!N,2P@;C(L(&YD:6TI#0HA("TM+2TM+2TM +> +>Hm. F90? You're not silently allocating a temporary I hope? +> +>(Why did you have to encode this? Now I have to save, this decode, ... +>and all for plain ascii?) +> + +I know the problem. +I sometimes use a Unix system, and have to use decode64 to read +attachments. On the other hand, Windows wraps lines around, +formats then and generally makes the code unreadable. + +The straight code for dtip (double transpose in place) is attached +this time. + +>-- +>Victor Eijkhout + + +-- +Alan Miller, Retired Scientist (Statistician) +CSIRO Mathematical & Information Sciences +Alan.Miller -at- vic.cmis.csiro.au +http://www.ozemail.com.au/~milleraj +http://users.bigpond.net.au/amiller/ + + +================================================================= + +From: Darran Edmundson (dedmunds@sfu.ca) + Subject: array reordering algorithm? + Newsgroups: sci.math.num-analysis + Date: 1995/04/30 + + +A code I've written refers to a complex array as two separate real arrays. +However, I have a canned subroutine which expects a single array where the +real and imaginary values alternate. Essentially I have a case of mismatched +data structures, yet for reasons that I'd rather not go into, I'm stuck with them. + +Assuming that the two real arrays A and B are sequential in memory, and +that the single array of alternating real/imaginary values C shares the same +space, what I need is a porting subroutine that remaps the data from one format +to the other - using as little space as possible. + +I think of the problem as follows. Imagine an array of dimension 10 containing +the values 1,3,5,7,9,2,4,6,8,10 in this order. + + A(1) / 1 \ C(1) + A(2) | 3 | C(2) + A(3) | 5 | C(3) + A(4) | 7 | C(4) + A(5) \ 9 | C(5) + | + B(1) / 2 | C(6) + B(2) | 4 | C(7) + B(3) | 6 | C(8) + B(4) | 8 | C(9) + B(5) \ 10 / C(10) + +Given that I know this initial pattern, I want to sort the array C in-place *without +making comparisons*. That is, the algorithm can only depend on the initial +knowledge of the pattern. Do you see what a sort is going to do? It will +make the A and B arrays alternate, i.e. C(1)=A(1), C(2)=B(1), C(3)=A(2), +C(4)=B(2), etc. It's not a real sort though because I can't actually refer to the +values above (i.e. no comparisons) because A and B will be holding real data, +not this contrived pattern. The pattern above exists though - it's the +natural ordering in memory of A and B. + +Either pair swapping only or a small amount of workspace can be used. The +in-place is important - imagine scaling this problem up to an +array of 32 or 64 million double precision values and you can easily see how +duplicating the array is not a feasible solution. + +Any ideas? I've been stumped on this for a day and a half now. + +Darran Edmundson +dedmunds@sfu.ca + + From: Roger Critchlow (rec@elf115.elf.org) + Subject: Re: array reordering algorithm? + Newsgroups: sci.math.num-analysis + Date: 1995/04/30 + + + Any ideas? I've been stumped on this for a day and a half now. + +Here's some code for in situ permutations of arrays that I wrote +a few years ago. It all started from the in situ transposition +algorithms in the Collected Algorithms of the ACM, the references +for which always get lost during the decryption from fortran. + +This is the minimum space algorithm. All you need to supply is +a function which computes the new order array index from the old +order array index. + +If you can spare n*m bits to record the indexes of elements which +have been permuted, then you can speed things up. + +-- rec -- + +------------------------------------------------------------------------ +/* +** Arbitrary in situ permutations of an m by n array of base type TYPE. +** Copyright 1995 by Roger E Critchlow Jr, rec@elf.org, San Francisco, CA. +** Fair use permitted, caveat emptor. +*/ +typedef int TYPE; + +int transposition(int ij, int m, int n) /* transposition about diagonal from upper left to lower right */ +{ return ((ij%m)*n+ (ij/m)); } + +int countertrans(int ij, int m, int n) /* transposition about diagonal from upper right to lower left */ +{ return ((m-1-(ij%m))*n+ (n-1-(ij/m))); } + +int rotate90cw(int ij, int m, int n) /* 90 degree clockwise rotation */ +{ return ((m-1-(ij%m))*n+ (ij/m)); } + +int rotate90ccw(int ij, int m, int n) /* 90 degree counter clockwise rotation */ +{ return ((ij%m)*n+ (n-1-(ij/m))); } + +int rotate180(int ij, int m, int n) /* 180 degree rotation */ +{ return ((m-1-(ij/n))*n+ (n-1-(ij%n))); } + +int reflecth(int ij, int m, int n) /* reflection across horizontal plane */ +{ return ((m-1-(ij/n))*n+ (ij%n)); } + +int reflectv(int ij, int m, int n) /* reflection across vertical plane */ +{ return ((ij/n)*n+ (n-1-(ij%n))); } + +int in_situ_permutation(TYPE a[], int m, int n, int (*origination)(int ij, int m, int n)) +{ + int ij, oij, dij, n_to_do; + TYPE b; + n_to_do = m*n; + for (ij = 0; ij < m*n && n_to_do > 0; ij += 1) { + /* Test for previously permuted */ + for (oij = origination(ij,m,n); oij > ij; oij = origination(oij,m,n)) + ; + if (oij < ij) + continue; + /* Chase the cycle */ + dij = ij; + b = a[ij]; + for (oij = origination(dij,m,n); oij != ij; oij = origination(dij,m,n)) { + a[dij] = a[oij]; + dij = oij; + n_to_do -= 1; + } + a[dij] = b; + n_to_do -= 1; + } return 0; +} + +#define TESTING 1 +#if TESTING + +/* fill a matrix with sequential numbers, row major ordering */ +void fill_matrix_rows(a, m, n) TYPE *a; int m, n; +{ + int i, j; + for (i = 0; i < m; i += 1) + for (j = 0; j < n; j += 1) + a[i*n+j] = i*n+j; +} + +/* fill a matrix with sequential numbers, column major ordering */ +void fill_matrix_cols(a, m, n) TYPE *a; int m, n; +{ + int i, j; + for (i = 0; i < m; i += 1) + for (j = 0; j < n; j += 1) + a[i*n+j] = j*m+i; +} + +/* test a matrix for sequential numbers, row major ordering */ +int test_matrix_rows(a, m, n) TYPE *a; int m, n; +{ + int i, j, o; + for (o = i = 0; i < m; i += 1) + for (j = 0; j < n; j += 1) + o += a[i*n+j] != i*n+j; + return o; +} + +/* test a matrix for sequential numbers, column major ordering */ +int test_matrix_cols(a, m, n) TYPE *a; int m, n; +{ + int i, j, o; + for (o = i = 0; i < m; i += 1) + for (j = 0; j < n; j += 1) + o += a[i*n+j] != j*m+i; + return o; +} + +/* print a matrix */ +void print_matrix(a, m, n) TYPE *a; int m, n; +{ + char *format; + int i, j; + if (m*n < 10) format = "%2d"; + if (m*n < 100) format = "%3d"; + if (m*n < 1000) format = "%4d"; + if (m*n < 10000) format = "%5d"; + for (i = 0; i < m; i += 1) { + for (j = 0; j < n; j += 1) + printf(format, a[i*n+j]); + printf("\n"); + } +} + +#if TEST_TRANSPOSE +#define MAXSIZE 1000 + +main() +{ + int i, j, m, n, o; + TYPE a[MAXSIZE]; + for (m = 1; m < sizeof(a)/sizeof(a[0]); m += 1) + for (n = 1; m*n < sizeof(a)/sizeof(a[0]); n += 1) { + fill_matrix_rows(a, m, n); /* {0 1} {2 3} */ + if (o = transpose(a, m, n)) + printf(">> transpose returned %d for a[%d][%d], row major\n", o, m, n); + if ((o = test_matrix_cols(a, n, m)) != 0) /* {0 2} {1 3} */ + printf(">> transpose made %d mistakes for a[%d][%d], row major\n", o, m, n); + /* column major */ + fill_matrix_rows(a, m, n); + if (o = transpose(a, m, n)) + printf(">> transpose returned %d for a[%d][%d], column major\n", o, m, n); + if ((o = test_matrix_cols(a, n, m)) != 0) + printf(">> transpose made %d mistakes for a[%d][%d], column major\n", o, m, n); + } return 0; +} +#endif /* TEST_TRANSPOSE */ + + +#define TEST_DISPLAY 1 +#if TEST_DISPLAY +main(argc, argv) int argc; char *argv[]; +{ + TYPE *a; + int m = 5, n = 5; + extern void *malloc(); + if (argc > 1) { + m = atoi(argv[1]); + if (argc > 2) + n = atoi(argv[2]); + } + a = malloc(m*n*sizeof(TYPE)); + + printf("matrix\n"); + fill_matrix_rows(a, m, n); + print_matrix(a, m, n); + printf("transposition\n"); + in_situ_permutation(a, m, n, transposition); + print_matrix(a, n, m); + + printf("counter transposition\n"); + fill_matrix_rows(a, m, n); + in_situ_permutation(a, m, n, countertrans); + print_matrix(a, n, m); + + printf("rotate 90 degrees clockwise\n"); + fill_matrix_rows(a, m, n); + in_situ_permutation(a, m, n, rotate90cw); + print_matrix(a, n, m); + + printf("rotate 90 degrees counterclockwise\n"); + fill_matrix_rows(a, m, n); + in_situ_permutation(a, m, n, rotate90ccw); + print_matrix(a, n, m); + + printf("rotate 180 degrees\n"); + fill_matrix_rows(a, m, n); + in_situ_permutation(a, m, n, rotate180); + print_matrix(a, m, n); + + printf("reflect across horizontal\n"); + fill_matrix_rows(a, m, n); + in_situ_permutation(a, m, n, reflecth); + print_matrix(a, m, n); + + printf("reflect across vertical\n"); + fill_matrix_rows(a, m, n); + in_situ_permutation(a, m, n, reflectv); + print_matrix(a, m, n); + + return 0; +} + +#endif +#endif + |