summaryrefslogtreecommitdiff
path: root/numpy/f2py/doc/multiarray
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/f2py/doc/multiarray')
-rw-r--r--numpy/f2py/doc/multiarray/array_from_pyobj.c323
-rw-r--r--numpy/f2py/doc/multiarray/bar.c15
-rw-r--r--numpy/f2py/doc/multiarray/foo.f13
-rw-r--r--numpy/f2py/doc/multiarray/fortran_array_from_pyobj.txt284
-rw-r--r--numpy/f2py/doc/multiarray/fun.pyf89
-rw-r--r--numpy/f2py/doc/multiarray/run.pyf91
-rw-r--r--numpy/f2py/doc/multiarray/transpose.txt1127
7 files changed, 1942 insertions, 0 deletions
diff --git a/numpy/f2py/doc/multiarray/array_from_pyobj.c b/numpy/f2py/doc/multiarray/array_from_pyobj.c
new file mode 100644
index 000000000..7e0de9a74
--- /dev/null
+++ b/numpy/f2py/doc/multiarray/array_from_pyobj.c
@@ -0,0 +1,323 @@
+/*
+ * File: array_from_pyobj.c
+ *
+ * Description:
+ * ------------
+ * Provides array_from_pyobj function that returns a contigious array
+ * object with the given dimensions and required storage order, either
+ * in row-major (C) or column-major (Fortran) order. The function
+ * array_from_pyobj is very flexible about its Python object argument
+ * that can be any number, list, tuple, or array.
+ *
+ * array_from_pyobj is used in f2py generated Python extension
+ * modules.
+ *
+ * Author: Pearu Peterson <pearu@cens.ioc.ee>
+ * Created: 13-16 January 2002
+ * $Id: array_from_pyobj.c,v 1.1 2002/01/16 18:57:33 pearu Exp $
+ */
+
+
+#define ARR_IS_NULL(arr,mess) \
+if (arr==NULL) { \
+ fprintf(stderr,"array_from_pyobj:" mess); \
+ return NULL; \
+}
+
+#define CHECK_DIMS_DEFINED(rank,dims,mess) \
+if (count_nonpos(rank,dims)) { \
+ fprintf(stderr,"array_from_pyobj:" mess); \
+ return NULL; \
+}
+
+#define HAS_PROPER_ELSIZE(arr,type_num) \
+ ((PyArray_DescrFromType(type_num)->elsize) == (arr)->descr->elsize)
+
+/* static */
+/* void f2py_show_args(const int type_num, */
+/* const int *dims, */
+/* const int rank, */
+/* const int intent) { */
+/* int i; */
+/* fprintf(stderr,"array_from_pyobj:\n\ttype_num=%d\n\trank=%d\n\tintent=%d\n",\ */
+/* type_num,rank,intent); */
+/* for (i=0;i<rank;++i) */
+/* fprintf(stderr,"\tdims[%d]=%d\n",i,dims[i]); */
+/* } */
+
+static
+int count_nonpos(const int rank,
+ const int *dims) {
+ int i=0,r=0;
+ while (i<rank) {
+ if (dims[i] <= 0) ++r;
+ ++i;
+ }
+ return r;
+}
+
+static void lazy_transpose(PyArrayObject* arr);
+static int check_and_fix_dimensions(const PyArrayObject* arr,
+ const int rank,
+ int *dims);
+static
+int array_has_column_major_storage(const PyArrayObject *ap);
+
+static
+PyArrayObject* array_from_pyobj(const int type_num,
+ int *dims,
+ const int rank,
+ const int intent,
+ PyObject *obj) {
+ /* Note about reference counting
+ -----------------------------
+ If the caller returns the array to Python, it must be done with
+ Py_BuildValue("N",arr).
+ Otherwise, if obj!=arr then the caller must call Py_DECREF(arr).
+ */
+
+/* f2py_show_args(type_num,dims,rank,intent); */
+
+ if (intent & F2PY_INTENT_CACHE) {
+ /* Don't expect correct storage order or anything reasonable when
+ returning cache array. */
+ if ((intent & F2PY_INTENT_HIDE)
+ || (obj==Py_None)) {
+ PyArrayObject *arr = NULL;
+ CHECK_DIMS_DEFINED(rank,dims,"optional,intent(cache) must"
+ " have defined dimensions.\n");
+ arr = (PyArrayObject *)PyArray_FromDims(rank,dims,type_num);
+ ARR_IS_NULL(arr,"FromDims failed: optional,intent(cache)\n");
+ if (intent & F2PY_INTENT_OUT)
+ Py_INCREF(arr);
+ return arr;
+ }
+ if (PyArray_Check(obj)
+ && ISCONTIGUOUS((PyArrayObject *)obj)
+ && HAS_PROPER_ELSIZE((PyArrayObject *)obj,type_num)
+ ) {
+ if (check_and_fix_dimensions((PyArrayObject *)obj,rank,dims))
+ return NULL; /*XXX: set exception */
+ if (intent & F2PY_INTENT_OUT)
+ Py_INCREF(obj);
+ return (PyArrayObject *)obj;
+ }
+ ARR_IS_NULL(NULL,"intent(cache) must be contiguous array with a proper elsize.\n");
+ }
+
+ if (intent & F2PY_INTENT_HIDE) {
+ PyArrayObject *arr = NULL;
+ CHECK_DIMS_DEFINED(rank,dims,"intent(hide) must have defined dimensions.\n");
+ arr = (PyArrayObject *)PyArray_FromDims(rank,dims,type_num);
+ ARR_IS_NULL(arr,"FromDims failed: intent(hide)\n");
+ if (intent & F2PY_INTENT_OUT) {
+ if ((!(intent & F2PY_INTENT_C)) && (rank>1)) {
+ lazy_transpose(arr);
+ arr->flags &= ~CONTIGUOUS;
+ }
+ Py_INCREF(arr);
+ }
+ return arr;
+ }
+
+ if (PyArray_Check(obj)) { /* here we have always intent(in) or
+ intent(inout) */
+
+ PyArrayObject *arr = (PyArrayObject *)obj;
+ int is_cont = (intent & F2PY_INTENT_C) ?
+ (ISCONTIGUOUS(arr)) : (array_has_column_major_storage(arr));
+
+ if (check_and_fix_dimensions(arr,rank,dims))
+ return NULL; /*XXX: set exception */
+
+ if ((intent & F2PY_INTENT_COPY)
+ || (! (is_cont
+ && HAS_PROPER_ELSIZE(arr,type_num)
+ && PyArray_CanCastSafely(arr->descr->type_num,type_num)))) {
+ PyArrayObject *tmp_arr = NULL;
+ if (intent & F2PY_INTENT_INOUT) {
+ ARR_IS_NULL(NULL,"intent(inout) array must be contiguous and"
+ " with a proper type and size.\n")
+ }
+ if ((rank>1) && (! (intent & F2PY_INTENT_C)))
+ lazy_transpose(arr);
+ if (PyArray_CanCastSafely(arr->descr->type_num,type_num)) {
+ tmp_arr = (PyArrayObject *)PyArray_CopyFromObject(obj,type_num,0,0);
+ ARR_IS_NULL(arr,"CopyFromObject failed: array.\n");
+ } else {
+ tmp_arr = (PyArrayObject *)PyArray_FromDims(arr->nd,
+ arr->dimensions,
+ type_num);
+ ARR_IS_NULL(tmp_arr,"FromDims failed: array with unsafe cast.\n");
+ if (copy_ND_array(arr,tmp_arr))
+ ARR_IS_NULL(NULL,"copy_ND_array failed: array with unsafe cast.\n");
+ }
+ if ((rank>1) && (! (intent & F2PY_INTENT_C))) {
+ lazy_transpose(arr);
+ lazy_transpose(tmp_arr);
+ tmp_arr->flags &= ~CONTIGUOUS;
+ }
+ arr = tmp_arr;
+ }
+ if (intent & F2PY_INTENT_OUT)
+ Py_INCREF(arr);
+ return arr;
+ }
+
+ if ((obj==Py_None) && (intent & F2PY_OPTIONAL)) {
+ PyArrayObject *arr = NULL;
+ CHECK_DIMS_DEFINED(rank,dims,"optional must have defined dimensions.\n");
+ arr = (PyArrayObject *)PyArray_FromDims(rank,dims,type_num);
+ ARR_IS_NULL(arr,"FromDims failed: optional.\n");
+ if (intent & F2PY_INTENT_OUT) {
+ if ((!(intent & F2PY_INTENT_C)) && (rank>1)) {
+ lazy_transpose(arr);
+ arr->flags &= ~CONTIGUOUS;
+ }
+ Py_INCREF(arr);
+ }
+ return arr;
+ }
+
+ if (intent & F2PY_INTENT_INOUT) {
+ ARR_IS_NULL(NULL,"intent(inout) argument must be an array.\n");
+ }
+
+ {
+ PyArrayObject *arr = (PyArrayObject *) \
+ PyArray_ContiguousFromObject(obj,type_num,0,0);
+ ARR_IS_NULL(arr,"ContiguousFromObject failed: not a sequence.\n");
+ if (check_and_fix_dimensions(arr,rank,dims))
+ return NULL; /*XXX: set exception */
+ if ((rank>1) && (! (intent & F2PY_INTENT_C))) {
+ PyArrayObject *tmp_arr = NULL;
+ lazy_transpose(arr);
+ arr->flags &= ~CONTIGUOUS;
+ tmp_arr = (PyArrayObject *) PyArray_CopyFromObject((PyObject *)arr,type_num,0,0);
+ Py_DECREF(arr);
+ arr = tmp_arr;
+ ARR_IS_NULL(arr,"CopyFromObject(Array) failed: intent(fortran)\n");
+ lazy_transpose(arr);
+ arr->flags &= ~CONTIGUOUS;
+ }
+ if (intent & F2PY_INTENT_OUT)
+ Py_INCREF(arr);
+ return arr;
+ }
+
+}
+
+ /*****************************************/
+ /* Helper functions for array_from_pyobj */
+ /*****************************************/
+
+static
+int array_has_column_major_storage(const PyArrayObject *ap) {
+ /* array_has_column_major_storage(a) is equivalent to
+ transpose(a).iscontiguous() but more efficient.
+
+ This function can be used in order to decide whether to use a
+ Fortran or C version of a wrapped function. This is relevant, for
+ example, in choosing a clapack or flapack function depending on
+ the storage order of array arguments.
+ */
+ int sd;
+ int i;
+ sd = ap->descr->elsize;
+ for (i=0;i<ap->nd;++i) {
+ if (ap->dimensions[i] == 0) return 1;
+ if (ap->strides[i] != sd) return 0;
+ sd *= ap->dimensions[i];
+ }
+ return 1;
+}
+
+static
+void lazy_transpose(PyArrayObject* arr) {
+ /*
+ Changes the order of array strides and dimensions. This
+ corresponds to the lazy transpose of a Numeric array in-situ.
+ Note that this function is assumed to be used even times for a
+ given array. Otherwise, the caller should set flags &= ~CONTIGUOUS.
+ */
+ int rank,i,s,j;
+ rank = arr->nd;
+ if (rank < 2) return;
+
+ for(i=0,j=rank-1;i<rank/2;++i,--j) {
+ s = arr->strides[i];
+ arr->strides[i] = arr->strides[j];
+ arr->strides[j] = s;
+ s = arr->dimensions[i];
+ arr->dimensions[i] = arr->dimensions[j];
+ arr->dimensions[j] = s;
+ }
+}
+
+static
+int check_and_fix_dimensions(const PyArrayObject* arr,const int rank,int *dims) {
+ /*
+ This function fills in blanks (that are -1's) in dims list using
+ the dimensions from arr. It also checks that non-blank dims will
+ match with the corresponding values in arr dimensions.
+ */
+ const int arr_size = (arr->nd)?PyArray_Size((PyObject *)arr):1;
+
+ if (rank > arr->nd) { /* [1,2] -> [[1],[2]]; 1 -> [[1]] */
+ int new_size = 1;
+ int free_axe = -1;
+ int i;
+ /* Fill dims where -1 or 0; check dimensions; calc new_size; */
+ for(i=0;i<arr->nd;++i) {
+ if (dims[i] >= 0) {
+ if (dims[i]!=arr->dimensions[i]) {
+ fprintf(stderr,"%d-th dimension must be fixed to %d but got %d\n",
+ i,dims[i],arr->dimensions[i]);
+ return 1;
+ }
+ if (!dims[i]) dims[i] = 1;
+ } else {
+ dims[i] = arr->dimensions[i] ? arr->dimensions[i] : 1;
+ }
+ new_size *= dims[i];
+ }
+ for(i=arr->nd;i<rank;++i)
+ if (dims[i]>1) {
+ fprintf(stderr,"%d-th dimension must be %d but got 0 (not defined).\n",
+ i,dims[i]);
+ return 1;
+ } else if (free_axe<0)
+ free_axe = i;
+ else
+ dims[i] = 1;
+ if (free_axe>=0) {
+ dims[free_axe] = arr_size/new_size;
+ new_size *= dims[free_axe];
+ }
+ if (new_size != arr_size) {
+ fprintf(stderr,"confused: new_size=%d, arr_size=%d (maybe too many free"
+ " indices)\n",new_size,arr_size);
+ return 1;
+ }
+ } else {
+ int i;
+ for (i=rank;i<arr->nd;++i)
+ if (arr->dimensions[i]>1) {
+ fprintf(stderr,"too many axes: %d, expected rank=%d\n",arr->nd,rank);
+ return 1;
+ }
+ for (i=0;i<rank;++i)
+ if (dims[i]>=0) {
+ if (arr->dimensions[i]!=dims[i]) {
+ fprintf(stderr,"%d-th dimension must be fixed to %d but got %d\n",
+ i,dims[i],arr->dimensions[i]);
+ return 1;
+ }
+ if (!dims[i]) dims[i] = 1;
+ } else
+ dims[i] = arr->dimensions[i];
+ }
+ return 0;
+}
+
+/* End of file: array_from_pyobj.c */
diff --git a/numpy/f2py/doc/multiarray/bar.c b/numpy/f2py/doc/multiarray/bar.c
new file mode 100644
index 000000000..350636ea6
--- /dev/null
+++ b/numpy/f2py/doc/multiarray/bar.c
@@ -0,0 +1,15 @@
+
+#include <stdio.h>
+
+void bar(int *a,int m,int n) {
+ int i,j;
+ printf("C:");
+ printf("m=%d, n=%d\n",m,n);
+ for (i=0;i<m;++i) {
+ printf("Row %d:\n",i+1);
+ for (j=0;j<n;++j)
+ printf("a(i=%d,j=%d)=%d\n",i,j,a[n*i+j]);
+ }
+ if (m*n)
+ a[0] = 7777;
+}
diff --git a/numpy/f2py/doc/multiarray/foo.f b/numpy/f2py/doc/multiarray/foo.f
new file mode 100644
index 000000000..f8c39c4d1
--- /dev/null
+++ b/numpy/f2py/doc/multiarray/foo.f
@@ -0,0 +1,13 @@
+ subroutine foo(a,m,n)
+ integer a(m,n), m,n,i,j
+ print*, "F77:"
+ print*, "m=",m,", n=",n
+ do 100,i=1,m
+ print*, "Row ",i,":"
+ do 50,j=1,n
+ print*, "a(i=",i,",j=",j,") = ",a(i,j)
+ 50 continue
+ 100 continue
+ if (m*n.gt.0) a(1,1) = 77777
+ end
+
diff --git a/numpy/f2py/doc/multiarray/fortran_array_from_pyobj.txt b/numpy/f2py/doc/multiarray/fortran_array_from_pyobj.txt
new file mode 100644
index 000000000..c7b945c84
--- /dev/null
+++ b/numpy/f2py/doc/multiarray/fortran_array_from_pyobj.txt
@@ -0,0 +1,284 @@
+
+ _____________________________________________________________
+ / Proposed internal structure for f2py generated extension \
+ < modules regarding the issues with different storage-orders >
+ \ of multi-dimensional matrices in Fortran and C. /
+ =============================================================
+
+Author: Pearu Peterson
+Date: 14 January, 2001
+
+Definitions:
+============
+
+In the following I will use the following definitions:
+
+1) A matrix is a mathematical object that represents a collection of
+ objects (elements), usually visualized in a table form, and one can
+ define a set of various (algebraic,etc) operations for matrices.
+ One can think of a matrix as a defintion of a certain mapping:
+ (i) |--> A(i)
+ where i belongs to the set of indices (an index itself can be a
+ sequence of objects, for example, a sequence of integers) and A(i)
+ is an element from a specified set, for example, a set of fruits.
+ Symbol A then denotes a matrix of fruits.
+
+2) An array is a storage object that represents a collection of
+ objects stored in a certain systematic way, for example, as an
+ ordered sequence in computer memory.
+
+In order to manipulate matrices using computers, one must store matrix
+elements in computer memory. In the following, I will assume that the
+elements of a matrix is stored as an array. There is no unique way in
+which order one should save matrix elements in the array. However, in
+C and Fortran programming languages, two, unfortunately different,
+conventions are used.
+
+Aim:
+====
+
+The purpose of this writing is to work out an interface for Python
+language so that C and Fortran routines can be called without
+bothering about how multi-dimensional matrices are stored in memory.
+For example, accessing a matrix element A[i,j] in Python will be
+equivalent to accessing the same matrix in C, using A[i][j], or in
+Fortran, using A(i,j).
+
+External conditions:
+====================
+
+In C programming language, it is custom to think that matrices are
+stored in the so-called row-major order, that is, a matrix is stored
+row by row, each row is as a contiguous array in computer memory.
+
+In Fortran programming language, matrices are stored in the
+column-major order: each column is a contiguous array in computer
+memory.
+
+In Python programming language, matrices can be stored using Python
+Numeric array() function that uses internally C approach, that is,
+elements of matrices are stored in row-major order. For example,
+A = array([[1,2,3],[4,5,6]]) represents a 2-by-3 matrix
+
+ / 1 2 3 \
+ | |
+ \ 4 5 6 /
+
+and its elements are stored in computer memory as the following array:
+
+ 1 2 3 4 5 6
+
+The same matrix, if used in Fortran, would be stored in computer
+memory as the following array:
+
+ 1 4 2 5 3 6
+
+Problem and solution:
+=====================
+
+A problem arises if one wants to use the same matrix both in C and in
+Fortran functions. Then the difference in storage order of a matrix
+elements must be taken into account. This technical detail can be very
+confusing even for an experienced programmer. This is because when
+passing a matrix to a Fortran subroutine, you must (mentally or
+programmically) transpose the matrix and when the subroutine returns,
+you must transpose it back.
+
+As will be discussed below, there is a way to overcome these
+difficulties in Python by creating an interface between Python and
+Fortran code layers that takes care of this transition internally. So
+that if you will read the manual pages of the Fortran codes, then you
+need not to think about how matrices are actually stored, the storage
+order will be the same, seemingly.
+
+Python / C / Fortran interface:
+===============================
+
+The interface between Python and Fortran codes will use the following
+Python Numeric feature: transposing a Numeric array does not involve
+copying of its data but just permuting the dimensions and strides of
+the array (the so-called lazy transpose).
+
+However, when passing a Numeric array data pointer to Fortran or C
+function, the data must be contiguous in memory. If it is not, then
+data is rearranged inplace. I don't think that it can be avoided.
+This is certainly a penalty hit to performance. However, one can
+easily avoid it by creating a Numeric array with the right storage
+order, so that after transposing, the array data will be contiguous in
+memory and the data pointer can safely passed on to the Fortran
+subroutine. This lazy-transpose operation will be done within the
+interface and users need not to bother about this detail anymore (that
+is, after they initialize Numeric array with matrix elements using the
+proper order. Of course, the proper order depends on the target
+function: C or Fortran). The interface should be smart enough to
+minimize the need of real-transpose operations and the need to
+additional memory storage as well.
+
+Statement of the problem:
+=========================
+
+Consider a M-by-N matrix A of integers, where M and N are the number A
+rows and columns, respectively.
+
+In Fortran language, the storing array of this matrix can be defined
+as follows:
+
+ integer A(M,N)
+
+in C:
+
+ int A[M][N];
+
+and in Python:
+
+ A = Numeric.zeros((M,N),'i')
+
+Consider also the corresponding Fortran and C functions that
+that use matrix arguments:
+
+Fortran:
+ subroutine FUN(A,M,N)
+ integer A(M,N)
+ ...
+ end
+C:
+ void cun(int *a,int m,int n) {
+ ...
+ }
+
+and the corresponding Python interface signatures:
+
+ def py_fun(a):
+ ...
+ def py_cun(a):
+ ...
+
+Main goal:
+==========
+
+Our goal is to generate Python C/API functions py_fun and py_cun such
+that their usage in Python would be identical. The cruical part of
+their implementation are in functions that take a PyObject and
+return a PyArrayObject such that it is contiguous and its data pointer
+is suitable for passing on to the arguments of C or Fortran functions.
+The prototypes of these functions are:
+
+PyArrayObject* fortran_array_from_pyobj(
+ int typecode,
+ int *dims,
+ int rank,
+ int intent,
+ PyObject *obj);
+
+and
+
+PyArrayObject* c_array_from_pyobj(
+ int typecode,
+ int *dims,
+ int rank,
+ int intent,
+ PyObject *obj);
+
+for wrapping Fortran and C functions, respectively.
+
+Pseudo-code for fortran_array_from_pyobj:
+=========================================
+
+if type(obj) is ArrayType:
+ #raise not check(len(ravel(obj)) >= dims[0]*dims[1]*...*dims[rank-1])
+ if obj.typecode is typecode:
+ if is_contiguous(obj):
+ transpose_data_inplace(obj) # real-transpose
+ set_transpose_strides(obj) # lazy-transpose
+ Py_INCREF(obj);
+ return obj
+ set_transpose_strides(obj)
+ if is_contiguous(obj):
+ set_transpose_strides(obj)
+ Py_INCREF(obj);
+ return obj
+ else:
+ tmp_obj = PyArray_ContiguousFromObject(obj,typecode,0,0)
+ swap_datapointer_and_typeinfo(obj,tmp_obj)
+ Py_DECREF(tmp_obj);
+ set_transpose_strides(obj)
+ Py_INCREF(obj);
+ return obj
+ else:
+ tmp_obj = PyArray_FromDims(rank,dims,typecode)
+ set_transpose_strides(tmp_obj)
+ if intent in [in,inout]:
+ copy_ND_array(obj,tmp_obj)
+ swap_datapointer_and_typeinfo(obj,tmp_obj)
+ Py_DECREF(tmp_obj);
+ Py_INCREF(obj);
+ return obj
+elif obj is None: # happens when only intent is 'hide'
+ tmp_obj = PyArray_FromDims(rank,dims,typecode)
+ if intent is out:
+ set_transpose_strides(tmp_obj)
+ # otherwise tmp_obj->data is used as a work array
+ Py_INCREF(tmp_obj)
+ return tmp_obj
+else:
+ tmp_obj = PyArray_ContiguousFromObject(obj,typecode,0,0)
+ #raise not check(len(ravel(obj)) >= dims[0]*dims[1]*...*dims[rank-1])
+ set_transpose_strides(tmp_obj)
+ transpose_data_inplace(tmp_obj)
+ Py_INCREF(tmp_obj)
+ return tmp_obj
+
+Notes:
+ 1) CPU expensive tasks are in transpose_data_inplace and
+ copy_ND_array, PyArray_ContiguousFromObject.
+ 2) Memory expensive tasks are in PyArray_FromDims,
+ PyArray_ContiguousFromObject
+ 3) Side-effects are expected when set_transpose_strides and
+ transpose_data_inplace are used. For example:
+ >>> a = Numeric([[1,2,3],[4,5,6]],'d')
+ >>> a.is_contiguous()
+ 1
+ >>> py_fun(a)
+ >>> a.typecode()
+ 'i'
+ >>> a.is_contiguous()
+ 0
+ >>> transpose(a).is_contiguous()
+ 1
+
+Pseudo-code for c_array_from_pyobj:
+===================================
+
+if type(obj) is ArrayType:
+ #raise not check(len(ravel(obj)) >= dims[0]*dims[1]*...*dims[rank-1])
+ if obj.typecode is typecode:
+ if is_contiguous(obj):
+ Py_INCREF(obj);
+ return obj
+ else:
+ tmp_obj = PyArray_ContiguousFromObject(obj,typecode,0,0)
+ swap_datapointer_and_typeinfo(obj,tmp_obj)
+ Py_DECREF(tmp_obj);
+ Py_INCREF(obj);
+ return obj
+ else:
+ tmp_obj = PyArray_FromDims(rank,dims,typecode)
+ if intent in [in,inout]:
+ copy_ND_array(obj,tmp_obj)
+ swap_datapointer_and_typeinfo(obj,tmp_obj)
+ Py_DECREF(tmp_obj);
+ Py_INCREF(obj);
+ return obj
+elif obj is None: # happens when only intent is 'hide'
+ tmp_obj = PyArray_FromDims(rank,dims,typecode)
+ Py_INCREF(tmp_obj)
+ return tmp_obj
+else:
+ tmp_obj = PyArray_ContiguousFromObject(obj,typecode,0,0)
+ #raise not check(len(ravel(obj)) >= dims[0]*dims[1]*...*dims[rank-1])
+ Py_INCREF(tmp_obj)
+ return tmp_obj
+
+
+14 January, 2002
+Pearu Peterson <pearu@cens.ioc.ee> \ No newline at end of file
diff --git a/numpy/f2py/doc/multiarray/fun.pyf b/numpy/f2py/doc/multiarray/fun.pyf
new file mode 100644
index 000000000..ed5d1923f
--- /dev/null
+++ b/numpy/f2py/doc/multiarray/fun.pyf
@@ -0,0 +1,89 @@
+!%f90 -*- f90 -*-
+
+! Example:
+! Using f2py for wrapping multi-dimensional Fortran and C arrays
+! [NEW APPROACH, use it with f2py higher than 2.8.x]
+! $Id: fun.pyf,v 1.3 2002/01/18 10:06:50 pearu Exp $
+
+! Usage (with gcc compiler):
+! f2py -c fun.pyf foo.f bar.c
+
+python module fun ! in
+ interface ! in :fun
+
+! >>> from Numeric import *
+! >>> import fun
+! >>> a=array([[1,2,3],[4,5,6]])
+
+ subroutine foo(a,m,n) ! in :fun:foo.f
+ integer dimension(m,n) :: a
+ intent(in,out,copy) :: a
+ integer optional,check(shape(a,0)==m),depend(a) :: m=shape(a,0)
+ integer optional,check(shape(a,1)==n),depend(a) :: n=shape(a,1)
+ end subroutine foo
+
+! >>> print fun.foo.__doc__
+! foo - Function signature:
+! a = foo(a,[m,n])
+! Required arguments:
+! a : input rank-2 array('i') with bounds (m,n)
+! Optional arguments:
+! m := shape(a,0) input int
+! n := shape(a,1) input int
+! Return objects:
+! a : rank-2 array('i') with bounds (m,n)
+
+! >>> print fun.foo(a)
+! F77:
+! m= 2, n= 3
+! Row 1:
+! a(i= 1,j= 1) = 1
+! a(i= 1,j= 2) = 2
+! a(i= 1,j= 3) = 3
+! Row 2:
+! a(i= 2,j= 1) = 4
+! a(i= 2,j= 2) = 5
+! a(i= 2,j= 3) = 6
+! [[77777 2 3]
+! [ 4 5 6]]
+
+
+ subroutine bar(a,m,n)
+ intent(c)
+ intent(c) bar
+ integer dimension(m,n) :: a
+ intent(in,out) :: a
+ integer optional,check(shape(a,0)==m),depend(a) :: m=shape(a,0)
+ integer optional,check(shape(a,1)==n),depend(a) :: n=shape(a,1)
+ intent(in) m,n
+ end subroutine bar
+
+! >>> print fun.bar.__doc__
+! bar - Function signature:
+! a = bar(a,[m,n])
+! Required arguments:
+! a : input rank-2 array('i') with bounds (m,n)
+! Optional arguments:
+! m := shape(a,0) input int
+! n := shape(a,1) input int
+! Return objects:
+! a : rank-2 array('i') with bounds (m,n)
+
+! >>> print fun.bar(a)
+! C:m=2, n=3
+! Row 1:
+! a(i=0,j=0)=1
+! a(i=0,j=1)=2
+! a(i=0,j=2)=3
+! Row 2:
+! a(i=1,j=0)=4
+! a(i=1,j=1)=5
+! a(i=1,j=2)=6
+! [[7777 2 3]
+! [ 4 5 6]]
+
+ end interface
+end python module fun
+
+! This file was auto-generated with f2py (version:2.9.166).
+! See http://cens.ioc.ee/projects/f2py2e/
diff --git a/numpy/f2py/doc/multiarray/run.pyf b/numpy/f2py/doc/multiarray/run.pyf
new file mode 100644
index 000000000..bb12a439b
--- /dev/null
+++ b/numpy/f2py/doc/multiarray/run.pyf
@@ -0,0 +1,91 @@
+!%f90 -*- f90 -*-
+
+! Example:
+! Using f2py for wrapping multi-dimensional Fortran and C arrays
+! [OLD APPROACH, do not use it with f2py higher than 2.8.x]
+! $Id: run.pyf,v 1.1 2002/01/14 15:49:46 pearu Exp $
+
+! Usage (with gcc compiler):
+! f2py -c run.pyf foo.f bar.c -DNO_APPEND_FORTRAN
+
+python module run ! in
+ interface ! in :run
+
+! >>> from Numeric import *
+! >>> import run
+! >>> a=array([[1,2,3],[4,5,6]],'i')
+
+ subroutine foo(a,m,n)
+ fortranname foo_
+ integer dimension(m,n) :: a
+ integer optional,check(shape(a,1)==m),depend(a) :: m=shape(a,1)
+ integer optional,check(shape(a,0)==n),depend(a) :: n=shape(a,0)
+ end subroutine foo
+
+! >>> print run.foo.__doc__
+! foo - Function signature:
+! foo(a,[m,n])
+! Required arguments:
+! a : input rank-2 array('i') with bounds (n,m)
+! Optional arguments:
+! m := shape(a,1) input int
+! n := shape(a,0) input int
+
+! >>> run.foo(a)
+! F77:
+! m= 3, n= 2
+! Row 1:
+! a(i= 1,j= 1) = 1
+! a(i= 1,j= 2) = 4
+! Row 2:
+! a(i= 2,j= 1) = 2
+! a(i= 2,j= 2) = 5
+! Row 3:
+! a(i= 3,j= 1) = 3
+! a(i= 3,j= 2) = 6
+
+! >>> run.foo(transpose(a))
+! F77:
+! m= 2, n= 3
+! Row 1:
+! a(i= 1,j= 1) = 1
+! a(i= 1,j= 2) = 2
+! a(i= 1,j= 3) = 3
+! Row 2:
+! a(i= 2,j= 1) = 4
+! a(i= 2,j= 2) = 5
+! a(i= 2,j= 3) = 6
+
+ subroutine bar(a,m,n)
+ intent(c)
+ integer dimension(m,n) :: a
+ integer optional,check(shape(a,0)==m),depend(a) :: m=shape(a,0)
+ integer optional,check(shape(a,1)==n),depend(a) :: n=shape(a,1)
+ end subroutine bar
+
+! >>> print run.bar.__doc__
+! bar - Function signature:
+! bar(a,[m,n])
+! Required arguments:
+! a : rank-2 array('i') with bounds (m,n)
+! Optional arguments:
+! m := shape(a,0) int
+! n := shape(a,1) int
+
+! >>> run.bar(a)
+! C:m=2, n=3
+! Row 1:
+! a(i=0,j=0)=1
+! a(i=0,j=1)=2
+! a(i=0,j=2)=3
+! Row 2:
+! a(i=1,j=0)=4
+! a(i=1,j=1)=5
+! a(i=1,j=2)=6
+
+
+ end interface
+end python module run
+
+! This file was auto-generated with f2py (version:2.8.172).
+! See http://cens.ioc.ee/projects/f2py2e/
diff --git a/numpy/f2py/doc/multiarray/transpose.txt b/numpy/f2py/doc/multiarray/transpose.txt
new file mode 100644
index 000000000..a8d41e6df
--- /dev/null
+++ b/numpy/f2py/doc/multiarray/transpose.txt
@@ -0,0 +1,1127 @@
+From: Phil Garner (garner@signal.dra.hmg.gb)
+ Subject: In place matrix transpose
+ Newsgroups: sci.math.num-analysis
+ Date: 1993-08-05 06:35:06 PST
+
+
+Someone was talking about matrix transposes earlier on. It's a
+curious subject. I found that an in-place transpose is about 12 times
+slower than the trivial copying method.
+
+Here's somthing I nicked from netlib and translated into C to do the
+in-place one for those that are interested: (matrix must be in one
+block)
+
+
+typedef float scalar; /* float -> double for double precision */
+
+/*
+ * In Place Matrix Transpose
+ * From: Algorithm 380 collected algorithms from ACM.
+ * Converted to C by Phil Garner
+ *
+ * Algorithm appeared in comm. ACM, vol. 13, no. 05,
+ * p. 324.
+ */
+int trans(scalar *a, unsigned m, unsigned n, int *move, int iwrk)
+{
+ scalar b;
+ int i, j, k, i1, i2, ia, ib, ncount, kmi, Max, mn;
+
+ /*
+ * a is a one-dimensional array of length mn=m*n, which
+ * contains the m by n matrix to be transposed.
+ * move is a one-dimensional array of length iwrk
+ * used to store information to speed up the process. the
+ * value iwrk=(m+n)/2 is recommended. Return val indicates the
+ * success or failure of the routine.
+ * normal return = 0
+ * errors
+ * -2, iwrk negative or zero.
+ * ret > 0, (should never occur). in this case
+ * we set ret equal to the final value of i when the search
+ * is completed but some loops have not been moved.
+ * check arguments and initialise
+ */
+
+ /* Function Body */
+ if (n < 2 || m < 2)
+ return 0;
+ if (iwrk < 1)
+ return -2;
+
+ /* If matrix is square, exchange elements a(i,j) and a(j,i). */
+ if (n == m)
+ {
+ for (i = 0; i < m - 1; ++i)
+ for (j = i + 1; j < m; ++j)
+ {
+ i1 = i + j * m;
+ i2 = j + i * m;
+ b = a[i1];
+ a[i1] = a[i2];
+ a[i2] = b;
+ } return 0;
+ }
+
+ /* Non square matrix */
+ ncount = 2;
+ for (i = 0; i < iwrk; ++i)
+ move[i] = 0;
+
+ if (n > 2)
+ /* Count number,ncount, of single points. */
+ for (ia = 1; ia < n - 1; ++ia)
+ {
+ ib = ia * (m - 1) / (n - 1);
+ if (ia * (m - 1) != ib * (n - 1))
+ continue;
+ ++ncount;
+ i = ia * m + ib;
+ if (i > iwrk)
+ continue;
+ move[i] = 1;
+ }
+
+ /* Set initial values for search. */
+ mn = m * n;
+ k = mn - 1;
+ kmi = k - 1;
+ Max = mn;
+ i = 1;
+
+ while (1)
+ {
+ /* Rearrange elements of a loop. */
+ /* At least one loop must be re-arranged. */
+ i1 = i;
+ while (1)
+ {
+ b = a[i1];
+ while (1)
+ {
+ i2 = n * i1 - k * (i1 / m);
+ if (i1 <= iwrk)
+ move[i1 - 1] = 2;
+ ++ncount;
+ if (i2 == i || i2 >= kmi)
+ {
+ if (Max == kmi || i2 == i)
+ break;
+ Max = kmi;
+ }
+ a[i1] = a[i2];
+ i1 = i2;
+ }
+
+ /* Test for symmetric pair of loops. */
+ a[i1] = b;
+ if (ncount >= mn)
+ return 0;
+ if (i2 == Max || Max == kmi)
+ break;
+ Max = kmi;
+ i1 = Max;
+ }
+
+ /* Search for loops to be rearranged. */
+ while (1)
+ {
+ Max = k - i;
+ ++i;
+ kmi = k - i;
+ if (i > Max)
+ return i;
+ if (i <= iwrk)
+ {
+ if (move[i-1] < 1)
+ break;
+ continue;
+ }
+ if (i == n * i - k * (i / m))
+ continue;
+ i1 = i;
+ while (1)
+ {
+ i2 = n * i1 - k * (i1 / m);
+ if (i2 <= i || i2 >= Max)
+ break;
+ i1 = i2;
+ }
+ if (i2 == i)
+ break;
+ }
+ } /* End never reached */
+}
+
+--
+ ,----------------------------- ______
+ ____ | Phil Garner. \___| |/ \ \ ____
+/__/ `--, _L__L\_ | garner@signal.dra.hmg.gb | _|`---' \_/__/ `--,
+`-0---0-' `-0--0-' `--OO-------------------O-----' `---0---' `-0---0-'
+
+ From: Murray Dow (mld900@anusf.anu.edu.au)
+ Subject: Re: In place matrix transpose
+ Newsgroups: sci.math.num-analysis
+ Date: 1993-08-09 19:45:57 PST
+
+
+In article <23qmp3INN3gl@mentor.dra.hmg.gb>, garner@signal.dra.hmg.gb (Phil Garner) writes:
+|> Someone was talking about matrix transposes earlier on. It's a
+|> curious subject. I found that an in-place transpose is about 12 times
+|> slower than the trivial copying method.
+|>
+
+Algorithm 380 from CACM is sloweer than ALG 467. Here are my times
+from a VP2200 vector computer. Note that the CACM algorithms are scalar.
+Times are in seconds, for a 900*904 matrix:
+
+380 NAG 467 disc copy
+1.03 1.14 .391 .177
+
+Compare two vector algortihms, one I wrote and the second a matrix
+copy:
+
+My Alg Matrix copy
+.0095 .0097
+
+Conclusions: dont use Alg 380 from Netlib. If you have the available memory,
+do a matrix copy. If you don't have the memory, I will send you my algorithm
+when I have published it.
+--
+Murray Dow GPO Box 4 Canberra ACT 2601 Australia
+Supercomputer Facility Phone: +61 6 2495028
+Australian National University Fax: +61 6 2473425
+mld900@anusf.anu.edu.au
+
+=============================================================================
+
+From: Mark Smotherman (mark@hubcap.clemson.edu)
+ Subject: Matrix transpose benchmark [was Re: MIPS R8000 == TFP?]
+ Newsgroups: comp.arch, comp.benchmarks, comp.sys.super
+ Date: 1994-07-01 06:35:51 PST
+
+
+mccalpin@perelandra.cms.udel.edu (John D. McCalpin) writes:
+
+>
+>Of course, these results are all for the naive algorithm. I would be
+>interested to see what an efficient blocked algorithm looks like.
+>Anyone care to offer one? There is clearly a lot of performance
+>to be gained by the effort....
+
+Here is a matrix transpose benchmark generator. Enter something like
+
+ 10d10eij;
+
+and you get a benchmark program with tiles of size 10 for the i and j
+inner loops. Please email code improvements and flames.
+
+Enjoy!
+
+
+/*---------------------------------------------------------------------------
+
+ Matrix Transpose Generator
+
+ Copyright 1993, Dept. of Computer Science, Clemson University
+
+ Permission to use, copy, modify, and distribute this software and
+ its documentation for any purpose and without fee is hereby granted,
+ provided that the above copyright notice appears in all copies.
+
+ Clemson University and its Dept. of Computer Science make no
+ representations about the suitability of this software for any
+ purpose. It is provided "as is" without express or implied warranty.
+
+ Original author: Mark Smotherman
+
+ -------------------------------------------------------------------------*/
+
+
+/* tpgen.c version 1.0
+ *
+ * generate a matrix transpose loop nest, with tiling and unrolling
+ * (timing code using getrusage is included in the generated program)
+ *
+ * mark smotherman
+ * mark@cs.clemson.edu
+ * clemson university
+ * 9 july 1993
+ *
+ * a loop nest can be described by the order of its loop indices, so
+ * this program takes as input a simple language describing these indices:
+ * <number>d ==> generate tiling loop for index i with step size of <number>
+ * <number>e ==> generate tiling loop for index j with step size of <number>
+ * <number>i ==> generate loop for index i with unrolling factor of <number>
+ * <number>j ==> generate loop for index j with unrolling factor of <number>
+ * ; ==> input terminator (required)
+ * rules are:
+ * i,j tokens must appear
+ * if d appears, it must appear before i
+ * if e appears, it must appear before j
+ * ; must appear
+ * matrix size is controlled by #define N in this program.
+ *
+ * this code was adapted from mmgen.c v1.2 and extended to generate pre-
+ * condition loops for unrolling factors that do not evenly divide the
+ * matrix size (or the tiling step size for loop nests with a tiling loop).
+ * note that this program only provides a preconditioning loop for the
+ * innermost loop. unrolling factors for non-innermost loops that do not
+ * evenly divide the matrix size (or step size) are not supported.
+ *
+ * my interest in this program generator is to hook it to a sentence
+ * generator and a minimum execution time finder, that is
+ * while((sentence=sgen())!=NULL){
+ * genprogram=tpgen(sentence);
+ * system("cc -O4 genprogram.c");
+ * system("a.out >> tpresults");
+ * }
+ * findmintime(tpresults);
+ * this will find the optimum algorithm for the host system via an
+ * exhaustive search.
+ *
+ * please report bugs and suggestions for enhancements to me.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#define N 500
+
+#define ALLOC1 temp1=(struct line *)malloc(sizeof(struct line));\
+temp1->indentcnt=indentcnt;
+
+#define LINK1 temp1->next=insertbefore;\
+insertafter->next=temp1;\
+insertafter=temp1;
+
+#define INSERT1 temp1->next=start;\
+start=temp1;
+
+#define ALLOC2 temp1=(struct line *)malloc(sizeof(struct line));\
+temp2=(struct line *)malloc(sizeof(struct line));\
+temp1->indentcnt=indentcnt;\
+temp2->indentcnt=indentcnt++;
+
+#define LINK2 temp1->next=temp2;\
+temp2->next=insertbefore;\
+insertafter->next=temp1;\
+insertafter=temp1;\
+insertbefore=temp2;
+
+struct line{ int indentcnt; char line[256]; struct line *next; };
+
+int indentcnt;
+int iflag,jflag;
+int ijflag,jiflag;
+int dflag,eflag;
+int counter;
+int iistep,jjstep;
+int iunroll,junroll;
+int precond;
+
+char c;
+int i,ttp,nt;
+char *p0;
+char tptype[80];
+char number[10];
+
+struct line *start,*head,*insertafter,*insertbefore,*temp1,*temp2;
+
+void processloop();
+void processstmt();
+
+main(){
+
+ indentcnt=0;
+ iflag=jflag=0;
+ ijflag=jiflag=0;
+ dflag=eflag=0;
+ iunroll=junroll=0;
+ counter=1;
+ precond=0;
+ ttp=0;
+
+ start=NULL;
+ ALLOC2
+ sprintf(temp1->line,"/* begin */\nt_start=second();\n");
+ sprintf(temp2->line,"/* end */\nt_end = second();\n");
+ head=temp1; temp1->next=temp2; temp2->next=NULL;
+ insertafter=temp1; insertbefore=temp2;
+
+ while((c=getchar())!=';'){
+ tptype[ttp++]=c;
+ if(isdigit(c)){
+ nt=0;
+ while(isdigit(c)){
+ number[nt++]=c;
+ c=getchar();
+ if(c==';'){ fprintf(stderr,"unexpected ;!\n"); exit(1); }
+ tptype[ttp++]=c;
+ }
+ number[nt]='\0';
+ sscanf(number,"%d",&counter);
+ }
+ switch(c){
+ case 'd':
+ if(iflag){ fprintf(stderr,"d cannot appear after i!\n"); exit(1); }
+ dflag++;
+ ALLOC1
+ sprintf(temp1->line,"#define IISTEP %d\n",counter);
+ INSERT1
+ iistep=counter;
+ counter=1;
+ ALLOC2
+ sprintf(temp1->line,"for(ii=0;ii<%d;ii+=IISTEP){\n",N);
+ sprintf(temp2->line,"}\n",N);
+ LINK2
+ ALLOC1
+ sprintf(temp1->line,"it=min(ii+IISTEP,%d);\n",N);
+ LINK1
+ break;
+ case 'e':
+ if(jflag){ fprintf(stderr,"e cannot appear after j!\n"); exit(1); }
+ eflag++;
+ ALLOC1
+ sprintf(temp1->line,"#define JJSTEP %d\n",counter);
+ INSERT1
+ jjstep=counter;
+ counter=1;
+ ALLOC2
+ sprintf(temp1->line,"for(jj=0;jj<%d;jj+=JJSTEP){\n",N);
+ sprintf(temp2->line,"}\n",N);
+ LINK2
+ ALLOC1
+ sprintf(temp1->line,"jt=min(jj+JJSTEP,%d);\n",N);
+ LINK1
+ break;
+ case 'i':
+ iunroll=counter;
+ counter=1;
+ iflag++; if(jflag) jiflag++;
+ if(dflag) precond=iistep%iunroll; else precond=N%iunroll;
+ if(precond&&(jiflag==0)){
+ fprintf(stderr,"unrolling factor for outer loop i\n");
+ fprintf(stderr," does not evenly divide matrix/step size!\n");
+ exit(1);
+ }
+ if(dflag&&(iunroll>1)&&(N%iistep)){
+ fprintf(stderr,"with unrolling of i, step size for tiled loop ii\n");
+ fprintf(stderr," does not evenly divide matrix size!\n");
+ exit(1);
+ }
+ processloop('i',dflag,iunroll,precond,junroll);
+ break;
+ case 'j':
+ junroll=counter;
+ counter=1;
+ jflag++; if(iflag) ijflag++;
+ if(eflag) precond=jjstep%junroll; else precond=N%junroll;
+ if(precond&&(ijflag==0)){
+ fprintf(stderr,"unrolling factor for outer loop j\n");
+ fprintf(stderr," does not evenly divide matrix/step size!\n");
+ exit(1);
+ }
+ if(eflag&&(junroll>1)&&(N%jjstep)){
+ fprintf(stderr,"with unrolling of j, step size for tiled loop jj\n");
+ fprintf(stderr," does not evenly divide matrix size!\n");
+ exit(1);
+ }
+ processloop('j',eflag,junroll,precond,iunroll);
+ break;
+ default: break;
+ }
+ }
+ processstmt();
+
+ tptype[ttp++]=c;
+
+ if((iflag==0)||(jflag==0)){
+ fprintf(stderr,
+ "one of the loops (i,j) was not specified!\n");
+ exit(1);
+ }
+
+ temp1=start;
+ while(temp1!=NULL){
+ printf("%s",temp1->line);
+ temp1=temp1->next;
+ }
+ printf("#include <stdio.h>\n");
+ printf("#include <sys/time.h>\n");
+ printf("#include <sys/resource.h>\n");
+ if(dflag|eflag) printf("#define min(a,b) ((a)<=(b)?(a):(b))\n");
+ printf("double second();\n");
+ printf("double t_start,t_end,t_total;\n");
+ printf("int times;\n");
+ printf("\ndouble b[%d][%d],dummy[10000],bt[%d][%d];\n\nmain(){\n"
+ ,N,N,N,N);
+ if(precond) printf(" int i,j,n;\n"); else printf(" int i,j;\n");
+ if(dflag) printf(" int ii,it;\n");
+ if(eflag) printf(" int jj,jt;\n");
+ printf("/* set coefficients so that result matrix should have \n");
+ printf(" * column entries equal to column index\n");
+ printf(" */\n");
+ printf(" for (i=0;i<%d;i++){\n",N);
+ printf(" for (j=0;j<%d;j++){\n",N);
+ printf(" b[i][j] = (double) i;\n");
+ printf(" }\n");
+ printf(" }\n");
+ printf("\n t_total=0.0;\n for(times=0;times<10;times++){\n\n",N);
+ printf("/* try to flush cache */\n");
+ printf(" for(i=0;i<10000;i++){\n",N);
+ printf(" dummy[i] = 0.0;\n");
+ printf(" }\n");
+ printf("%s",head->line);
+ temp1=head->next;
+ while(temp1!=NULL){
+ for(i=0;i<temp1->indentcnt;i++) printf(" ");
+ while((p0=strstr(temp1->line,"+0"))!=NULL){
+ *p0++=' '; *p0=' ';
+ }
+ printf("%s",temp1->line);
+ temp1=temp1->next;
+ }
+ printf("\n t_total+=t_end-t_start;\n }\n");
+ printf("/* check result */\n");
+ printf(" for (j=0;j<%d;j++){\n",N);
+ printf(" for (i=0;i<%d;i++){\n",N);
+ printf(" if (bt[i][j]!=((double)j)){\n");
+ printf(" fprintf(stderr,\"error in bt[%cd][%cd]",'%','%');
+ printf("\\n\",i,j);\n");
+ printf(" fprintf(stderr,\" for %s\\n\");\n",tptype);
+ printf(" exit(1);\n");
+ printf(" }\n");
+ printf(" }\n");
+ printf(" }\n");
+ tptype[ttp]='\0';
+ printf(" printf(\"%c10.2f secs\",t_total);\n",'%');
+ printf(" printf(\" for 10 runs of %s\\n\");\n",tptype);
+ printf("}\n");
+ printf("double second(){\n");
+ printf(" void getrusage();\n");
+ printf(" struct rusage ru;\n");
+ printf(" double t;\n");
+ printf(" getrusage(RUSAGE_SELF,&ru);\n");
+ printf(" t = ((double)ru.ru_utime.tv_sec) +\n");
+ printf(" ((double)ru.ru_utime.tv_usec)/1.0e6;\n");
+ printf(" return t;\n");
+ printf("}\n");
+
+}
+
+void processloop(index,flag,unroll,precond,unroll2)
+char index;
+int flag,unroll,precond,unroll2;
+{
+ char build[80],temp[40];
+ int n;
+ if(precond){
+ ALLOC1
+ sprintf(temp1->line,"/* preconditioning loop for unrolling factor */\n");
+ LINK1
+ if(unroll2==1){
+ build[0]='\0';
+ if(flag){
+ if(index='i')
+ sprintf(temp,"n=IISTEP%c%d; ",'%',unroll);
+ else
+ sprintf(temp,"n=JJSTEP%c%d; ",'%',unroll);
+ strcat(build,temp);
+ sprintf(temp,"for(%c=%c%c;%c<%c%c+n;%c++) ",index,index,index,
+ index,index,index,index);
+ strcat(build,temp);
+ }else{
+ sprintf(temp,"n=%d%c%d; ",N,'%',unroll);
+ strcat(build,temp);
+ sprintf(temp,"for(%c=0;%c<n;%c++) ",index,index,index);
+ strcat(build,temp);
+ }
+ sprintf(temp,"bt[i][j]=b[j][i];\n");
+ strcat(build,temp);
+ ALLOC1
+ sprintf(temp1->line,"%s\n",build);
+ LINK1
+ }else{
+ if(flag){
+ ALLOC1
+ if(index=='i')
+ sprintf(temp1->line,"n=IISTEP%c%d;\n",'%',unroll);
+ else
+ sprintf(temp1->line,"n=JJSTEP%c%d;\n",'%',unroll);
+ LINK1
+ ALLOC1
+ sprintf(temp1->line,"for(%c=%c%c;%c<%c%c+n;%c++){\n",index,index,index,
+ index,index,index,index);
+ LINK1
+ }else{
+ ALLOC1
+ sprintf(temp1->line,"n=%d%c%d;\n",N,'%',unroll);
+ LINK1
+ ALLOC1
+ sprintf(temp1->line,"for(%c=0;%c<n;%c++){\n",index,index,index);
+ LINK1
+ }
+ if(index=='i'){
+ for(n=0;n<unroll2;n++){
+ ALLOC1
+ sprintf(temp1->line," bt[i][j+%d]=b[j+%d][i];\n",n,n);
+ LINK1
+ }
+ }else{
+ for(n=0;n<unroll2;n++){
+ ALLOC1
+ sprintf(temp1->line," bt[i+%d][j]=b[j][i+%d];\n",n,n);
+ LINK1
+ }
+ }
+ ALLOC1
+ sprintf(temp1->line,"}\n");
+ LINK1
+ }
+ ALLOC2
+ if(flag){
+ sprintf(temp1->line,"for(%c=%c%c+n;%c<%ct;%c+=%d){\n",index,index,index,
+ index,index,index,unroll);
+ }else{
+ sprintf(temp1->line,"for(%c=n;%c<%d;%c+=%d){\n",index,index,N,index,
+ unroll);
+ }
+ sprintf(temp2->line,"}\n",N);
+ LINK2
+ }else{
+ ALLOC2
+ if(unroll==1){
+ if(flag){
+ sprintf(temp1->line,"for(%c=%c%c;%c<%ct;%c++){\n",index,index,index,
+ index,index,index);
+ }else{
+ sprintf(temp1->line,"for(%c=0;%c<%d;%c++){\n",index,index,N,index);
+ }
+ }else{
+ if(flag){
+ sprintf(temp1->line,"for(%c=%c%c;%c<%ct;%c+=%d){\n",index,index,index,
+ index,index,index,unroll);
+ }else{
+ sprintf(temp1->line,"for(%c=0;%c<%d;%c+=%d){\n",index,index,N,index,
+ unroll);
+ }
+ }
+ sprintf(temp2->line,"}\n",N);
+ LINK2
+ }
+}
+
+void processstmt()
+{
+ int i,j;
+ for(i=0;i<iunroll;i++){
+ for(j=0;j<junroll;j++){
+ ALLOC1
+ sprintf(temp1->line,"bt[i+%d][j+%d]=b[j+%d][i+%d];\n",i,j,j,i);
+ LINK1
+ }
+ }
+}
+--
+Mark Smotherman, Computer Science Dept., Clemson University, Clemson, SC
+
+=======================================================================
+From: has (h.genceli@bre.com)
+ Subject: transpose of a nxm matrix stored in a vector !!!
+ Newsgroups: sci.math.num-analysis
+ Date: 2000/07/25
+
+
+If I have a matrix nrows x ncols, I can store it in a vector.
+so A(i,j) is really a[i*ncols+j]. So really TRANS of A
+(say B) is really is also a vector B where
+
+0<=i b[j*nrows+i] <nrows, 0<=j<ncols
+b[j*nrows+i] = a[i*ncols+j].
+
+Fine but I want to use only one array a to do this transformation.
+
+i.e a[j*nrows+i] = a[i*ncols+j]. this will itself
+erase some elements so each time a swap is necessary in a loop.
+
+temp = a[j*nrows+i]
+a[j*nrows+i] = a[i*ncols+j]
+a[i*ncols+j] = temp
+
+but still this will lose some info as it is, so indexing
+should have more intelligence in it ???? anybody
+can give me a lead here, thanks.
+
+Has
+
+ From: wei-choon ng (wng@ux8.cso.uiuc.edu)
+ Subject: Re: transpose of a nxm matrix stored in a vector !!!
+ Newsgroups: sci.math.num-analysis
+ Date: 2000/07/25
+
+
+has <h.genceli@bre.com> wrote:
+> If I have a matrix nrows x ncols, I can store it in a vector.
+> so A(i,j) is really a[i*ncols+j]. So really TRANS of A
+> (say B) is really is also a vector B where
+
+[snip]
+
+Hey, if you just want to do a transpose-matrix vector multiply, there is
+no need to explicitly store the transpose matrix in another array and
+doubling the storage!
+
+W.C.
+--
+
+ From: Robin Becker (robin@jessikat.fsnet.co.uk)
+ Subject: Re: transpose of a nxm matrix stored in a vector !!!
+ Newsgroups: sci.math.num-analysis
+ Date: 2000/07/25
+
+
+In article <snr532fo3j1180@corp.supernews.com>, has <h.genceli@bre.com>
+writes
+>If I have a matrix nrows x ncols, I can store it in a vector.
+>so A(i,j) is really a[i*ncols+j]. So really TRANS of A
+>(say B) is really is also a vector B where
+>
+>0<=i b[j*nrows+i] <nrows, 0<=j<ncols
+>b[j*nrows+i] = a[i*ncols+j].
+>
+>Fine but I want to use only one array a to do this transformation.
+>
+>i.e a[j*nrows+i] = a[i*ncols+j]. this will itself
+>erase some elements so each time a swap is necessary in a loop.
+>
+>temp = a[j*nrows+i]
+>a[j*nrows+i] = a[i*ncols+j]
+>a[i*ncols+j] = temp
+>
+>but still this will lose some info as it is, so indexing
+>should have more intelligence in it ???? anybody
+>can give me a lead here, thanks.
+>
+>Has
+>
+>
+>
+
+void dmx_transpose(unsigned n, unsigned m, double* a, double* b)
+{
+ unsigned size = m*n;
+ if(b!=a){
+ real *bmn, *aij, *anm;
+ bmn = b + size; /*b+n*m*/
+ anm = a + size;
+ while(b<bmn) for(aij=a++;aij<anm; aij+=n ) *b++ = *aij;
+ }
+ else if(size>3){
+ unsigned i,row,column,current;
+ for(i=1, size -= 2;i<size;i++){
+ current = i;
+ do {
+ /*current = row+n*column*/
+ column = current/m;
+ row = current%m;
+ current = n*row + column;
+ } while(current < i);
+
+ if (current >i) {
+ real temp = a[i];
+ a[i] = a[current];
+ a[current] = temp;
+ }
+ }
+ }
+}
+--
+Robin Becker
+
+ From: E. Robert Tisdale (edwin@netwood.net)
+ Subject: Re: transpose of a nxm matrix stored in a vector !!!
+ Newsgroups: sci.math.num-analysis
+ Date: 2000/07/25
+
+
+Take a look at
+The C++ Scalar, Vector, Matrix and Tensor class library
+
+ http://www.netwood.net/~edwin/svmt/
+
+<Type><System>SubVector&
+ <Type><System>SubVector::transpose(Extent p, Extent q) {
+ <Type><System>SubVector&
+ v = *this;
+ if (1 < p && 1 < q) {
+ // A vector v of extent n = qp is viewed as a q by p matrix U and
+ // a p by q matrix V where U_{ij} = v_{p*i+j} and V_{ij} = v_{q*i+j}.
+ // The vector v is modified in-place so that V is the transpose of U.
+ // The algorithm searches for every sequence k_s of S indices
+ // such that a circular shift of elements v_{k_s} <-- v_{k_{s+1}}
+ // and v_{k_{S-1}} <-- v_{k_0} effects an in-place transpose.
+ Extent n = q*p;
+ Extent m = 0; // count up to n-2
+ Offset l = 0; // 1 <= l <= n-2
+ while (++l < n-1 && m < n-2) {
+ Offset k = l;
+ Offset j = k;
+ while (l < (k = (j%p)*q + j/p)) { // Search backward for k < l.
+ j = k;
+ }
+ // If a sequence of indices beginning with l has any index k < l,
+ // it has already been transposed. The sequence length S = 1
+ // and diagonal element v_k is its own transpose if k = j.
+ // Skip every index sequence that has already been transposed.
+ if (k == l) { // a new sequence
+ if (k < j) { // with 1 < S
+ TYPE x = v[k]; // save v_{k_0}
+ do {
+ v[k] = v[j]; // v_{k_{s}} <-- v_{k_{s+1}}
+ k = j;
+ ++m;
+ } while (l < (j = (k%q)*p + k/q));
+ v[k] = x; // v_{k_{S-1}} <-- v_{k_0}
+ }
+ ++m;
+ }
+ }
+ } return v;
+ }
+
+
+
+<Type><System>SubVector&
+
+Read the rest of this message... (50 more lines)
+
+ From: Victor Eijkhout (eijkhout@disco.cs.utk.edu)
+ Subject: Re: transpose of a nxm matrix stored in a vector !!!
+ Newsgroups: sci.math.num-analysis
+ Date: 2000/07/25
+
+
+"Alan Miller" <amiller @ vic.bigpond.net.au> writes:
+
+> The attached routine does an in situ transpose.
+> begin 666 Dtip.f90
+> M4U5"4D]55$E.12!D=&EP("AA+"!N,2P@;C(L(&YD:6TI#0HA("TM+2TM+2TM
+
+Hm. F90? You're not silently allocating a temporary I hope?
+
+(Why did you have to encode this? Now I have to save, this decode, ...
+and all for plain ascii?)
+
+--
+Victor Eijkhout
+"When I was coming up, [..] we knew exactly who the they were. It was us
+versus them, and it was clear who the them was were. Today, we are not
+so sure who the they are, but we know they're there." [G.W. Bush]
+
+ From: Alan Miller (amiller_@_vic.bigpond.net.au)
+ Subject: Re: transpose of a nxm matrix stored in a vector !!!
+ Newsgroups: sci.math.num-analysis
+ Date: 2000/07/25
+
+
+Victor Eijkhout wrote in message ...
+>"Alan Miller" <amiller @ vic.bigpond.net.au> writes:
+>
+>> The attached routine does an in situ transpose.
+>> begin 666 Dtip.f90
+>> M4U5"4D]55$E.12!D=&EP("AA+"!N,2P@;C(L(&YD:6TI#0HA("TM+2TM+2TM
+>
+>Hm. F90? You're not silently allocating a temporary I hope?
+>
+>(Why did you have to encode this? Now I have to save, this decode, ...
+>and all for plain ascii?)
+>
+
+I know the problem.
+I sometimes use a Unix system, and have to use decode64 to read
+attachments. On the other hand, Windows wraps lines around,
+formats then and generally makes the code unreadable.
+
+The straight code for dtip (double transpose in place) is attached
+this time.
+
+>--
+>Victor Eijkhout
+
+
+--
+Alan Miller, Retired Scientist (Statistician)
+CSIRO Mathematical & Information Sciences
+Alan.Miller -at- vic.cmis.csiro.au
+http://www.ozemail.com.au/~milleraj
+http://users.bigpond.net.au/amiller/
+
+
+=================================================================
+
+From: Darran Edmundson (dedmunds@sfu.ca)
+ Subject: array reordering algorithm?
+ Newsgroups: sci.math.num-analysis
+ Date: 1995/04/30
+
+
+A code I've written refers to a complex array as two separate real arrays.
+However, I have a canned subroutine which expects a single array where the
+real and imaginary values alternate. Essentially I have a case of mismatched
+data structures, yet for reasons that I'd rather not go into, I'm stuck with them.
+
+Assuming that the two real arrays A and B are sequential in memory, and
+that the single array of alternating real/imaginary values C shares the same
+space, what I need is a porting subroutine that remaps the data from one format
+to the other - using as little space as possible.
+
+I think of the problem as follows. Imagine an array of dimension 10 containing
+the values 1,3,5,7,9,2,4,6,8,10 in this order.
+
+ A(1) / 1 \ C(1)
+ A(2) | 3 | C(2)
+ A(3) | 5 | C(3)
+ A(4) | 7 | C(4)
+ A(5) \ 9 | C(5)
+ |
+ B(1) / 2 | C(6)
+ B(2) | 4 | C(7)
+ B(3) | 6 | C(8)
+ B(4) | 8 | C(9)
+ B(5) \ 10 / C(10)
+
+Given that I know this initial pattern, I want to sort the array C in-place *without
+making comparisons*. That is, the algorithm can only depend on the initial
+knowledge of the pattern. Do you see what a sort is going to do? It will
+make the A and B arrays alternate, i.e. C(1)=A(1), C(2)=B(1), C(3)=A(2),
+C(4)=B(2), etc. It's not a real sort though because I can't actually refer to the
+values above (i.e. no comparisons) because A and B will be holding real data,
+not this contrived pattern. The pattern above exists though - it's the
+natural ordering in memory of A and B.
+
+Either pair swapping only or a small amount of workspace can be used. The
+in-place is important - imagine scaling this problem up to an
+array of 32 or 64 million double precision values and you can easily see how
+duplicating the array is not a feasible solution.
+
+Any ideas? I've been stumped on this for a day and a half now.
+
+Darran Edmundson
+dedmunds@sfu.ca
+
+ From: Roger Critchlow (rec@elf115.elf.org)
+ Subject: Re: array reordering algorithm?
+ Newsgroups: sci.math.num-analysis
+ Date: 1995/04/30
+
+
+ Any ideas? I've been stumped on this for a day and a half now.
+
+Here's some code for in situ permutations of arrays that I wrote
+a few years ago. It all started from the in situ transposition
+algorithms in the Collected Algorithms of the ACM, the references
+for which always get lost during the decryption from fortran.
+
+This is the minimum space algorithm. All you need to supply is
+a function which computes the new order array index from the old
+order array index.
+
+If you can spare n*m bits to record the indexes of elements which
+have been permuted, then you can speed things up.
+
+-- rec --
+
+------------------------------------------------------------------------
+/*
+** Arbitrary in situ permutations of an m by n array of base type TYPE.
+** Copyright 1995 by Roger E Critchlow Jr, rec@elf.org, San Francisco, CA.
+** Fair use permitted, caveat emptor.
+*/
+typedef int TYPE;
+
+int transposition(int ij, int m, int n) /* transposition about diagonal from upper left to lower right */
+{ return ((ij%m)*n+ (ij/m)); }
+
+int countertrans(int ij, int m, int n) /* transposition about diagonal from upper right to lower left */
+{ return ((m-1-(ij%m))*n+ (n-1-(ij/m))); }
+
+int rotate90cw(int ij, int m, int n) /* 90 degree clockwise rotation */
+{ return ((m-1-(ij%m))*n+ (ij/m)); }
+
+int rotate90ccw(int ij, int m, int n) /* 90 degree counter clockwise rotation */
+{ return ((ij%m)*n+ (n-1-(ij/m))); }
+
+int rotate180(int ij, int m, int n) /* 180 degree rotation */
+{ return ((m-1-(ij/n))*n+ (n-1-(ij%n))); }
+
+int reflecth(int ij, int m, int n) /* reflection across horizontal plane */
+{ return ((m-1-(ij/n))*n+ (ij%n)); }
+
+int reflectv(int ij, int m, int n) /* reflection across vertical plane */
+{ return ((ij/n)*n+ (n-1-(ij%n))); }
+
+int in_situ_permutation(TYPE a[], int m, int n, int (*origination)(int ij, int m, int n))
+{
+ int ij, oij, dij, n_to_do;
+ TYPE b;
+ n_to_do = m*n;
+ for (ij = 0; ij < m*n && n_to_do > 0; ij += 1) {
+ /* Test for previously permuted */
+ for (oij = origination(ij,m,n); oij > ij; oij = origination(oij,m,n))
+ ;
+ if (oij < ij)
+ continue;
+ /* Chase the cycle */
+ dij = ij;
+ b = a[ij];
+ for (oij = origination(dij,m,n); oij != ij; oij = origination(dij,m,n)) {
+ a[dij] = a[oij];
+ dij = oij;
+ n_to_do -= 1;
+ }
+ a[dij] = b;
+ n_to_do -= 1;
+ } return 0;
+}
+
+#define TESTING 1
+#if TESTING
+
+/* fill a matrix with sequential numbers, row major ordering */
+void fill_matrix_rows(a, m, n) TYPE *a; int m, n;
+{
+ int i, j;
+ for (i = 0; i < m; i += 1)
+ for (j = 0; j < n; j += 1)
+ a[i*n+j] = i*n+j;
+}
+
+/* fill a matrix with sequential numbers, column major ordering */
+void fill_matrix_cols(a, m, n) TYPE *a; int m, n;
+{
+ int i, j;
+ for (i = 0; i < m; i += 1)
+ for (j = 0; j < n; j += 1)
+ a[i*n+j] = j*m+i;
+}
+
+/* test a matrix for sequential numbers, row major ordering */
+int test_matrix_rows(a, m, n) TYPE *a; int m, n;
+{
+ int i, j, o;
+ for (o = i = 0; i < m; i += 1)
+ for (j = 0; j < n; j += 1)
+ o += a[i*n+j] != i*n+j;
+ return o;
+}
+
+/* test a matrix for sequential numbers, column major ordering */
+int test_matrix_cols(a, m, n) TYPE *a; int m, n;
+{
+ int i, j, o;
+ for (o = i = 0; i < m; i += 1)
+ for (j = 0; j < n; j += 1)
+ o += a[i*n+j] != j*m+i;
+ return o;
+}
+
+/* print a matrix */
+void print_matrix(a, m, n) TYPE *a; int m, n;
+{
+ char *format;
+ int i, j;
+ if (m*n < 10) format = "%2d";
+ if (m*n < 100) format = "%3d";
+ if (m*n < 1000) format = "%4d";
+ if (m*n < 10000) format = "%5d";
+ for (i = 0; i < m; i += 1) {
+ for (j = 0; j < n; j += 1)
+ printf(format, a[i*n+j]);
+ printf("\n");
+ }
+}
+
+#if TEST_TRANSPOSE
+#define MAXSIZE 1000
+
+main()
+{
+ int i, j, m, n, o;
+ TYPE a[MAXSIZE];
+ for (m = 1; m < sizeof(a)/sizeof(a[0]); m += 1)
+ for (n = 1; m*n < sizeof(a)/sizeof(a[0]); n += 1) {
+ fill_matrix_rows(a, m, n); /* {0 1} {2 3} */
+ if (o = transpose(a, m, n))
+ printf(">> transpose returned %d for a[%d][%d], row major\n", o, m, n);
+ if ((o = test_matrix_cols(a, n, m)) != 0) /* {0 2} {1 3} */
+ printf(">> transpose made %d mistakes for a[%d][%d], row major\n", o, m, n);
+ /* column major */
+ fill_matrix_rows(a, m, n);
+ if (o = transpose(a, m, n))
+ printf(">> transpose returned %d for a[%d][%d], column major\n", o, m, n);
+ if ((o = test_matrix_cols(a, n, m)) != 0)
+ printf(">> transpose made %d mistakes for a[%d][%d], column major\n", o, m, n);
+ } return 0;
+}
+#endif /* TEST_TRANSPOSE */
+
+
+#define TEST_DISPLAY 1
+#if TEST_DISPLAY
+main(argc, argv) int argc; char *argv[];
+{
+ TYPE *a;
+ int m = 5, n = 5;
+ extern void *malloc();
+ if (argc > 1) {
+ m = atoi(argv[1]);
+ if (argc > 2)
+ n = atoi(argv[2]);
+ }
+ a = malloc(m*n*sizeof(TYPE));
+
+ printf("matrix\n");
+ fill_matrix_rows(a, m, n);
+ print_matrix(a, m, n);
+ printf("transposition\n");
+ in_situ_permutation(a, m, n, transposition);
+ print_matrix(a, n, m);
+
+ printf("counter transposition\n");
+ fill_matrix_rows(a, m, n);
+ in_situ_permutation(a, m, n, countertrans);
+ print_matrix(a, n, m);
+
+ printf("rotate 90 degrees clockwise\n");
+ fill_matrix_rows(a, m, n);
+ in_situ_permutation(a, m, n, rotate90cw);
+ print_matrix(a, n, m);
+
+ printf("rotate 90 degrees counterclockwise\n");
+ fill_matrix_rows(a, m, n);
+ in_situ_permutation(a, m, n, rotate90ccw);
+ print_matrix(a, n, m);
+
+ printf("rotate 180 degrees\n");
+ fill_matrix_rows(a, m, n);
+ in_situ_permutation(a, m, n, rotate180);
+ print_matrix(a, m, n);
+
+ printf("reflect across horizontal\n");
+ fill_matrix_rows(a, m, n);
+ in_situ_permutation(a, m, n, reflecth);
+ print_matrix(a, m, n);
+
+ printf("reflect across vertical\n");
+ fill_matrix_rows(a, m, n);
+ in_situ_permutation(a, m, n, reflectv);
+ print_matrix(a, m, n);
+
+ return 0;
+}
+
+#endif
+#endif
+